Архивирование в AWS Glacier (часть 2)

21.07.2017

В этой заметке описывается скрипт для создания архива из набора файлов на сервере компании и копирования этого архива в AWS Glacier.

Логика скрипта:

  1. Проверка наличия папки-источника для архивирования.
  2. Упаковка папки в ZIP-архив.
  3. Проверка содержимого ZIP-архива и составление списка файлов в архиве.
  4. Удаление файлов из папки-источника по списку файлов из ZIP-архива.
  5. Удаление оригинальной папки, если ее размер равен нулю (если удалены все файлы на предыдущем этапе).
  6. Регистрация архива и его содержимого в базе данных компании, установка состояния архива как ready to transfer.
  7. Проверка наличия файлов для копирования в AWS Glacier (поиск в базе данных архивов с состоянием ready to transfer).
  8. Проверка размера архива (размер должен быть менее 4 ГБ).
  9. Копирование архива в AWS Glacier.
  10. Регистрация состояния архива в базе данных как transfered.

Код скрипта:

# ---------------------------------------------------------------------------
# Script configuration. Every value below is read by the processing code
# further down in this script; edit the values here and nowhere else.
# ---------------------------------------------------------------------------

# Folder whose immediate sub-folders are archived (one ZIP per sub-folder)
$SourceFolder = "C:\1_Archive"
# Folder where the ZIP archives are saved
$ArchiveFolder = "C:\2_Transfer"

# Notifications (passed to Send-MailMessage)
$MailServer = "mail.domain.com"
$MailEncoding = [System.Text.Encoding]::UTF8
$MailFrom = "$env:computername@domain.com"
$MailSubject = "Glacier backup"
[string[]]$MailTO = "admin@domain.com"
[string[]]$MailCC = ""

# AWS variables
# NOTE(review): the access key and secret key are stored in plain text in this
# file - restrict access to the script or move the secrets to a protected store.
$AWSAccountID = 'AWS_Account_ID'
$AWSRegion = 'AWS_region'
$AWSVaultName = 'AWS_Vault_Name'
$AWSProfileAccessKey = "AWS_Access_Key"
$AWSProfileSecretKey = "AWS_Secret_Key"

# SQL variables
# NOTE(review): the SQL password is stored in plain text as well.
$SQLServer = 'SQLServer.domain.com'
$SQLDatabase = 'AWS-Glacier'
$SQLTable = '[archive-2017]'
$SQLUsername = 'db_user'
$SQLPassword = 'db_password'

# Registering AWS libraries
Add-Type -Path "C:\Program Files (x86)\AWS SDK for .NET\bin\Net45\AWSSDK.Core.dll"
Add-Type -Path "C:\Program Files (x86)\AWS SDK for .NET\bin\Net45\AWSSDK.Glacier.dll"
# Registering the ZIP library. It is a .NET Framework assembly, so it is loaded
# by name instead of by a hard-coded Framework64\v4.0.30319 file path.
Add-Type -AssemblyName System.IO.Compression.FileSystem

Function Write-ScriptLog {
  <#
  .SYNOPSIS
    Appends one timestamped line to a log file.
  .DESCRIPTION
    The line has the form "dd.MM.yyyy HH:mm:ss<TAB>Message" and is appended
    to the file with Out-File -Append.
  .PARAMETER Message
    Text to write after the timestamp.
  .PARAMETER LogFile
    Path of the log file to append to.
  #>
  # CmdletBinding belongs in front of Param(); inside the Param() list it is
  # treated as an attribute of the first parameter.
  [CmdletBinding()]
  Param(
    [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
    [String]$Message,
    [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
    [String]$LogFile
  )
  Process {
    $TimeStamp = Get-Date -UFormat "%d.%m.%Y %H:%M:%S"
    $LogMessage = $TimeStamp + "`t" + $Message
    $LogMessage | Out-File -FilePath $LogFile -Append
  }
}#End Function

Function Get-FolderSize {
  <#
  .SYNOPSIS
    Returns the Size property of a folder as reported by the
    Scripting.FileSystemObject COM object.
  .PARAMETER Path
    Path of the folder to measure.
  .OUTPUTS
    The value of the COM Folder object's Size property.
  #>
  # CmdletBinding belongs in front of Param(); inside the Param() list it is
  # treated as an attribute of the first parameter.
  [CmdletBinding()]
  Param(
    [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
    [String]$Path
  )
  Begin {
    # One COM object is shared by all pipeline items
    $oFSO = New-Object -ComObject Scripting.FileSystemObject
  }
  Process {
    $oFolder = $oFSO.GetFolder($Path)
    Return ($oFolder.Size)
  }
  End {
    # Release the COM object explicitly instead of waiting for garbage collection
    If ($oFSO) {
      [void][System.Runtime.InteropServices.Marshal]::ReleaseComObject($oFSO)
    }
  }
} # End Function

Function Invoke-Query {
  <#
  .SYNOPSIS
    Runs a SQL query over a SQL Server login connection and returns the
    result loaded into a System.Data.DataTable.
  .PARAMETER ServerInstance
    Value used for "Server=" in the connection string.
  .PARAMETER Database
    Value used for "Database=" in the connection string.
  .PARAMETER Username
    SQL login name ("uid=").
  .PARAMETER Password
    SQL login password ("pwd=").
  .PARAMETER Query
    SQL text to execute. It is sent as-is, so the caller is responsible for
    escaping any values embedded in it.
  .OUTPUTS
    The DataTable filled from the query's data reader.
  #>
  # CmdletBinding belongs in front of Param(); inside the Param() list it is
  # treated as an attribute of the first parameter.
  [CmdletBinding()]
  Param(
    [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
    [String]$ServerInstance,
    [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
    [String]$Database,
    [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
    [String]$Username,
    [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
    [String]$Password,
    [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
    [String]$Query
  )
  Process {
    $ConnectionString = `
      "Server=$ServerInstance;uid=$Username;pwd=$Password;Database=$Database;Integrated Security=False;"
    $Connection = New-Object System.Data.SqlClient.SqlConnection
    $Connection.ConnectionString = $ConnectionString
    # Initialised here so the Finally block never sees a value from an outer scope
    $Command = $null
    $Result = $null
    Try {
      $Connection.Open()
      $Command = New-Object System.Data.SQLClient.SQLCommand
      $Command.Connection = $Connection
      $Command.CommandText = $Query
      $Result = $Command.ExecuteReader()
      $Datatable = New-Object "System.Data.DataTable"
      $Datatable.Load($Result)
      Return $Datatable
    } Finally {
      # Release the reader, command and connection even when the query fails
      If ($Result) { $Result.Dispose() }
      If ($Command) { $Command.Dispose() }
      $Connection.Dispose()
    }
  }
}#End Function

Function Invoke-NonQuery {
  <#
  .SYNOPSIS
    Executes a SQL statement that returns no rows (INSERT, UPDATE, ...) over
    a SQL Server login connection.
  .PARAMETER ServerInstance
    Value used for "Server=" in the connection string.
  .PARAMETER Database
    Value used for "Database=" in the connection string.
  .PARAMETER Username
    SQL login name ("uid=").
  .PARAMETER Password
    SQL login password ("pwd=").
  .PARAMETER Query
    SQL text to execute. It is sent as-is, so the caller is responsible for
    escaping any values embedded in it.
  .OUTPUTS
    None. The affected-row count returned by ExecuteNonQuery is discarded.
  #>
  # CmdletBinding belongs in front of Param(); inside the Param() list it is
  # treated as an attribute of the first parameter.
  [CmdletBinding()]
  Param(
    [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
    [String]$ServerInstance,
    [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
    [String]$Database,
    [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
    [String]$Username,
    [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
    [String]$Password,
    [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
    [String]$Query
  )
  Process {
    $ConnectionString = `
      "Server=$ServerInstance;uid=$Username;pwd=$Password;Database=$Database;Integrated Security=False;"
    $Connection = New-Object System.Data.SqlClient.SqlConnection
    $Connection.ConnectionString = $ConnectionString
    # Initialised here so the Finally block never sees a value from an outer scope
    $Command = $null
    Try {
      $Connection.Open()
      $Command = New-Object System.Data.SQLClient.SQLCommand
      $Command.Connection = $Connection
      $Command.CommandText = $Query
      # The row count is not part of this function's output
      [void]$Command.ExecuteNonQuery()
    } Finally {
      # Release the command and connection even when the statement fails
      If ($Command) { $Command.Dispose() }
      $Connection.Dispose()
    }
  }
}#End Function

# --- Start ---

# Derived values: run time, the script's own folder and today's log file
$CurrentDate = Get-Date
$ScriptPath = $MyInvocation.MyCommand.Path
$ScriptName = $MyInvocation.MyCommand.Name
# Full path minus the file name, so the folder keeps its trailing backslash
$ScriptFolder = $ScriptPath.SubString(0, ($ScriptPath.Length - $ScriptName.Length))
$LogFolder = $ScriptFolder + 'Logs'
# Log name: <yyyy_MM_dd>_<script name minus its last 4 characters>.log
$LogFile = '{0}\{1}_{2}.log' -f $LogFolder, (Get-Date -format yyyy_MM_dd), `
  $ScriptName.SubString(0, ($ScriptName.Length - 4))

# Make sure the Logs folder exists, then create (or overwrite) today's log file
If (-not (Test-Path $LogFolder -PathType Container)) {
  New-Item -ItemType Directory -Path $LogFolder
}
Out-File -FilePath $LogFile

# Log header: start marker followed by the input data the run works with
$HeaderLines = @(
  ($ScriptName + " started"),
  "======================== Input data ========================",
  "Source folders =  $SourceFolder",
  "Archive folder  = $ArchiveFolder",
  "AWS Account ID = $AWSAccountID",
  "AWS Region = $AWSRegion",
  "AWS Vault Name = $AWSVaultName",
  "SQL Server = $SQLServer",
  "SQL Database = $SQLDatabase",
  "SQL Username = $SQLUsername",
  "======================== Processing ========================"
)
ForEach ($HeaderLine in $HeaderLines) {
  Write-ScriptLog -LogFile $LogFile -Message $HeaderLine
}


# Compressing files
# For every sub-folder of $SourceFolder: pack it into a ZIP in $ArchiveFolder,
# list the ZIP entries, delete the source files that are in the ZIP, delete the
# source folder once it is empty, and register the archive in the database with
# the state "ready to transfer".
Write-ScriptLog -LogFile $LogFile -Message ("------------------------ Archiving -------------------------")
$ArchiveCompression = [System.IO.Compression.CompressionLevel]::Fastest

# Shared Send-MailMessage arguments. Cc is added only when at least one
# non-empty address is configured, because Send-MailMessage rejects an empty one.
$MailParams = @{
  SmtpServer = $MailServer
  Encoding   = $MailEncoding
  From       = $MailFrom
  To         = $MailTO
  Subject    = $MailSubject
}
$MailCCList = @($MailCC | Where-Object { $_ })
If ($MailCCList.Count -gt 0) { $MailParams['Cc'] = $MailCCList }

# Writes an error line to the log and mails the same text to the administrators
Function Write-ArchivingError {
  Param([String]$ErrorText)
  Write-ScriptLog -LogFile $LogFile -Message ("----> Error: " + $ErrorText)
  # -BodyAsHtml is a switch; the text itself has to go to -Body
  Send-MailMessage @MailParams -BodyAsHtml `
    -Body ($ErrorText + "<br />Please review $LogFile on $env:computername for details.")
}

Write-ScriptLog -LogFile $LogFile -Message ("Archiving is started")
# @() keeps .Count meaningful when there is exactly one sub-folder or none
$SourceSubFolders = @(Get-ChildItem $SourceFolder -Force -Directory)
Write-ScriptLog -LogFile $LogFile -Message ("There are " + $SourceSubFolders.Count `
  + " folders for archiving")
ForEach ($Folder in $SourceSubFolders) {
  $FolderPath = $Folder.FullName
  Write-ScriptLog -LogFile $LogFile -Message ("Source folder is $FolderPath")
  $FolderSize = Get-FolderSize -Path $FolderPath
  Write-ScriptLog -LogFile $LogFile -Message ("  Source folder size is " `
    + [math]::ceiling($FolderSize / 1mb) + " MB")
  If ($FolderSize -gt 0) {
    $ArchiveName = $Folder.Name.Replace("\", "_").Replace(" ","") + ".zip"
    $ArchivePath = $ArchiveFolder + "\" + $ArchiveName
    Write-ScriptLog -LogFile $LogFile -Message ("  Archive file is $ArchiveName")
    Write-ScriptLog -LogFile $LogFile -Message ("  Archiving started")
    # Making ZIP-file. If this fails, nothing may be deleted or registered for
    # this folder, so the folder is skipped.
    Try {
      If (Test-Path -LiteralPath $ArchivePath -PathType Leaf) {
        Remove-Item -LiteralPath $ArchivePath -Force -ErrorAction Stop
      }
      [System.IO.Compression.ZipFile]::CreateFromDirectory($FolderPath, $ArchivePath, `
        $ArchiveCompression, $False)
    } Catch {
      Write-ArchivingError -ErrorText $_.Exception.Message
      Continue
    }
    Write-ScriptLog -LogFile $LogFile -Message ("  Archiving finished")
    Write-ScriptLog -LogFile $LogFile -Message ("  Archive size is " `
      + [math]::ceiling((Get-Item -LiteralPath $ArchivePath).Length / 1mb) + " MB")
    # Testing ZIP-file and deleting source files
    Write-ScriptLog -LogFile $LogFile -Message ("  Testing the archive and removing source files")
    $ArchiveFiles = $null
    Try {
      $ArchiveFiles = [IO.Compression.ZipFile]::OpenRead($ArchivePath)
    } Catch {
      # An unreadable archive must not lead to deleting the source files
      Write-ArchivingError -ErrorText $_.Exception.Message
      Continue
    }
    # One line per entry: <path><TAB><size><TAB><dd.MM.yyyy><CR><LF>
    $ArchiveContentBuilder = New-Object System.Text.StringBuilder
    Try {
      ForEach ($CurrentArchiveFile in $ArchiveFiles.Entries) {
        $EntryRelativePath = $CurrentArchiveFile.FullName.Replace("/","\")
        [void]$ArchiveContentBuilder.Append($EntryRelativePath + "`t" `
          + $CurrentArchiveFile.Length + "`t" `
          + $CurrentArchiveFile.LastWriteTime.ToString("dd.MM.yyyy") + "`r`n")
        $CurrentFilePath = ($FolderPath + '\' + $EntryRelativePath)
        # -LiteralPath: file names may contain wildcard characters such as [ ]
        If (Test-Path -LiteralPath $CurrentFilePath -PathType Leaf) {
          Try {
            Remove-Item -LiteralPath $CurrentFilePath -Force -ErrorAction Stop
          } Catch {
            Write-ArchivingError -ErrorText $_.Exception.Message
          }
        }
      }
    } Finally {
      $ArchiveFiles.Dispose()
    }
    $ArchiveContent = $ArchiveContentBuilder.ToString()
    # Checking source folder size after archiving and deleting it
    $CurrentFolderSize = Get-FolderSize -Path $Folder.FullName
    If ($CurrentFolderSize -eq 0) {
      Try {
        Remove-Item -LiteralPath $Folder.FullName -Recurse -Force -Confirm:$False -ErrorAction Stop
      } Catch {
        Write-ArchivingError -ErrorText $_.Exception.Message
      }
    }
    # Prepare values for SQL query (single quotes doubled in every text value)
    $SQLArchiveName = $ArchiveName.Replace("'","''")
    $SQLArchiveContent = $ArchiveContent.Replace("'","''")
    $SQLArchiveTimeStamp = (Get-Date).ToString("yyyy-MM-dd HH:mm:ss")
    $SQLState = "ready to transfer"
    # The [notice] column carries the archive path read back by the transfer stage
    $SQLNotice = $ArchivePath.Replace("'","''")
    # Registering the arhive in the database
    Write-ScriptLog -LogFile $LogFile -Message ("  Registering the arhive in the database")
    $InsertQuery = "INSERT INTO " + $SQLTable `
      + " ([archive-name],[aws-account-id],[aws-vault-name],[aws-archive-id],[archive-content]," `
      + "[creation-timestamp],[last-access-timestamp],[state],[notice])"
    $InsertQuery += "  VALUES(N'{0}', null, null, null, N'{1}', '{2}', null, '{3}', N'{4}')" -f `
      $SQLArchiveName, $SQLArchiveContent, $SQLArchiveTimeStamp, $SQLState, $SQLNotice
    Try {
      Invoke-NonQuery -ServerInstance $SQLServer -Database $SQLDatabase `
        -Username $SQLUsername -Password $SQLPassword -Query $InsertQuery
    } Catch {
      # An unregistered archive is never picked up by the transfer stage
      Write-ArchivingError -ErrorText $_.Exception.Message
    }
  } Else {
    Write-ScriptLog -LogFile $LogFile -Message ("  Nothing to archive, deleting the source forlder")
    Try {
      Remove-Item -LiteralPath $Folder.FullName -Recurse -Force -Confirm:$False -ErrorAction Stop
    } Catch {
      Write-ArchivingError -ErrorText $_.Exception.Message
    }
  }
}
Write-ScriptLog -LogFile $LogFile -Message ("Archiving is finished")

# Transfering files
# Reads every database row with the state "ready to transfer", uploads the
# archive file named in its [notice] column to the AWS Glacier vault and stores
# the outcome ("transfered", "too big size" or "archive not found") in the row.
Write-ScriptLog -LogFile $LogFile -Message ("------------------------ Transfering -----------------------")

# Shared Send-MailMessage arguments. Cc is added only when at least one
# non-empty address is configured, because Send-MailMessage rejects an empty one.
$MailParams = @{
  SmtpServer = $MailServer
  Encoding   = $MailEncoding
  From       = $MailFrom
  To         = $MailTO
  Subject    = $MailSubject
}
$MailCCList = @($MailCC | Where-Object { $_ })
If ($MailCCList.Count -gt 0) { $MailParams['Cc'] = $MailCCList }

# Mails an HTML alert; the standard "review the log" footer is appended
Function Send-TransferAlert {
  Param([String]$Text)
  # -BodyAsHtml is a switch; the text itself has to go to -Body as one value
  Send-MailMessage @MailParams -BodyAsHtml `
    -Body ($Text + "<br />Please review $LogFile on $env:computername for details.")
}

# Stores a new state and the current time in the database row with the given id
Function Set-TransferState {
  Param([String]$Id, [String]$State)
  $StateTimeStamp = (Get-Date).ToString("yyyy-MM-dd HH:mm:ss")
  $StateQuery = "UPDATE " + $SQLTable + " SET [state] = '$State', [last-access-timestamp] = " `
    + "'$StateTimeStamp' WHERE [id] = $Id"
  Invoke-NonQuery -ServerInstance $SQLServer -Database $SQLDatabase -Username $SQLUsername `
    -Password $SQLPassword -Query $StateQuery
}

# Archives of this size or larger are not uploaded in one request (4 GB)
$MaxSingleUploadSize = 4294967296

# Openning AWS connection
$AWSEndpoint = [Amazon.RegionEndpoint]::GetBySystemName($AWSRegion)
$AWSGlacierTransferManager = [Amazon.Glacier.Transfer.ArchiveTransferManager]::New($AWSProfileAccessKey, `
  $AWSProfileSecretKey, $AWSEndpoint)
Write-ScriptLog -LogFile $LogFile -Message ("Connection to AWS Glacier is opened")
# Requesting archives from the database
$Query = "SELECT * FROM " + $SQLTable + " WHERE [state] = 'ready to transfer'"
# @() keeps .Count meaningful when the query returns exactly one row or none
$TransferFiles = @(Invoke-Query -ServerInstance $SQLServer -Database $SQLDatabase `
  -Username $SQLUsername -Password $SQLPassword -Query $Query)
Write-ScriptLog -LogFile $LogFile -Message ("There are " + $TransferFiles.Count `
  + " archives for transfering to AWS Glacier")
ForEach ($CurrentFile in $TransferFiles) {
  $CurrentFileId = $CurrentFile."id"
  $CurrentFileName = $CurrentFile."archive-name"
  # Cast to string so that a NULL column becomes an empty path ("not found")
  $CurrentFilePath = [string]$CurrentFile."notice"
  If ($CurrentFilePath -and (Test-Path -LiteralPath $CurrentFilePath -PathType Leaf)) {
    Write-ScriptLog -LogFile $LogFile `
      -Message ("$CurrentFilePath archive is ready to be transfered to AWS Glacier")
    $CurrentFileSize = (Get-Item -LiteralPath $CurrentFilePath).Length
    If ($CurrentFileSize -lt $MaxSingleUploadSize) {
      Write-ScriptLog -LogFile $LogFile -Message ("  The archive size is OK for transferring in one shot")
      # Glacier archive descriptions are limited to 1024 printable ASCII
      # characters, so the archive name is sanitised before it is used as one.
      $ArchiveDesc = [regex]::Replace([string]$CurrentFileName, '[^\x20-\x7E]', '_')
      If ($ArchiveDesc.Length -gt 1024) { $ArchiveDesc = $ArchiveDesc.Substring(0, 1024) }
      # Transferring the file to AWS Glacier
      Write-ScriptLog -LogFile $LogFile -Message ("  Transfering the archive to AWS Glacier")
      Try {
        $AWSTransferResult = $AWSGlacierTransferManager.Upload($AWSVaultName, $ArchiveDesc, $CurrentFilePath)
      } Catch {
        # Without this guard the row would be marked as transferred with the
        # archive id of the previous upload. The state is left unchanged so the
        # archive is retried on the next run.
        Write-ScriptLog -LogFile $LogFile -Message ("----> Error: " + $_.Exception.Message)
        Send-TransferAlert -Text $_.Exception.Message
        Continue
      }
      Write-ScriptLog -LogFile $LogFile -Message ("  The archive is transfered to AWS Glacier and has Id " `
        + $AWSTransferResult.ArchiveId)
      # Registering the arhive in the database
      Write-ScriptLog -LogFile $LogFile -Message ("  Registering the arhive in the database")
      # Single quotes are doubled in every text value placed into the statement
      $SQLAWSAccountId = ([string]$AWSAccountID).Replace("'","''")
      $SQLAWSVaultName = ([string]$AWSVaultName).Replace("'","''")
      $SQLAWSArchiveId = ([string]$AWSTransferResult.ArchiveId).Replace("'","''")
      $SQLAccessTimeStamp = (Get-Date).ToString("yyyy-MM-dd HH:mm:ss")
      $SQLState = "transfered"
      $UpdateQuery = "UPDATE " + $SQLTable + " SET [aws-account-id] = '$SQLAWSAccountId', [aws-vault-name] " `
        + "= '$SQLAWSVaultName', [aws-archive-id] = '$SQLAWSArchiveId',"
      $UpdateQuery += " [state] = '$SQLState', [last-access-timestamp] = '$SQLAccessTimeStamp' " `
        + "WHERE [id] = $CurrentFileId"
      Try {
        Invoke-NonQuery -ServerInstance $SQLServer -Database $SQLDatabase -Username $SQLUsername `
          -Password $SQLPassword -Query $UpdateQuery
        Write-ScriptLog -LogFile $LogFile `
          -Message ("  The arhive is registered in the database with ID $CurrentFileId")
      } Catch {
        # The upload succeeded but the row still says "ready to transfer"
        Write-ScriptLog -LogFile $LogFile -Message ("----> Error: " + $_.Exception.Message)
        Send-TransferAlert -Text $_.Exception.Message
      }
    } Else {
      Write-ScriptLog -LogFile $LogFile `
        -Message ("  The archive size is too big for transferring in one shot")
      Write-ScriptLog -LogFile $LogFile -Message ("  Use multipart upload method")
      Send-TransferAlert -Text "$CurrentFilePath archive size is too big for transferring in one shot"
      Set-TransferState -Id $CurrentFileId -State "too big size"
    }
  }
  Else {
    Write-ScriptLog -LogFile $LogFile -Message ("$CurrentFilePath archive is not found")
    Send-TransferAlert -Text "$CurrentFilePath archive is not found"
    Set-TransferState -Id $CurrentFileId -State "archive not found"
  }
}
# The manager is null when its construction failed above
If ($AWSGlacierTransferManager) { $AWSGlacierTransferManager.Dispose() }
Write-ScriptLog -LogFile $LogFile -Message ("Connection to AWS Glacier is closed")

# --- End ---
Write-ScriptLog -LogFile $LogFile -Message ("============================================================")
Write-ScriptLog -LogFile $LogFile -Message ($MyInvocation.MyCommand.Name + " stopped")

Входные данные скрипта:

  • SourceFolder - путь, где хранятся папки-источники
  • ArchiveFolder - путь, куда скрипт сохраняет ZIP-архивы
  • MailServer - почтовый сервер компании для отправки уведомлений о работе скрипта
  • MailEncoding - кодировка писем
  • MailFrom - от чьего имени отправляются письма
  • MailSubject - тема писем
  • MailTO - кому отправлять письма
  • MailCC - кому отправлять копии писем
  • AWSAccountID - ID учетной записи Amazon Web Services
  • AWSRegion - регион, где находится коллекция архивов
  • AWSVaultName - имя коллекции архивов
  • AWSProfileAccessKey - ключ доступа (Access Key) учетной записи AWS с правом доступа к коллекции архивов
  • AWSProfileSecretKey - секретный ключ (Secret Key) этой учетной записи AWS
  • SQLServer - SQL сервер компании
  • SQLDatabase - имя базы данных
  • SQLTable - имя таблицы
  • SQLUsername - имя пользователя с правом вносить изменения в SQL базу данных
  • SQLPassword - пароль пользователя SQL

Результат работы скрипта:

  • Упакованная папка-исходник, которая хранится и на сервере компании, и в облаке AWS Glacier.
  • Запись в базе данных компании об упакованной папке, её содержимом и состоянии в облаке.
  • Лог-файл работы скрипта.
Продолжение в заметке "Servers - Архивирование в AWS Glacier (часть 3)".
Метки