Архивирование в AWS Glacier (часть 4)

25.07.2017

В этой заметке описывается скрипт для проверки содержимого облачного архива AWS Glacier и удаления локальных файлов, успешно размещенных в облаке.

Логика скрипта:

  • Найти файл с номером задания.
  • Подключиться к AWS Glacier и скачать результат инвентаризации коллекции архивов.
  • В базе данных компании найти все архивы с состоянием transfered.
  • Сравнить размеры локального архива и файла в облаке.
  • Удалить локальный архив и пометить его состояние в базе данных как archived.

Код скрипта:

# Folder that holds the local archive files. The script builds each local path as
# $ArchiveFolder + "\" + <archive-name column from the database>.
$ArchiveFolder = "C:\2_Transfer"

# Notifications: SMTP server, encoding and addressing used by Send-MailMessage.
$MailServer = "mail.domain.com"
$MailEncoding = [System.Text.Encoding]::UTF8
# The sender address is derived from the name of the computer running the script.
$MailFrom = "$env:computername@domain.com"
$MailSubject = "Glacier backup"
[string[]]$MailTO = "admin@domain.com"
# NOTE(review): this produces a one-element array containing an empty string, and the
# value is passed to Send-MailMessage -CC as is - confirm the cmdlet accepts it, or
# put a real address here.
[string[]]$MailCC = ""

# AWS variables (placeholders - replace with real values before running).
# NOTE(review): the access/secret keys are stored in clear text in the script.
$AWSAccountID = 'AWS_Account_ID'
$AWSRegion = 'AWS_region'
$AWSVaultName = 'AWS_Vault_Name'
$AWSProfileAccessKey = "AWS_Access_Key"
$AWSProfileSecretKey = "AWS_Secret_Key"
# File (looked up in the script's own folder) that contains the inventory job id.
$AWSJobIdFileName = "AWSGlacier-InventoryJobId.txt"

# SQL variables: server, database, table and SQL login used for the archive registry.
# The table name is concatenated into the queries, so the square brackets are required
# for a name that contains a dash.
$SQLServer = 'SQLServer.domain.com'
$SQLDatabase = 'AWS-Glacier'
$SQLTable = '[archive-2017]'
$SQLUsername = 'db_user'
$SQLPassword = 'db_password'

# Registering AWS libraries: loads the AWS SDK for .NET assemblies (Core and Glacier)
# from the fixed installation path below.
Add-Type -Path "C:\Program Files (x86)\AWS SDK for .NET\bin\Net45\AWSSDK.Core.dll"
Add-Type -Path "C:\Program Files (x86)\AWS SDK for .NET\bin\Net45\AWSSDK.Glacier.dll"

Function Write-ScriptLog {
 <#
 .SYNOPSIS
   Appends one timestamped line to a log file.
 .PARAMETER Message
   Text written after the timestamp.
 .PARAMETER LogFile
   Path of the log file; the line is appended with Out-File -Append.
 .OUTPUTS
   None. The only effect is the line appended to $LogFile.
 #>
 # CmdletBinding belongs to the function, so it is declared before Param()
 # rather than inside it (where it would be parsed as a parameter attribute).
 [CmdletBinding()]
 Param(
   [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
   [String]$Message,
   [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
   [String]$LogFile
 )
 Process {
   # Line layout: "dd.mm.yyyy HH:MM:SS<TAB>message"
   $TimeStamp = Get-Date -UFormat "%d.%m.%Y %H:%M:%S"
   ($TimeStamp + "`t" + $Message) | Out-File -FilePath $LogFile -Append
 }
}#End Function

Function Invoke-Query {
 <#
 .SYNOPSIS
   Runs a SELECT-style query against SQL Server using SQL authentication and
   returns the result loaded into a System.Data.DataTable.
 .PARAMETER ServerInstance
   SQL Server host / instance name placed into the connection string.
 .PARAMETER Database
   Database name placed into the connection string.
 .PARAMETER Username
   SQL login (uid).
 .PARAMETER Password
   Password of the SQL login (pwd).
 .PARAMETER Query
   Text of the query; it is executed as is, so the caller is responsible for
   building it safely.
 .OUTPUTS
   The loaded DataTable, handed back with Return.
   NOTE(review): PowerShell may enumerate the table into its rows on output -
   the caller in this script iterates the result with ForEach.
 .NOTES
   Errors from opening the connection or executing the query are not handled
   here; the Finally block only guarantees that the reader, the command and the
   connection are released.
 #>
 [CmdletBinding()]
 Param(
   [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
   [String]$ServerInstance,
   [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
   [String]$Database,
   [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
   [String]$Username,
   [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
   [String]$Password,
   [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
   [String]$Query
 )
 Process {
   $ConnectionString = "Server=$ServerInstance;uid=$Username;pwd=$Password;Database=$Database;Integrated Security=False;"
   $Connection = New-Object System.Data.SqlClient.SqlConnection
   $Connection.ConnectionString = $ConnectionString
   $Command = $null
   $Reader = $null
   $Datatable = New-Object System.Data.DataTable
   Try {
     $Connection.Open()
     $Command = New-Object System.Data.SqlClient.SqlCommand
     $Command.Connection = $Connection
     $Command.CommandText = $Query
     $Reader = $Command.ExecuteReader()
     $Datatable.Load($Reader)
   }
   Finally {
     # Release everything even when Open/ExecuteReader/Load fails, so a failed
     # query does not leave an open connection behind.
     If ($null -ne $Reader) { $Reader.Dispose() }
     If ($null -ne $Command) { $Command.Dispose() }
     $Connection.Dispose()
   }
   Return $Datatable
 }
}#End Function

Function Invoke-NonQuery {
 <#
 .SYNOPSIS
   Executes a statement that returns no rows (for example UPDATE) against
   SQL Server using SQL authentication.
 .PARAMETER ServerInstance
   SQL Server host / instance name placed into the connection string.
 .PARAMETER Database
   Database name placed into the connection string.
 .PARAMETER Username
   SQL login (uid).
 .PARAMETER Password
   Password of the SQL login (pwd).
 .PARAMETER Query
   Text of the statement; it is executed as is, so the caller is responsible
   for building it safely.
 .OUTPUTS
   None. The value returned by ExecuteNonQuery() is discarded.
 .NOTES
   Errors from opening the connection or executing the statement are not
   handled here; the Finally block only guarantees that the command and the
   connection are released.
 #>
 [CmdletBinding()]
 Param(
   [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
   [String]$ServerInstance,
   [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
   [String]$Database,
   [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
   [String]$Username,
   [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
   [String]$Password,
   [Parameter(ValueFromPipeline=$true,ValueFromPipelineByPropertyName=$true)]
   [String]$Query
 )
 Process {
   $ConnectionString = "Server=$ServerInstance;uid=$Username;pwd=$Password;Database=$Database;Integrated Security=False;"
   $Connection = New-Object System.Data.SqlClient.SqlConnection
   $Connection.ConnectionString = $ConnectionString
   $Command = $null
   Try {
     $Connection.Open()
     $Command = New-Object System.Data.SqlClient.SqlCommand
     $Command.Connection = $Connection
     $Command.CommandText = $Query
     # The affected-row count is not used; discard it so the function emits nothing.
     $null = $Command.ExecuteNonQuery()
   }
   Finally {
     # Release the command and the connection even when the statement fails.
     If ($null -ne $Command) { $Command.Dispose() }
     $Connection.Dispose()
   }
 }
}#End Function

# --- Start ---
#
# Main flow:
#   1. Read the inventory job id from the job id file located next to the script.
#   2. Download and parse the inventory job output from AWS Glacier.
#   3. For every database row in the 'transfered' state, find the archive in the
#      inventory, compare its size with the local file, delete the local file and
#      mark the row as 'archived'.
#   4. Send a report e-mail and remove the job id file.

# Sends an HTML notification using the mail settings defined at the top of the script.
# -Cc is passed only when $MailCC contains at least one non-blank address, and a
# delivery failure is written to the log instead of being left on the console.
Function Send-ScriptNotification {
  Param(
    [String]$Body
  )
  $MailParameters = @{
    SmtpServer  = $MailServer
    Encoding    = $MailEncoding
    From        = $MailFrom
    To          = $MailTO
    Subject     = $MailSubject
    Body        = $Body
    BodyAsHtml  = $true
    ErrorAction = 'Stop'
  }
  $CcRecipients = @($MailCC | Where-Object { -not [String]::IsNullOrWhiteSpace($_) })
  If ($CcRecipients.Count -gt 0) {
    $MailParameters.Cc = $CcRecipients
  }
  Try {
    Send-MailMessage @MailParameters
  } Catch {
    Write-ScriptLog -LogFile $LogFile -Message ("----> Error: " + $_.Exception.Message)
  }
}

# Deletes the job id file and logs the outcome.
Function Remove-AWSJobIdFile {
  Write-ScriptLog -LogFile $LogFile -Message ("Deleting $AWSJobIdFileName file")
  Try {
    Remove-Item $AWSJobIdFilePath -Force -Confirm:$False -ErrorAction Stop
    Write-ScriptLog -LogFile $LogFile -Message ("$AWSJobIdFileName file is deleted")
  } Catch {
    Write-ScriptLog -LogFile $LogFile -Message ("----> Error: " + $_.Exception.Message)
  }
}

# Calculating variables
$CurrentDate = Get-Date
# Folder of the running script including the trailing backslash: the full script path
# minus the script file name. Paths below are built by plain concatenation.
$ScriptFolder = $MyInvocation.MyCommand.Path.SubString(0,($MyInvocation.MyCommand.Path.Length `
 - $MyInvocation.MyCommand.Name.Length))
# Log file name: Logs\<yyyy_MM_dd>_<script name without its 4-character extension>.log
$LogFile = $ScriptFolder + 'Logs\' + (Get-Date -format yyyy_MM_dd) + "_" `
 + $MyInvocation.MyCommand.Name.SubString(0,($MyInvocation.MyCommand.Name.Length - 4)) + ".log"

# Log-file creation
If (-not(Test-Path ($ScriptFolder + 'Logs') -PathType Container )) {
  # Out-Null keeps the created DirectoryInfo object out of the script output.
  New-Item -ItemType Directory -Path ($ScriptFolder + 'Logs') | Out-Null
}
Out-File -FilePath $LogFile

Write-ScriptLog -LogFile $LogFile -Message ($MyInvocation.MyCommand.Name + " started")

Write-ScriptLog -LogFile $LogFile -Message ("============================== Input data ==============================")
Write-ScriptLog -LogFile $LogFile -Message ("AWS Account ID = $AWSAccountID")
Write-ScriptLog -LogFile $LogFile -Message ("AWS Region = $AWSRegion")
Write-ScriptLog -LogFile $LogFile -Message ("AWS Vault Name = $AWSVaultName")
Write-ScriptLog -LogFile $LogFile -Message ("AWS Job Id File Name = $AWSJobIdFileName")

Write-ScriptLog -LogFile $LogFile -Message ("============================== Processing ==============================")

# Input file name
$AWSJobIdFilePath = $ScriptFolder + $AWSJobIdFileName
# Name lists ("name1; name2; ") reported in the notification e-mail.
$ProcessedArchives = $null
$FailedArchives = $null

If (Test-Path($AWSJobIdFilePath)) {

  Write-ScriptLog -LogFile $LogFile -Message ("Reading $AWSJobIdFileName file")
  # Only the first line is used and surrounding whitespace is removed, so a trailing
  # newline or blank line in the file does not end up inside the job id.
  $AWSGlacerJobId = ([String](Get-Content $AWSJobIdFilePath -TotalCount 1)).Trim()
  Write-ScriptLog -LogFile $LogFile -Message ("Inventory retrieval job id is $AWSGlacerJobId")

  $AWSGlacierClient = $null
  $AWSGlacierOutputResult = $null
  $AWSGlacierContent = $null

  Write-ScriptLog -LogFile $LogFile -Message ("Connecting to AWS")
  Try {
    $AWSEndpoint = [Amazon.RegionEndpoint]::GetBySystemName($AWSRegion)
    $AWSGlacierClient = [Amazon.Glacier.AmazonGlacierClient]::New($AWSProfileAccessKey, $AWSProfileSecretKey, $AWSEndpoint)
    Write-ScriptLog -LogFile $LogFile -Message ("Connection to AWS Glacier is opened")

    Write-ScriptLog -LogFile $LogFile -Message ("Requesting job output")
    $AWSGlacierJobOutputRequest = [Amazon.Glacier.Model.GetJobOutputRequest]::new()
    $AWSGlacierJobOutputRequest.AccountId = $AWSAccountID
    $AWSGlacierJobOutputRequest.VaultName = $AWSVaultName
    $AWSGlacierJobOutputRequest.JobId = $AWSGlacerJobId
    $AWSGlacierOutputResult = $AWSGlacierClient.GetJobOutput($AWSGlacierJobOutputRequest)
    Write-ScriptLog -LogFile $LogFile -Message ("Job output is received with status " + $AWSGlacierOutputResult.Status)
  }
  Catch {
    Write-ScriptLog -LogFile $LogFile -Message ("----> Error: " + $_.Exception.Message)
  }

  If ($null -ne $AWSGlacierOutputResult) {
    Write-ScriptLog -LogFile $LogFile -Message ("Parsing job output")
    Try {
      # Read the whole response stream at once. UTF-8 is a superset of ASCII, so
      # ASCII-only output decodes exactly as before, without per-chunk concatenation.
      $OutputReader = [System.IO.StreamReader]::new($AWSGlacierOutputResult.Body, [System.Text.Encoding]::UTF8)
      Try {
        $AWSGlacierOutputResultContent = $OutputReader.ReadToEnd()
      }
      Finally {
        $OutputReader.Dispose()
      }
      $AWSGlacierContent = $AWSGlacierOutputResultContent | ConvertFrom-Json -ErrorAction Stop
      Write-ScriptLog -LogFile $LogFile -Message ("Job output is parsed")
      Write-ScriptLog -LogFile $LogFile -Message ("There are " + @($AWSGlacierContent.ArchiveList).Count + " archives in the vault")
    }
    Catch {
      # A job output that cannot be read or parsed is handled like an empty one below.
      $AWSGlacierContent = $null
      Write-ScriptLog -LogFile $LogFile -Message ("----> Error: " + $_.Exception.Message)
    }
  }

  # The client is released on every path, not only after a successful download.
  If ($null -ne $AWSGlacierClient) {
    $AWSGlacierClient.Dispose()
    Write-ScriptLog -LogFile $LogFile -Message ("Connection to AWS Glacier is closed")
  }

  If ($null -ne $AWSGlacierContent) {

    Write-ScriptLog -LogFile $LogFile -Message ("Updating archives statuses in the database")
    # Requesting archives from the database
    $Query = "SELECT * FROM " + $SQLTable + " WHERE [state] = 'transfered'"
    $TransferedFiles = @()
    Try {
      # @() keeps .Count and ForEach well-defined for zero, one or many rows.
      $TransferedFiles = @(Invoke-Query -ServerInstance $SQLServer -Database $SQLDatabase -Username $SQLUsername `
        -Password $SQLPassword -Query $Query)
    }
    Catch {
      Write-ScriptLog -LogFile $LogFile -Message ("----> Error: " + $_.Exception.Message)
    }
    Write-ScriptLog -LogFile $LogFile -Message ("There are " + $TransferedFiles.Count + " archives transfered to AWS Glacier")

    ForEach ($CurrentFile in $TransferedFiles) {
      $CurrentFileId = $CurrentFile."id"
      $CurrentFileName = $CurrentFile."archive-name"
      $CurrentFileAwsId = $CurrentFile."aws-archive-id"
      $CurrentFilePath = $ArchiveFolder + "\" + $CurrentFileName

      # Looking for current archive id in the vault
      Write-ScriptLog -LogFile $LogFile -Message ("  $CurrentFileName is waiting for successfull transrefing confirmation")
      # Archive ids are opaque mixed-case strings, so they are compared case-sensitively.
      $AWSArchive = $AWSGlacierContent.ArchiveList |
        Where-Object { $_.ArchiveId -ceq $CurrentFileAwsId } |
        Select-Object -First 1

      If ($null -eq $AWSArchive) {
        # Not an error: the row stays 'transfered' and is checked again on the next run.
        Write-ScriptLog -LogFile $LogFile -Message ("    $CurrentFileName is not found in the vault inventory")
        Continue
      }
      Write-ScriptLog -LogFile $LogFile -Message ("    $CurrentFileName is found is the vault")

      $LocalFile = Get-Item -LiteralPath $CurrentFilePath -ErrorAction SilentlyContinue
      If ($null -eq $LocalFile) {
        Write-ScriptLog -LogFile $LogFile -Message ("----> Error: $CurrentFilePath is not found")
        $FailedArchives += $CurrentFileName + "; "
        Continue
      }
      If ($LocalFile.Length -ne $AWSArchive.Size) {
        Write-ScriptLog -LogFile $LogFile -Message ("----> Error: local size " + $LocalFile.Length + " differs from AWS size " + $AWSArchive.Size)
        $FailedArchives += $CurrentFileName + "; "
        Continue
      }
      Write-ScriptLog -LogFile $LogFile -Message ("    The size of the archives at local server and at AWS are the same")

      Try {
        Remove-Item -LiteralPath $CurrentFilePath -Force -Confirm:$False -ErrorAction Stop
        Write-ScriptLog -LogFile $LogFile -Message ("    $CurrentFilePath is deleted")

        # The row is marked 'archived' only after the local file is really gone;
        # otherwise it would leave the 'transfered' state and never be cleaned up.
        Write-ScriptLog -LogFile $LogFile -Message ("    Updating the arhive status in the database")
        $SQLAccessTimeStamp = (Get-Date).ToString("yyyy-MM-dd HH:mm:ss")
        $SQLState = "archived"
        $SQLNotice = "size: " + $AWSArchive.size + " bytes, creation: " + $AWSArchive.CreationDate
        # The notice is embedded into a quoted SQL literal, so single quotes are doubled.
        $SQLNotice = $SQLNotice.Replace("'", "''")
        $UpdateQuery = "UPDATE " + $SQLTable + " SET [state] = '$SQLState',"
        $UpdateQuery += " [notice] = '$SQLNotice', [last-access-timestamp] = '$SQLAccessTimeStamp' WHERE [id] = $CurrentFileId"
        Invoke-NonQuery -ServerInstance $SQLServer -Database $SQLDatabase -Username $SQLUsername `
          -Password $SQLPassword -Query $UpdateQuery
        $ProcessedArchives += $CurrentFileName + "; "
      } Catch {
        Write-ScriptLog -LogFile $LogFile -Message ("----> Error: " + $_.Exception.Message)
        $FailedArchives += $CurrentFileName + "; "
      }
    }
    Write-ScriptLog -LogFile $LogFile -Message ("Archives statuses are updated the database")

    # The body is built as one string first: in argument mode a bare "+" would be
    # passed to the cmdlet as a separate argument instead of concatenating.
    $MailBody = "Successfully processed archives: $ProcessedArchives<br>" +
      "Failed archives: $FailedArchives<br />Please review $LogFile on $env:computername for details."
    Send-ScriptNotification -Body $MailBody

    Remove-AWSJobIdFile

  } Else {
    Write-ScriptLog -LogFile $LogFile -Message ("Job output is empty")

    Send-ScriptNotification -Body "AWS job output is empty<br />Please review $LogFile on $env:computername for details."

    # The job id file is kept so the next run can retry, unless it is 2 or more days old.
    If (((Get-Date) - (Get-Item $AWSJobIdFilePath).CreationTime).Days -ge 2) {
      Write-ScriptLog -LogFile $LogFile -Message ("$AWSJobIdFileName file is outdated")
      Remove-AWSJobIdFile
    }
  }
}
Else {
  Write-ScriptLog -LogFile $LogFile -Message ("The processing is stopped")
  Write-ScriptLog -LogFile $LogFile -Message ("Use Process-AWSGlacierInventory.ps1 script to get information about AWS archives")
}

# --- End ---
Write-ScriptLog -LogFile $LogFile -Message ("========================================================================")
Write-ScriptLog -LogFile $LogFile -Message ($MyInvocation.MyCommand.Name + " stopped")

Входные данные скрипта:

  • ArchiveFolder - папка с локальными ZIP-архивами, которые скрипт сверяет с облаком и удаляет после успешной проверки
  • MailServer - почтовый сервер компании для отправки уведомлений о работе скрипта
  • MailEncoding - кодировка писем
  • MailFrom - от чьего имени отправляются письма
  • MailSubject - тема писем
  • MailTO - кому отправлять письма
  • MailCC - кому отправлять копии писем
  • AWSAccountID - ID учетной записи Amazon Web Services
  • AWSRegion - регион, где находится коллекция архивов
  • AWSVaultName - имя коллекции архивов
  • AWSProfileAccessKey - ключ доступа (Access Key) учетной записи AWS с правом доступа к коллекции архивов
  • AWSProfileSecretKey - секретный ключ (Secret Key), соответствующий ключу доступа AWS
  • SQLServer - SQL сервер компании
  • SQLDatabase - имя базы данных
  • SQLTable - имя таблицы
  • SQLUsername - имя пользователя с правом вносить изменения в SQL базу данных
  • SQLPassword - пароль пользователя SQL
  • AWSJobIdFileName - имя файла, из которого скрипт читает номер задания инвентаризации (файл должен лежать в той же папке, где лежит скрипт)

Результат работы скрипта:

  • Завершенная процедура архивирования файла в облаке AWS Glacier.
  • Лог-файл работы скрипта.
Продолжение в заметке "Servers - Архивирование в AWS Glacier (часть 5)".
Метки