|
#!/bin/bash |
|
|
|
################################################################################
# AWS Backup Detection Mechanism Test Script
#
# This script tests the AWS Backup detection mechanism in openshift-install
# destroy functionality. It creates a backup, verifies detection, and tests
# the destroy process.
#
# Usage:
#   ./test-aws-backup-detection.sh [options]
#
# The option list lives in usage() below and is printed by --help.
################################################################################

set -euo pipefail

# Colors for output (ANSI escapes; expanded by `echo -e` in the log_* helpers)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color

# Default values; each one can be overridden by the matching option.
CLUSTER_ID=""
# Deliberately not hard-coded to us-east-1: get_cluster_info only consults
# metadata.json when the region is empty, so a fixed default would make that
# lookup unreachable. A region already present in the environment is kept.
AWS_REGION="${AWS_REGION:-}"
INSTANCE_ID=""
VAULT_NAME=""
INSTALL_DIR="."
SKIP_CLEANUP=false
INSTALLER_BINARY="openshift-install"

# Print the help text to stdout.
usage() {
  cat <<'EOF'
AWS Backup Detection Mechanism Test Script

This script tests the AWS Backup detection mechanism in openshift-install
destroy functionality. It creates a backup, verifies detection, and tests
the destroy process.

Usage:
  ./test-aws-backup-detection.sh [options]

Options:
  --cluster-id <id>           Cluster infrastructure ID (default: auto-detect)
  --region <region>           AWS region (default: $AWS_REGION, then the region
                              in metadata.json, then us-east-1)
  --instance-id <id>          EC2 instance ID to backup (default: first master)
  --vault-name <name>         Backup vault name (default: <cluster-id>-backup-vault)
  --install-dir <dir>         Installation directory (default: current directory)
  --installer-binary <path>   Installer binary name or path (default: openshift-install)
  --skip-cleanup              Skip cleanup of backup resources after test
  --help                      Show this help message
EOF
}

# Abort with status 1 unless the option is followed by a value.
#   $1 - option name (for the error message)
#   $2 - number of arguments still to be parsed, the option itself included
_require_value() {
  if (( $2 < 2 )); then
    echo -e "${RED}Option $1 requires a value${NC}" >&2
    exit 1
  fi
}

# Parse command line arguments into the globals declared above.
# Exits 0 after printing the help text for --help, and exits 1 on an unknown
# option or on an option that is missing its value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --cluster-id)
        _require_value "$1" "$#"
        CLUSTER_ID="$2"
        shift 2
        ;;
      --region)
        _require_value "$1" "$#"
        AWS_REGION="$2"
        shift 2
        ;;
      --instance-id)
        _require_value "$1" "$#"
        INSTANCE_ID="$2"
        shift 2
        ;;
      --vault-name)
        _require_value "$1" "$#"
        VAULT_NAME="$2"
        shift 2
        ;;
      --install-dir)
        _require_value "$1" "$#"
        INSTALL_DIR="$2"
        shift 2
        ;;
      --installer-binary)
        _require_value "$1" "$#"
        INSTALLER_BINARY="$2"
        shift 2
        ;;
      --skip-cleanup)
        SKIP_CLEANUP=true
        shift
        ;;
      --help)
        usage
        exit 0
        ;;
      *)
        echo -e "${RED}Unknown option: $1${NC}" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
|
|
|
# Logging functions

# Print an informational message to stdout, prefixed with a blue "[INFO]" tag.
#   $1 - message text; backslash escapes in it are expanded
log_info() {
  local message="$1"
  printf '%b\n' "${BLUE}[INFO]${NC} ${message}"
}
|
|
|
# Print a success message to stdout, prefixed with a green "[SUCCESS]" tag.
#   $1 - message text; backslash escapes in it are expanded
log_success() {
  local message="$1"
  printf '%b\n' "${GREEN}[SUCCESS]${NC} ${message}"
}
|
|
|
# Print a warning, prefixed with a yellow "[WARNING]" tag.
# Written to stderr so that diagnostics stay out of captured or piped stdout.
#   $1 - message text; backslash escapes in it are expanded
log_warning() {
  echo -e "${YELLOW}[WARNING]${NC} $1" >&2
}
|
|
|
# Print an error, prefixed with a red "[ERROR]" tag.
# Written to stderr so that diagnostics stay out of captured or piped stdout.
# Only reports; the caller decides whether to exit or return.
#   $1 - message text; backslash escapes in it are expanded
log_error() {
  echo -e "${RED}[ERROR]${NC} $1" >&2
}
|
|
|
# Check prerequisites
#
# Verifies, before any AWS resource is created, that the script has what the
# later steps use:
#   - the `aws` and `jq` commands,
#   - usable AWS credentials (`aws sts get-caller-identity` succeeds),
#   - the installer binary, either resolvable as a command or present as a
#     file inside INSTALL_DIR,
#   - INSTALL_DIR/metadata.json.
# Globals read: INSTALLER_BINARY, INSTALL_DIR
# Exits the script with status 1 at the first missing prerequisite.
check_prerequisites() {
  log_info "Checking prerequisites..."

  # Check AWS CLI
  if ! command -v aws &> /dev/null; then
    log_error "AWS CLI not found. Please install it first."
    exit 1
  fi

  # Check jq: metadata.json and the start-backup-job response are parsed with
  # it, and a missing jq would otherwise surface as a misleading
  # "Could not determine cluster ID" error later on.
  if ! command -v jq &> /dev/null; then
    log_error "jq not found. Please install it first."
    exit 1
  fi

  # Check AWS credentials
  if ! aws sts get-caller-identity &> /dev/null; then
    log_error "AWS credentials not configured. Please run 'aws configure'."
    exit 1
  fi

  # Check installer binary
  if ! command -v "$INSTALLER_BINARY" &> /dev/null && [ ! -f "$INSTALL_DIR/$INSTALLER_BINARY" ]; then
    log_error "openshift-install binary not found. Please specify with --installer-binary."
    exit 1
  fi

  # Check metadata.json
  if [ ! -f "$INSTALL_DIR/metadata.json" ]; then
    log_error "metadata.json not found in $INSTALL_DIR"
    exit 1
  fi

  log_success "Prerequisites check passed"
}
|
|
|
# Get cluster information from metadata.json
#
# Fills in whichever of CLUSTER_ID, AWS_REGION and VAULT_NAME is still empty:
#   CLUSTER_ID - .infraID, else .clusterID, from INSTALL_DIR/metadata.json;
#                the script exits with status 1 if neither yields a value
#   AWS_REGION - .aws.region from metadata.json, else "us-east-1"
#   VAULT_NAME - "<CLUSTER_ID>-backup-vault"
# Values that are already non-empty are left untouched.
get_cluster_info() {
  local metadata_file="$INSTALL_DIR/metadata.json"

  log_info "Reading cluster information from metadata.json..."

  if [[ -z "$CLUSTER_ID" ]]; then
    CLUSTER_ID=$(jq -r '.infraID // .clusterID' "$metadata_file" 2>/dev/null || echo "")
    case "$CLUSTER_ID" in
      "" | null)
        # jq -r prints the literal string "null" when both keys are absent.
        log_error "Could not determine cluster ID from metadata.json"
        exit 1
        ;;
    esac
  fi

  if [[ -z "$AWS_REGION" ]]; then
    AWS_REGION=$(jq -r '.aws.region // "us-east-1"' "$metadata_file" 2>/dev/null || echo "us-east-1")
  fi

  # Assign the derived vault name only when none was supplied.
  : "${VAULT_NAME:=${CLUSTER_ID}-backup-vault}"

  log_info "Cluster ID: $CLUSTER_ID"
  log_info "Region: $AWS_REGION"
  log_info "Backup Vault: $VAULT_NAME"
}
|
|
|
# Get AWS account ID
#
# Stores the account number reported by `aws sts get-caller-identity` in the
# global AWS_ACCOUNT_ID (later used to build the instance ARN).
get_aws_account_id() {
  local -a identity_cmd=(
    sts get-caller-identity
    --query Account
    --output text
    --region "$AWS_REGION"
  )

  log_info "Getting AWS account ID..."
  AWS_ACCOUNT_ID="$(aws "${identity_cmd[@]}")"
  log_info "AWS Account ID: $AWS_ACCOUNT_ID"
}
|
|
|
# List cluster instances
#
# Prints a table of the running/stopped EC2 instances tagged
# kubernetes.io/cluster/<CLUSTER_ID>=owned. When INSTANCE_ID is empty it is
# then set to the first such instance whose Name tag contains "master", or
# failing that to the first instance of any name. Exits the script with
# status 1 if no instance can be selected.
list_cluster_instances() {
  log_info "Listing cluster instances..."

  # Filters shared by every query below.
  local -a cluster_filters=(
    "Name=tag:kubernetes.io/cluster/${CLUSTER_ID},Values=owned"
    "Name=instance-state-name,Values=running,stopped"
  )
  local first_instance_query='Reservations[0].Instances[0].InstanceId'

  aws ec2 describe-instances \
    --region "$AWS_REGION" \
    --filters "${cluster_filters[@]}" \
    --query 'Reservations[*].Instances[*].[InstanceId,InstanceType,State.Name,Tags[?Key==`Name`].Value|[0]]' \
    --output table

  if [[ -z "$INSTANCE_ID" ]]; then
    # Get first master instance
    INSTANCE_ID=$(aws ec2 describe-instances \
      --region "$AWS_REGION" \
      --filters "${cluster_filters[@]}" "Name=tag:Name,Values=*master*" \
      --query "$first_instance_query" \
      --output text 2>/dev/null || echo "")

    # The CLI prints "None" when the query matches nothing.
    if [[ -z "$INSTANCE_ID" || "$INSTANCE_ID" == "None" ]]; then
      # Fallback to any instance
      INSTANCE_ID=$(aws ec2 describe-instances \
        --region "$AWS_REGION" \
        --filters "${cluster_filters[@]}" \
        --query "$first_instance_query" \
        --output text 2>/dev/null || echo "")
    fi

    if [[ -z "$INSTANCE_ID" || "$INSTANCE_ID" == "None" ]]; then
      log_error "No instances found for cluster $CLUSTER_ID"
      exit 1
    fi
  fi

  log_info "Selected instance: $INSTANCE_ID"
}
|
|
|
# Create backup vault
#
# Creates the AWS Backup vault VAULT_NAME in AWS_REGION unless
# `describe-backup-vault` shows it already exists. The create response is
# saved to /tmp/backup-vault.json.
create_backup_vault() {
  log_info "Creating backup vault: $VAULT_NAME..."

  local -a vault_args=(--backup-vault-name "$VAULT_NAME" --region "$AWS_REGION")

  # Nothing to do when the vault is already there.
  if aws backup describe-backup-vault "${vault_args[@]}" &> /dev/null; then
    log_warning "Backup vault $VAULT_NAME already exists"
    return
  fi

  aws backup create-backup-vault "${vault_args[@]}" --output json > /tmp/backup-vault.json

  log_success "Backup vault created"
}
|
|
|
# Check and create IAM role
#
# Makes sure the IAM role AWSBackupDefaultServiceRole exists (creating it with
# a trust policy for backup.amazonaws.com when `iam get-role` fails), attaches
# the AWS-managed backup and restore service-role policies, and stores the
# role ARN in the globals ROLE_ARN and BACKUP_ROLE_ARN.
# Files written when the role is created: /tmp/backup-trust-policy.json,
# /tmp/iam-role.json.
setup_iam_role() {
  local role_name="AWSBackupDefaultServiceRole"
  local policy_arn_prefix="arn:aws:iam::aws:policy/service-role"
  local trust_policy_file="/tmp/backup-trust-policy.json"
  local entry policy_name policy_label

  log_info "Checking IAM role: AWSBackupDefaultServiceRole..."

  if ! aws iam get-role --role-name "$role_name" &> /dev/null; then
    log_info "Creating IAM role..."

    # Create trust policy
    cat > "$trust_policy_file" <<'EOF'
{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "Service": "backup.amazonaws.com"
      },
      "Action": "sts:AssumeRole"
    }
  ]
}
EOF

    # Create role
    aws iam create-role \
      --role-name "$role_name" \
      --assume-role-policy-document "file://${trust_policy_file}" \
      --description "Default service role for AWS Backup" \
      --output json > /tmp/iam-role.json

    log_success "IAM role created"
  else
    log_info "IAM role already exists"
  fi

  # Attach policies. Each entry is "<managed policy name>:<label used in the
  # warning>"; a failed attach is only reported as a warning.
  log_info "Attaching IAM policies..."

  for entry in \
    "AWSBackupServiceRolePolicyForBackup:Backup" \
    "AWSBackupServiceRolePolicyForRestores:Restore"; do
    policy_name="${entry%%:*}"
    policy_label="${entry#*:}"
    aws iam attach-role-policy \
      --role-name "$role_name" \
      --policy-arn "${policy_arn_prefix}/${policy_name}" \
      2>/dev/null || log_warning "${policy_label} policy may already be attached"
  done

  # Get role ARN (check path)
  ROLE_ARN=$(aws iam get-role --role-name "$role_name" --query 'Role.Arn' --output text)
  BACKUP_ROLE_ARN="$ROLE_ARN"

  log_info "Using IAM role ARN: $BACKUP_ROLE_ARN"

  # Verify policies
  aws iam list-attached-role-policies --role-name "$role_name" --output table
}
|
|
|
# Create backup job
#
# Starts an on-demand AWS Backup job for the instance INSTANCE_ID into the
# vault VAULT_NAME, using the role BACKUP_ROLE_ARN.
# Globals written: INSTANCE_ARN, BACKUP_JOB_ID
# Files written:   /tmp/backup-job.json (raw response),
#                  /tmp/backup-job-id.txt (job id only)
create_backup_job() {
  local response_file="/tmp/backup-job.json"

  log_info "Creating backup job for instance: $INSTANCE_ID..."

  INSTANCE_ARN="arn:aws:ec2:${AWS_REGION}:${AWS_ACCOUNT_ID}:instance/${INSTANCE_ID}"
  log_info "Instance ARN: $INSTANCE_ARN"

  # Start backup job
  local -a job_args=(
    --backup-vault-name "$VAULT_NAME"
    --resource-arn "$INSTANCE_ARN"
    --iam-role-arn "$BACKUP_ROLE_ARN"
    --region "$AWS_REGION"
    --output json
  )
  aws backup start-backup-job "${job_args[@]}" > "$response_file"

  BACKUP_JOB_ID=$(jq -r '.BackupJobId' "$response_file")

  log_success "Backup job created: $BACKUP_JOB_ID"
  echo "$BACKUP_JOB_ID" > /tmp/backup-job-id.txt
}
|
|
|
# Monitor backup job
#
# Polls `aws backup describe-backup-job` for BACKUP_JOB_ID every 30 seconds,
# for at most 60 minutes, printing a one-line progress indicator.
# Globals read: BACKUP_JOB_ID, AWS_REGION
# Returns:
#   0 - the job reached COMPLETED
#   1 - the job ended in FAILED, ABORTED, EXPIRED or PARTIAL, the status could
#       not be queried five times in a row, or the 60 minutes elapsed
monitor_backup_job() {
  local max_wait=3600 # 60 minutes max
  local check_interval=30
  local max_api_failures=5 # consecutive describe failures tolerated
  local elapsed=0
  local api_failures=0
  local job_info=""
  local status=""
  local percent=""

  log_info "Monitoring backup job: $BACKUP_JOB_ID..."
  log_info "This may take 15-30 minutes depending on instance size..."

  while (( elapsed < max_wait )); do
    # One call returns both fields, tab-separated. The assignment is kept
    # apart from `local` so that a failing call is actually noticed.
    if job_info=$(aws backup describe-backup-job \
      --backup-job-id "$BACKUP_JOB_ID" \
      --region "$AWS_REGION" \
      --query '[State,PercentDone]' \
      --output text); then
      api_failures=0
      IFS=$' \t\n' read -r status percent <<< "$job_info" || true
    else
      # Tolerate a transient API error, but do not poll blindly for an hour.
      api_failures=$((api_failures + 1))
      if (( api_failures >= max_api_failures )); then
        echo ""
        log_error "Could not query backup job status ($api_failures consecutive failures)"
        return 1
      fi
      status="UNKNOWN"
      percent=""
    fi

    # The CLI prints "None" for a field that is absent from the response.
    if [[ -z "$percent" || "$percent" == "None" ]]; then
      percent="0.0"
    fi

    echo -ne "\r[$(date +%H:%M:%S)] Backup status: $status (${percent}%)"

    case "$status" in
      COMPLETED)
        echo ""
        log_success "Backup completed successfully"
        return 0
        ;;
      FAILED | ABORTED | EXPIRED | PARTIAL)
        # Terminal states other than COMPLETED: further polling cannot help.
        echo ""
        log_error "Backup failed with status: $status"
        aws backup describe-backup-job \
          --backup-job-id "$BACKUP_JOB_ID" \
          --region "$AWS_REGION" \
          --query '[State,StatusMessage]' \
          --output text || true
        return 1
        ;;
    esac

    sleep "$check_interval"
    elapsed=$((elapsed + check_interval))
  done

  echo ""
  log_warning "Backup monitoring timeout after $max_wait seconds"
  return 1
}
|
|
|
# Verify backup resources
#
# Looks up the recovery point produced by BACKUP_JOB_ID, derives the AMI id
# from its ARN, waits up to 30 minutes for that AMI to become "available",
# checks it for the aws:backup:source-resource tag and finally prints a table
# of backup AMIs that also carry the cluster tag.
# Globals read:    BACKUP_JOB_ID, AWS_REGION, CLUSTER_ID
# Globals written: RECOVERY_POINT_ARN, AMI_ID
# Files written:   /tmp/backup-ami-id.txt (only when the backup tag is found)
# Returns 1 when no recovery point ARN can be obtained. An AMI that never
# becomes available, or a missing tag, is reported as a warning only.
verify_backup_resources() {
  local max_wait=1800 # 30 minutes
  local check_interval=30
  local elapsed=0
  local ami_state=""
  local backup_tag=""

  log_info "Verifying backup resources..."

  # Get recovery point ARN
  if ! RECOVERY_POINT_ARN=$(aws backup describe-backup-job \
    --backup-job-id "$BACKUP_JOB_ID" \
    --region "$AWS_REGION" \
    --query 'RecoveryPointArn' \
    --output text); then
    RECOVERY_POINT_ARN=""
  fi

  if [[ -z "$RECOVERY_POINT_ARN" || "$RECOVERY_POINT_ARN" == "None" ]]; then
    log_error "Could not get recovery point ARN"
    return 1
  fi

  log_info "Recovery Point ARN: $RECOVERY_POINT_ARN"

  # Extract AMI ID from recovery point ARN: everything after the last "image/"
  AMI_ID="${RECOVERY_POINT_ARN##*image/}"
  log_info "AMI ID: $AMI_ID"

  # Wait for AMI to be available
  log_info "Waiting for AMI to be available..."

  while (( elapsed < max_wait )); do
    # A failed lookup is treated like an image that is not ready yet.
    ami_state=$(aws ec2 describe-images \
      --image-ids "$AMI_ID" \
      --region "$AWS_REGION" \
      --query 'Images[0].State' \
      --output text 2>/dev/null) || ami_state="pending"

    if [[ "$ami_state" == "available" ]]; then
      log_success "AMI is available"
      break
    fi

    echo -ne "\r[$(date +%H:%M:%S)] AMI state: $ami_state"
    sleep "$check_interval"
    elapsed=$((elapsed + check_interval))
  done

  echo ""

  # Make a timed-out wait visible instead of silently carrying on.
  if [[ "$ami_state" != "available" ]]; then
    log_warning "AMI $AMI_ID not available after $max_wait seconds (last state: $ami_state)"
  fi

  # Check for backup tag
  log_info "Checking for backup tag on AMI..."
  backup_tag=$(aws ec2 describe-images \
    --image-ids "$AMI_ID" \
    --region "$AWS_REGION" \
    --query 'Images[0].Tags[?Key==`aws:backup:source-resource`].Value' \
    --output text) || backup_tag=""

  if [[ -n "$backup_tag" && "$backup_tag" != "None" ]]; then
    log_success "Backup tag found: $backup_tag"
    echo "$AMI_ID" > /tmp/backup-ami-id.txt
  else
    log_warning "Backup tag not found (may appear later)"
  fi

  # List backup AMIs
  log_info "Listing backup AMIs with cluster tag..."
  aws ec2 describe-images \
    --region "$AWS_REGION" \
    --filters "Name=tag:aws:backup:source-resource,Values=*" \
    "Name=tag:kubernetes.io/cluster/${CLUSTER_ID},Values=owned" \
    --query 'Images[*].[ImageId,Name,State]' \
    --output table
}
|
|
|
# Run destroy command
#
# Runs "<installer> destroy cluster" after a 5 second grace period and tees
# its combined stdout/stderr into a timestamped log file inside INSTALL_DIR.
# The log path is also written to /tmp/destroy-log-path.txt so that
# verify_detection can find it.
# Globals read: INSTALL_DIR, INSTALLER_BINARY
run_destroy() {
  log_info "Running openshift-install destroy cluster..."
  log_warning "This will destroy the cluster. Press Ctrl+C within 5 seconds to cancel..."
  sleep 5

  local destroy_log="$INSTALL_DIR/destroy-output-$(date +%Y%m%d-%H%M%S).log"
  log_info "Destroy log: $destroy_log"

  # Determine installer path: INSTALLER_BINARY as given, unless it is not a
  # file here while a file of that name exists inside INSTALL_DIR.
  local installer_path="$INSTALLER_BINARY"
  if [[ ! -f "$installer_path" && -f "$INSTALL_DIR/$INSTALLER_BINARY" ]]; then
    installer_path="$INSTALL_DIR/$INSTALLER_BINARY"
  fi

  # Point the installer at metadata.json when it exists, otherwise at the
  # installation directory.
  local -a target_args
  if [[ -f "$INSTALL_DIR/metadata.json" ]]; then
    target_args=(--metadata-file "$INSTALL_DIR/metadata.json")
  else
    target_args=(--dir "$INSTALL_DIR")
  fi

  # Run destroy
  "$installer_path" destroy cluster \
    "${target_args[@]}" \
    --log-level info \
    2>&1 | tee "$destroy_log"

  echo "$destroy_log" > /tmp/destroy-log-path.txt
  log_success "Destroy completed. Log saved to: $destroy_log"
}
|
|
|
# Verify detection in logs
#
# Reads the destroy log whose path was saved in /tmp/destroy-log-path.txt and
# reports whether it contains:
#   - the "Skipping AMI image ... managed by the AWS Backup service" message
#     (case-sensitive match),
#   - a "Skipping snapshot ... AWS Backup service" message (case-insensitive),
#   - "Uninstallation complete".
# Matching lines are echoed after the corresponding success message. Missing
# messages, or a missing log, only produce warnings.
verify_detection() {
  local destroy_log=$(cat /tmp/destroy-log-path.txt 2>/dev/null || echo "")
  local ami_pattern="Skipping AMI image.*managed by the AWS Backup service"
  local snapshot_pattern="Skipping snapshot.*AWS Backup service"
  local matches

  if [[ -z "$destroy_log" || ! -f "$destroy_log" ]]; then
    log_warning "Destroy log not found, skipping verification"
    return
  fi

  log_info "Verifying detection in destroy log: $destroy_log"

  # Check for AMI warning
  if matches=$(grep "$ami_pattern" "$destroy_log"); then
    log_success "✓ AMI detection found in logs"
    printf '%s\n' "$matches"
  else
    log_warning "AMI detection not found in logs"
  fi

  # Check for snapshot warning
  if matches=$(grep -i "$snapshot_pattern" "$destroy_log"); then
    log_success "✓ Snapshot detection found in logs"
    printf '%s\n' "$matches"
  else
    log_warning "Snapshot detection not found in logs"
  fi

  # Check for completion
  if grep -q "Uninstallation complete" "$destroy_log"; then
    log_success "✓ Uninstallation completed successfully"
  else
    log_warning "Uninstallation completion message not found"
  fi
}
|
|
|
# Cleanup backup resources
#
# Unless SKIP_CLEANUP is "true", deletes every recovery point listed in the
# vault VAULT_NAME, waits (up to 5 minutes, polling every 10 seconds) for the
# vault to report no recovery points, and then deletes the vault itself.
# Individual failures are reported as warnings; the function does not abort.
# Globals read: SKIP_CLEANUP, VAULT_NAME, AWS_REGION
cleanup_backup_resources() {
  if [[ "$SKIP_CLEANUP" == true ]]; then
    log_info "Skipping cleanup (--skip-cleanup specified)"
    return
  fi

  log_info "Cleaning up backup resources..."

  local -a list_args=(
    backup list-recovery-points-by-backup-vault
    --backup-vault-name "$VAULT_NAME"
    --region "$AWS_REGION"
    --query 'RecoveryPoints[*].RecoveryPointArn'
    --output text
  )
  local -a recovery_points=()
  local listing=""
  local rp
  local max_wait=300
  local poll_interval=10
  local waited=0

  # List recovery points. A failed listing is surfaced instead of being
  # mistaken for an empty vault.
  if ! listing=$(aws "${list_args[@]}"); then
    log_warning "Could not list recovery points in vault $VAULT_NAME"
    listing=""
  fi

  if [[ -n "$listing" && "$listing" != "None" ]]; then
    # ARNs arrive separated by tabs and/or newlines; read stops at end of
    # input and therefore reports a non-zero status, which is expected.
    IFS=$' \t\n' read -r -d '' -a recovery_points <<< "$listing" || true
  fi

  if (( ${#recovery_points[@]} > 0 )); then
    log_info "Deleting recovery points..."
    for rp in "${recovery_points[@]}"; do
      log_info "Deleting recovery point: $rp"
      aws backup delete-recovery-point \
        --backup-vault-name "$VAULT_NAME" \
        --recovery-point-arn "$rp" \
        --region "$AWS_REGION" 2>/dev/null || log_warning "Failed to delete recovery point: $rp"
    done

    # Wait for deletion: poll until the vault lists no recovery points rather
    # than assuming a fixed delay is long enough.
    log_info "Waiting for recovery points to be deleted..."
    while (( waited < max_wait )); do
      sleep "$poll_interval"
      waited=$((waited + poll_interval))
      if ! listing=$(aws "${list_args[@]}" 2>/dev/null); then
        break
      fi
      if [[ -z "$listing" || "$listing" == "None" ]]; then
        break
      fi
    done
  fi

  # Delete backup vault (only if empty)
  log_info "Deleting backup vault..."
  aws backup delete-backup-vault \
    --backup-vault-name "$VAULT_NAME" \
    --region "$AWS_REGION" 2>/dev/null || log_warning "Backup vault may not be empty or already deleted"

  log_success "Cleanup completed"
}
|
|
|
# Main execution
#
# Runs the whole test workflow, one step after another, framed by a banner.
# The steps run in the order listed below. The positional arguments passed to
# main are not inspected here.
main() {
  local banner="=========================================="
  local -a steps=(
    check_prerequisites
    get_cluster_info
    get_aws_account_id
    list_cluster_instances
    create_backup_vault
    setup_iam_role
    create_backup_job
    monitor_backup_job
    verify_backup_resources
    run_destroy
    verify_detection
    cleanup_backup_resources
  )
  local step

  log_info "$banner"
  log_info "AWS Backup Detection Mechanism Test"
  log_info "$banner"
  echo ""

  for step in "${steps[@]}"; do
    "$step"
  done

  log_success "$banner"
  log_success "Test completed successfully!"
  log_success "$banner"
}

# Run main function
main "$@"