"""Sync a local folder to an Amazon S3 bucket from the command line."""
import argparse
import logging
import os
from datetime import datetime, timezone

import boto3
from botocore.exceptions import NoCredentialsError, ClientError
# --- Configuration ---
# Root logger: timestamped INFO-level output so sync progress is visible.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def get_s3_client():
    """
    Initialize and return a Boto3 S3 client, or None on failure.

    Returns:
        A ready-to-use S3 client, or None when credentials are missing
        or invalid (the error is logged, not raised).
    """
    try:
        # Boto3 will automatically look for credentials in the standard locations:
        # 1. Environment variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
        # 2. The ~/.aws/credentials file
        s3_client = boto3.client('s3')
        # Validate credentials with STS GetCallerIdentity instead of
        # s3:ListBuckets: GetCallerIdentity requires no IAM permissions,
        # so credentials that are valid but scoped to a single bucket
        # (lacking s3:ListAllMyBuckets) are not wrongly rejected.
        boto3.client('sts').get_caller_identity()
        return s3_client
    except NoCredentialsError:
        logging.error("AWS credentials not found. Please configure them using 'aws configure' or environment variables.")
        return None
    except ClientError as e:
        # STS reports a bad access key as InvalidClientTokenId; S3 uses
        # InvalidAccessKeyId. Accept either so the friendly message fires.
        if e.response['Error']['Code'] in ('InvalidAccessKeyId', 'InvalidClientTokenId'):
            logging.error("Invalid AWS Access Key ID. Please check your credentials.")
        else:
            logging.error(f"An AWS client error occurred: {e}")
        return None
    except Exception as e:
        # Boundary catch-all: log any other failure and report "no client"
        # rather than crashing the CLI.
        logging.error(f"An unexpected error occurred during S3 client initialization: {e}")
        return None
def get_s3_objects(s3_client, bucket_name):
    """
    Return a dict mapping every object key in *bucket_name* to its
    last-modified timestamp, or None if the bucket cannot be listed.
    """
    objects = {}
    try:
        # Paginate so buckets holding more than 1000 objects are fully listed.
        page_iter = s3_client.get_paginator('list_objects_v2').paginate(Bucket=bucket_name)
        for page in page_iter:
            # An empty bucket page carries no "Contents" entry at all.
            for entry in page.get('Contents', []):
                objects[entry['Key']] = entry['LastModified']
    except ClientError as err:
        code = err.response['Error']['Code']
        if code == 'NoSuchBucket':
            logging.error(f"The bucket '{bucket_name}' does not exist.")
        else:
            logging.error(f"Could not list objects in bucket '{bucket_name}': {err}")
        return None
    return objects
def _upload_changed_files(s3_client, local_folder, bucket_name, s3_objects):
    """Walk *local_folder* and upload files that are new or newer than S3.

    Returns (local_keys, upload_count, skip_count), where local_keys is
    the set of S3 keys corresponding to files found locally.
    """
    local_files = set()
    upload_count = 0
    skip_count = 0
    for root, _, files in os.walk(local_folder):
        for filename in files:
            local_path = os.path.join(root, filename)
            # The path relative to the sync root becomes the S3 object key.
            relative_path = os.path.relpath(local_path, local_folder)
            # S3 keys use forward slashes regardless of the local OS separator.
            s3_key = relative_path.replace(os.path.sep, '/')
            local_files.add(s3_key)

            # Compare in UTC: S3's LastModified is timezone-aware.
            local_mtime_dt = datetime.fromtimestamp(os.path.getmtime(local_path), tz=timezone.utc)

            # Upload when the key is missing remotely or the local copy is newer.
            if s3_key not in s3_objects or local_mtime_dt > s3_objects[s3_key]:
                try:
                    logging.info(f"Uploading: {s3_key}")
                    s3_client.upload_file(local_path, bucket_name, s3_key)
                    upload_count += 1
                except ClientError as e:
                    # One failed upload should not abort the whole sync.
                    logging.error(f"Failed to upload {local_path}: {e}")
            else:
                logging.debug(f"Skipping (unchanged): {s3_key}")
                skip_count += 1
    return local_files, upload_count, skip_count


def _delete_extra_s3_objects(s3_client, bucket_name, s3_objects, local_files):
    """Delete S3 objects whose keys have no matching local file.

    Returns the number of objects deleted; failed batches are logged
    and excluded from the count.
    """
    logging.info("Checking for files to delete from S3...")
    s3_keys_to_delete = [
        {'Key': key} for key in s3_objects if key not in local_files
    ]
    delete_count = 0
    if s3_keys_to_delete:
        # S3 delete_objects can handle up to 1000 keys at a time
        for i in range(0, len(s3_keys_to_delete), 1000):
            chunk = s3_keys_to_delete[i:i + 1000]
            try:
                logging.info(f"Deleting {len(chunk)} files from S3...")
                s3_client.delete_objects(
                    Bucket=bucket_name,
                    Delete={'Objects': chunk}
                )
                delete_count += len(chunk)
            except ClientError as e:
                logging.error(f"Failed to delete objects from S3: {e}")
    else:
        logging.info("No files to delete from S3.")
    return delete_count


def sync_folder_to_s3(s3_client, local_folder, bucket_name, delete_extra_files):
    """
    Sync the contents of a local folder to an S3 bucket.

    Args:
        s3_client: An initialized Boto3 S3 client.
        local_folder: Directory to mirror into the bucket.
        bucket_name: Name of the target S3 bucket.
        delete_extra_files: When True, remove S3 objects that have no
            local counterpart.

    All errors are logged rather than raised; the function returns None.
    """
    if not os.path.isdir(local_folder):
        logging.error(f"Local directory not found: {local_folder}")
        return

    logging.info(f"Starting sync from '{local_folder}' to S3 bucket '{bucket_name}'...")

    s3_objects = get_s3_objects(s3_client, bucket_name)
    if s3_objects is None:
        logging.error("Aborting sync due to S3 error.")
        return

    # --- Step 1: Walk local directory and upload new/modified files ---
    local_files, upload_count, skip_count = _upload_changed_files(
        s3_client, local_folder, bucket_name, s3_objects
    )
    logging.info("Local file scan complete.")

    # --- Step 2: Delete files from S3 that are not present locally (if enabled) ---
    delete_count = 0
    if delete_extra_files:
        delete_count = _delete_extra_s3_objects(
            s3_client, bucket_name, s3_objects, local_files
        )

    # --- Final Summary ---
    logging.info("=" * 30)
    logging.info("Sync Summary")
    logging.info(f" - Uploaded: {upload_count} files")
    logging.info(f" - Skipped: {skip_count} files (up-to-date)")
    if delete_extra_files:
        logging.info(f" - Deleted: {delete_count} files from S3")
    logging.info("Sync complete.")
    logging.info("=" * 30)
if __name__ == "__main__":
    # --- Command-Line Argument Parsing ---
    cli = argparse.ArgumentParser(description="Sync a local folder to an Amazon S3 bucket.")
    cli.add_argument("local_folder", help="The local folder to sync.")
    cli.add_argument("bucket_name", help="The name of the S3 bucket.")
    cli.add_argument(
        "--delete",
        action="store_true",
        help="Delete files from the S3 bucket that do not exist in the local folder."
    )
    options = cli.parse_args()

    # Only run the sync when a usable client could be created.
    client = get_s3_client()
    if client:
        sync_folder_to_s3(client, options.local_folder, options.bucket_name, options.delete)