import os
import argparse
import logging
from datetime import datetime, timezone

import boto3
from boto3.exceptions import S3UploadFailedError
from botocore.exceptions import NoCredentialsError, ClientError

# --- Configuration ---
# Set up logging to provide clear output
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def get_s3_client():
    """
    Initializes and returns a Boto3 S3 client.
    Handles credential errors gracefully.
    """
    try:
        # Boto3 will automatically look for credentials in the standard locations:
        # 1. Environment variables (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
        # 2. The ~/.aws/credentials file
        s3_client = boto3.client('s3')
        # A quick check to ensure credentials are valid
        # (requires the s3:ListAllMyBuckets permission)
        s3_client.list_buckets()
        return s3_client
    except NoCredentialsError:
        logging.error("AWS credentials not found. Please configure them using 'aws configure' or environment variables.")
        return None
    except ClientError as e:
        if e.response['Error']['Code'] == 'InvalidAccessKeyId':
            logging.error("Invalid AWS Access Key ID. Please check your credentials.")
        else:
            logging.error(f"An AWS client error occurred: {e}")
        return None
    except Exception as e:
        logging.error(f"An unexpected error occurred during S3 client initialization: {e}")
        return None


def get_s3_objects(s3_client, bucket_name):
    """
    Fetches all objects in the S3 bucket and returns a dictionary
    mapping object keys to their last modified timestamps.
    """
    s3_objects = {}
    try:
        # Use a paginator to handle buckets with more than 1000 objects
        paginator = s3_client.get_paginator('list_objects_v2')
        pages = paginator.paginate(Bucket=bucket_name)
        for page in pages:
            if "Contents" in page:
                for obj in page['Contents']:
                    s3_objects[obj['Key']] = obj['LastModified']
    except ClientError as e:
        if e.response['Error']['Code'] == 'NoSuchBucket':
            logging.error(f"The bucket '{bucket_name}' does not exist.")
        else:
            logging.error(f"Could not list objects in bucket '{bucket_name}': {e}")
        return None
    return s3_objects


def sync_folder_to_s3(s3_client, local_folder, bucket_name, delete_extra_files):
    """
    Syncs the contents of a local folder to an S3 bucket.
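    Uploads files that are new locally or whose local mtime is newer than the
    S3 object's LastModified timestamp; if delete_extra_files is True, also
    removes S3 objects that no longer exist locally.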
""" if not os.path.isdir(local_folder): logging.error(f"Local directory not found: {local_folder}") return logging.info(f"Starting sync from '{local_folder}' to S3 bucket '{bucket_name}'...") s3_objects = get_s3_objects(s3_client, bucket_name) if s3_objects is None: logging.error("Aborting sync due to S3 error.") return local_files = set() upload_count = 0 skip_count = 0 # --- Step 1: Walk local directory and upload new/modified files --- for root, _, files in os.walk(local_folder): for filename in files: local_path = os.path.join(root, filename) # Create the relative path to use as the S3 object key relative_path = os.path.relpath(local_path, local_folder) # S3 uses forward slashes, so convert for cross-platform compatibility s3_key = relative_path.replace(os.path.sep, '/') local_files.add(s3_key) local_mtime_dt = datetime.fromtimestamp(os.path.getmtime(local_path), tz=timezone.utc) # Check if file needs to be uploaded if s3_key not in s3_objects or local_mtime_dt > s3_objects[s3_key]: try: logging.info(f"Uploading: {s3_key}") s3_client.upload_file(local_path, bucket_name, s3_key) upload_count += 1 except ClientError as e: logging.error(f"Failed to upload {local_path}: {e}") else: logging.debug(f"Skipping (unchanged): {s3_key}") skip_count += 1 logging.info("Local file scan complete.") # --- Step 2: Delete files from S3 that are not present locally (if enabled) --- delete_count = 0 if delete_extra_files: logging.info("Checking for files to delete from S3...") s3_keys_to_delete = [ {'Key': key} for key in s3_objects if key not in local_files ] if s3_keys_to_delete: # S3 delete_objects can handle up to 1000 keys at a time for i in range(0, len(s3_keys_to_delete), 1000): chunk = s3_keys_to_delete[i:i + 1000] try: logging.info(f"Deleting {len(chunk)} files from S3...") s3_client.delete_objects( Bucket=bucket_name, Delete={'Objects': chunk} ) delete_count += len(chunk) except ClientError as e: logging.error(f"Failed to delete objects from S3: {e}") else: logging.info("No files to delete from S3.") # --- Final Summary --- logging.info("="*30) logging.info("Sync Summary") logging.info(f" - Uploaded: {upload_count} files") logging.info(f" - Skipped: {skip_count} files (up-to-date)") if delete_extra_files: logging.info(f" - Deleted: {delete_count} files from S3") logging.info("Sync complete.") logging.info("="*30) if __name__ == "__main__": # --- Command-Line Argument Parsing --- parser = argparse.ArgumentParser(description="Sync a local folder to an Amazon S3 bucket.") parser.add_argument("local_folder", help="The local folder to sync.") parser.add_argument("bucket_name", help="The name of the S3 bucket.") parser.add_argument( "--delete", action="store_true", help="Delete files from the S3 bucket that do not exist in the local folder." ) args = parser.parse_args() s3_client = get_s3_client() if s3_client: sync_folder_to_s3(s3_client, args.local_folder, args.bucket_name, args.delete)