import argparse
import logging
from urllib.parse import quote

import boto3
from botocore.exceptions import NoCredentialsError, ClientError

# --- Configuration ---
# Set up logging for clear, informative output.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


def get_s3_client():
    """
    Initializes and returns a Boto3 S3 client.
    Handles credential errors gracefully.
    """
    try:
        s3_client = boto3.client('s3')
        # A quick check to ensure credentials are valid by listing buckets.
        s3_client.list_buckets()
        return s3_client
    except NoCredentialsError:
        logging.error("AWS credentials not found. Please run 'aws configure' or set up environment variables.")
        return None
    except ClientError as e:
        logging.error(f"An AWS client error occurred: {e}")
        return None


def get_bucket_region(s3_client, bucket_name):
    """
    Retrieves the AWS region where the S3 bucket is located.
    """
    try:
        response = s3_client.get_bucket_location(Bucket=bucket_name)
        # For us-east-1, the LocationConstraint is None. For other regions, it's the region string.
        region = response.get('LocationConstraint')
        return region if region is not None else 'us-east-1'
    except ClientError as e:
        if e.response['Error']['Code'] == 'NoSuchBucket':
            logging.error(f"The bucket '{bucket_name}' does not exist.")
        else:
            logging.error(f"Could not get location for bucket '{bucket_name}': {e}")
        return None


def list_files_and_generate_urls(s3_client, bucket_name):
    """
    Lists all files in an S3 bucket and prints their public URLs.
    """
    logging.info(f"Fetching region for bucket '{bucket_name}'...")
    region = get_bucket_region(s3_client, bucket_name)
    if not region:
        logging.error("Aborting due to failure in retrieving bucket region.")
        return

    logging.info(f"Bucket is in region: {region}")
    logging.info("Listing files and generating URLs...")

    # Construct the base URL. The virtual-hosted-style format is:
    # https://<bucket-name>.s3.<region>.amazonaws.com/<object-key>
    base_url = f"https://{bucket_name}.s3.{region}.amazonaws.com/"

    file_count = 0
    try:
        # Use a paginator to handle buckets with more than 1000 objects automatically.
        paginator = s3_client.get_paginator('list_objects_v2')
        pages = paginator.paginate(Bucket=bucket_name)

        for page in pages:
            if "Contents" in page:
                for obj in page['Contents']:
                    # The object key is the 'filename' in the S3 bucket.
                    object_key = obj['Key']
                    # URL-encode the key to handle special characters like spaces.
                    # quote() leaves '/' unescaped by default, preserving key paths.
                    encoded_key = quote(object_key)
                    file_url = f"{base_url}{encoded_key}"
                    print(f"File: {object_key}\nURL: {file_url}\n")
                    file_count += 1
            # An empty bucket yields pages without a "Contents" key;
            # that case is reported via file_count below.

        logging.info("=" * 30)
        if file_count == 0:
            logging.info(f"The bucket '{bucket_name}' is empty.")
        else:
            logging.info(f"Found {file_count} file(s) in '{bucket_name}'.")
        logging.info("=" * 30)

    except ClientError as e:
        logging.error(f"An error occurred while listing files: {e}")


if __name__ == "__main__":
    # --- Command-Line Argument Parsing ---
    parser = argparse.ArgumentParser(description="List files in an S3 bucket and generate their public URLs.")
    parser.add_argument("bucket_name", help="The name of the S3 bucket.")
    args = parser.parse_args()

    s3 = get_s3_client()
    if s3:
        list_files_and_generate_urls(s3, args.bucket_name)
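
# --- Usage notes ---
# The generated virtual-hosted-style URLs resolve anonymously only if the
# bucket policy or object ACLs allow public read access; otherwise requests
# to them return 403 Forbidden.
#
# Example invocation (the script filename and bucket name below are
# placeholders, not part of this script):
#   $ python list_s3_urls.py my-example-bucket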