# python-toolbox/file tools/list_s3_files.py

import argparse
import logging
from urllib.parse import quote

import boto3
from botocore.exceptions import ClientError, NoCredentialsError

# --- Configuration ---
# Set up logging for clear, informative output.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def get_s3_client():
    """
    Initializes and returns a Boto3 S3 client.
    Handles credential errors gracefully.
    """
    try:
        s3_client = boto3.client('s3')
        # A quick check that the credentials are valid. Note that this call
        # requires the s3:ListAllMyBuckets permission; if it is denied, the
        # ClientError handler below reports the failure.
        s3_client.list_buckets()
        return s3_client
    except NoCredentialsError:
        logging.error("AWS credentials not found. Please run 'aws configure' or set up environment variables.")
        return None
    except ClientError as e:
        logging.error(f"An AWS client error occurred: {e}")
        return None

def get_bucket_region(s3_client, bucket_name):
    """
    Retrieves the AWS region where the S3 bucket is located.
    """
    try:
        response = s3_client.get_bucket_location(Bucket=bucket_name)
        # For us-east-1, the LocationConstraint is None; for other regions,
        # it is the region string.
        region = response.get('LocationConstraint')
        return region if region is not None else 'us-east-1'
    except ClientError as e:
        if e.response['Error']['Code'] == 'NoSuchBucket':
            logging.error(f"The bucket '{bucket_name}' does not exist.")
        else:
            logging.error(f"Could not get location for bucket '{bucket_name}': {e}")
        return None

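# Caveat (not handled above): buckets created long ago in eu-west-1 may report
# the legacy LocationConstraint value 'EU'. If you need to support such
# buckets, map that value explicitly before building URLs, e.g.:
#
#     region = 'eu-west-1' if region == 'EU' else region
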
def list_files_and_generate_urls(s3_client, bucket_name):
    """
    Lists all files in an S3 bucket and prints their public-style URLs.
    Note that the URLs are only reachable if the bucket or objects
    allow public reads.
    """
    logging.info(f"Fetching region for bucket '{bucket_name}'...")
    region = get_bucket_region(s3_client, bucket_name)
    if not region:
        logging.error("Aborting due to failure in retrieving bucket region.")
        return
    logging.info(f"Bucket is in region: {region}")
    logging.info("Listing files and generating URLs...")

    # Construct the base URL. The format is:
    # https://<bucket-name>.s3.<region>.amazonaws.com/<key>
    base_url = f"https://{bucket_name}.s3.{region}.amazonaws.com/"
    file_count = 0
    try:
        # Use a paginator to handle buckets with more than 1000 objects automatically.
        paginator = s3_client.get_paginator('list_objects_v2')
        pages = paginator.paginate(Bucket=bucket_name)
        for page in pages:
            # An empty bucket yields pages without a 'Contents' key.
            for obj in page.get('Contents', []):
                # The object key is the 'filename' in the S3 bucket.
                object_key = obj['Key']
                # URL-encode the key to handle special characters such as spaces,
                # while leaving '/' path separators intact (quote's default).
                encoded_key = quote(object_key)
                file_url = f"{base_url}{encoded_key}"
                print(f"File: {object_key}\nURL: {file_url}\n")
                file_count += 1
        logging.info("=" * 30)
        if file_count == 0:
            logging.info(f"The bucket '{bucket_name}' is empty.")
        else:
            logging.info(f"Found {file_count} file(s) in '{bucket_name}'.")
        logging.info("=" * 30)
    except ClientError as e:
        logging.error(f"An error occurred while listing files: {e}")

if __name__ == "__main__":
    # --- Command-Line Argument Parsing ---
    parser = argparse.ArgumentParser(description="List files in an S3 bucket and generate their public URLs.")
    parser.add_argument("bucket_name", help="The name of the S3 bucket.")
    args = parser.parse_args()

    s3 = get_s3_client()
    if s3:
        list_files_and_generate_urls(s3, args.bucket_name)
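
# Example invocation (hypothetical bucket name):
#
#     python list_s3_files.py my-example-bucket
#
# Expected output is one "File: <key>" / "URL: <url>" pair per object,
# followed by a summary count between separator lines.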