# python-toolbox/file tools/list_s3_files.py

import argparse
import logging
from urllib.parse import quote

import boto3
from botocore.exceptions import ClientError, NoCredentialsError

# --- Configuration ---
# Set up logging for clear, informative output.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def get_s3_client():
    """
    Initializes and returns a Boto3 S3 client.
    Handles credential errors gracefully.
    """
    try:
        s3_client = boto3.client('s3')
        # A quick check that the credentials are valid. Note that this call
        # requires the s3:ListAllMyBuckets permission; if it is denied, the
        # ClientError handler below reports the failure.
        s3_client.list_buckets()
        return s3_client
    except NoCredentialsError:
        logging.error("AWS credentials not found. Please run 'aws configure' or set up environment variables.")
        return None
    except ClientError as e:
        logging.error(f"An AWS client error occurred: {e}")
        return None

def get_bucket_region(s3_client, bucket_name):
    """
    Retrieves the AWS region where the S3 bucket is located.
    """
    try:
        response = s3_client.get_bucket_location(Bucket=bucket_name)
        # For us-east-1, the LocationConstraint is None; for other regions,
        # it is the region string.
        region = response.get('LocationConstraint')
        return region if region is not None else 'us-east-1'
    except ClientError as e:
        if e.response['Error']['Code'] == 'NoSuchBucket':
            logging.error(f"The bucket '{bucket_name}' does not exist.")
        else:
            logging.error(f"Could not get location for bucket '{bucket_name}': {e}")
        return None

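# Caveat (not handled above): buckets created long ago in eu-west-1 may report
# the legacy LocationConstraint value 'EU'. If you need to support such
# buckets, map that value explicitly before building URLs, e.g.:
#
#     region = 'eu-west-1' if region == 'EU' else region
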
def list_files_and_generate_urls(s3_client, bucket_name):
    """
    Lists all files in an S3 bucket and prints their public-style URLs.
    Note that the URLs are only reachable if the bucket or objects
    allow public reads.
    """
    logging.info(f"Fetching region for bucket '{bucket_name}'...")
    region = get_bucket_region(s3_client, bucket_name)
    if not region:
        logging.error("Aborting due to failure in retrieving bucket region.")
        return
    logging.info(f"Bucket is in region: {region}")
    logging.info("Listing files and generating URLs...")

    # Construct the base URL. The format is:
    # https://<bucket-name>.s3.<region>.amazonaws.com/<key>
    base_url = f"https://{bucket_name}.s3.{region}.amazonaws.com/"
    file_count = 0
    try:
        # Use a paginator to handle buckets with more than 1000 objects automatically.
        paginator = s3_client.get_paginator('list_objects_v2')
        pages = paginator.paginate(Bucket=bucket_name)
        for page in pages:
            # An empty bucket yields pages without a 'Contents' key.
            for obj in page.get('Contents', []):
                # The object key is the 'filename' in the S3 bucket.
                object_key = obj['Key']
                # URL-encode the key to handle special characters such as spaces,
                # while leaving '/' path separators intact (quote's default).
                encoded_key = quote(object_key)
                file_url = f"{base_url}{encoded_key}"
                print(f"File: {object_key}\nURL: {file_url}\n")
                file_count += 1
        logging.info("=" * 30)
        if file_count == 0:
            logging.info(f"The bucket '{bucket_name}' is empty.")
        else:
            logging.info(f"Found {file_count} file(s) in '{bucket_name}'.")
        logging.info("=" * 30)
    except ClientError as e:
        logging.error(f"An error occurred while listing files: {e}")

if __name__ == "__main__":
    # --- Command-Line Argument Parsing ---
    parser = argparse.ArgumentParser(description="List files in an S3 bucket and generate their public URLs.")
    parser.add_argument("bucket_name", help="The name of the S3 bucket.")
    args = parser.parse_args()

    s3 = get_s3_client()
    if s3:
        list_files_and_generate_urls(s3, args.bucket_name)
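
# Example invocation (hypothetical bucket name):
#
#     python list_s3_files.py my-example-bucket
#
# Expected output is one "File: <key>" / "URL: <url>" pair per object,
# followed by a summary count between separator lines.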