# Elvanto attendance-report scraper: fetches an HTML report page and exports
# its data tables to a single CSV file.
import csv
import io

import pandas as pd
import requests
# --- CONFIGURATION ---

# IMPORTANT: Replace this with the full URL for your Elvanto report,
# including the authkey.
# NOTE(review): the authkey embedded in this URL grants access to the report —
# avoid committing this file to a shared or public repository.
REPORT_URL = "https://cairnspc.elvanto.com.au/report/?id=c5ab005a-f2be-403e-84a5-85870ac4a41b&authkey=lguYFfS9"

# The name of the file where the data will be saved.
OUTPUT_FILE = "scraped_elvanto_report.csv"

# This mapping helps rename columns from the Elvanto HTML report
# to the desired CSV column names. This is the MOST LIKELY part you'll need to edit.
#
# - Keys: The column names as they appear EXACTLY in the Elvanto report table.
# - Values: The new column names you want in your final CSV.
#
# Open the report link in your browser to see the exact column headers and update the keys below.
# Note: Column names might be slightly different between yearly tables, so check them all.
COLUMN_MAPPING = {
    'Date': 'Date',
    'Service': 'Service',
    'Total Individuals': 'Total Individuals', # This might be 'Total Individual' or 'Sum' in your report
    'Adults': 'Adults',
    'Children': 'Children',
    'Guests': 'Guests', # Elvanto reports often use 'Visitors' for guests.
}
# --- END OF CONFIGURATION ---
def main():
    """
    Fetch the Elvanto report page, scrape every HTML data table it contains,
    normalise column names via COLUMN_MAPPING, and write the combined rows
    to OUTPUT_FILE as a fully-quoted CSV.

    Returns None. All failures (network errors, missing tables, unmappable
    columns) are reported to stdout rather than raised.
    """
    if "YOUR_REPORT_URL_HERE" in REPORT_URL:
        print("Error: Please update the 'REPORT_URL' in the script before running.")
        return

    print("Attempting to fetch data from your Elvanto report URL...")

    try:
        # Mimic a real browser; some servers reject the default requests UA.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # An explicit timeout prevents the script from hanging forever on an
        # unresponsive server; timeouts surface as RequestException below.
        response = requests.get(REPORT_URL, headers=headers, timeout=30)
        response.raise_for_status()
        print("Successfully fetched the report page.")

        # pandas.read_html() finds every <table> tag on the page and returns
        # a list of DataFrames, one per table. Wrapping the markup in StringIO
        # avoids the pandas >= 2.1 deprecation of passing literal HTML strings.
        tables = pd.read_html(io.StringIO(response.text))

        if not tables:
            print("\nError: No data tables were found on the report page.")
            print("Please check that the URL is correct and the report is loading properly in your browser.")
            return

        print(f"Found {len(tables)} data table(s) on the page. Combining them...")

        all_dataframes = []

        for i, table_df in enumerate(tables):
            print(f"--- Processing table {i+1} of {len(tables)} ---")

            processed_df = table_df.copy()
            processed_df.rename(columns=COLUMN_MAPPING, inplace=True)

            # Keep only the mapped columns that actually exist in this table;
            # yearly tables may not all share identical headers.
            required_columns = list(COLUMN_MAPPING.values())
            final_columns = [col for col in required_columns if col in processed_df.columns]

            if not final_columns:
                print(f"Warning: No mappable columns found in table {i+1}. Skipping this table.")
                continue

            all_dataframes.append(processed_df[final_columns])

        if not all_dataframes:
            print("\nError: After processing all tables, no valid data could be extracted.")
            print("Please double-check your 'COLUMN_MAPPING' configuration.")
            return

        # Concatenate all the processed dataframes into one.
        df_final = pd.concat(all_dataframes, ignore_index=True)
        print("\nSuccessfully combined all tables.")

        # Data cleaning: drop summary rows such as 'Total' or 'Average',
        # matched anywhere in the first column's text.
        df_final = df_final[~df_final.iloc[:, 0].astype(str).str.contains('Total|Average', na=False)]

        # Normalise the date column and sort newest-first, if present.
        if 'Date' in df_final.columns:
            try:
                df_final['Date'] = pd.to_datetime(df_final['Date']).dt.strftime('%Y-%m-%d %H:%M:%S')
                # The '%Y-%m-%d %H:%M:%S' format sorts chronologically even
                # as plain strings, so sorting after formatting is safe.
                df_final.sort_values(by='Date', ascending=False, inplace=True)
            except Exception as e:
                print(f"\nWarning: Could not automatically format the 'Date' column. It may contain unexpected values. Error: {e}")

        df_final.to_csv(OUTPUT_FILE, index=False, quoting=csv.QUOTE_ALL)

        print(f"\nSuccess! Scraped {len(df_final)} records and saved them to '{OUTPUT_FILE}'.")
        print(f"Final columns in CSV: {df_final.columns.tolist()}")

    except requests.exceptions.RequestException as e:
        print(f"\nError: Failed to fetch the URL. Please check the REPORT_URL and your internet connection. Details: {e}")
    except Exception as e:
        print(f"\nAn unexpected error occurred during processing: {e}")
if __name__ == "__main__":
    # Run the scraper only when executed as a script, not when imported.
    main()