import csv
from io import StringIO

import pandas as pd
import requests
# --- CONFIGURATION ---
# IMPORTANT: Replace this with the full URL for your Elvanto report,
# including the authkey.
REPORT_URL = "https://cairnspc.elvanto.com.au/report/?id=c5ab005a-f2be-403e-84a5-85870ac4a41b&authkey=lguYFfS9"

# The name of the file where the scraped data will be saved.
OUTPUT_FILE = "scraped_elvanto_report.csv"

# This mapping renames columns from the Elvanto HTML report tables
# to the desired CSV column names. This is the MOST LIKELY part you'll need to edit.
#
# - Keys: The column names as they appear EXACTLY in the Elvanto report table.
# - Values: The new column names you want in your final CSV.
#
# Open the report link in your browser to see the exact column headers and update the keys below.
# Note: Column names might be slightly different between yearly tables, so check them all.
# Columns not listed here are dropped from the output (see main()).
COLUMN_MAPPING = {
    'Date': 'Date',
    'Service': 'Service',
    'Total Individuals': 'Total Individuals',  # This might be 'Total Individual' or 'Sum' in your report
    'Adults': 'Adults',
    'Children': 'Children',
    'Guests': 'Guests',  # Elvanto reports often use 'Visitors' for guests.
}
# --- END OF CONFIGURATION ---
def main():
    """
    Fetch an Elvanto report page, scrape all data tables, and write the
    combined results to a CSV file.

    Reads the module-level constants REPORT_URL, OUTPUT_FILE and
    COLUMN_MAPPING. Prints progress and diagnostics to stdout and returns
    None; network and parsing failures are reported rather than raised.
    """
    if "YOUR_REPORT_URL_HERE" in REPORT_URL:
        print("Error: Please update the 'REPORT_URL' in the script before running.")
        return

    print("Attempting to fetch data from your Elvanto report URL...")
    try:
        # Use headers to mimic a web browser, which can help with access.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        # timeout prevents the script from hanging forever on an unresponsive host.
        response = requests.get(REPORT_URL, headers=headers, timeout=30)
        response.raise_for_status()
        print("Successfully fetched the report page.")

        # pandas.read_html() intelligently searches for <table> tags on the
        # page and returns a list of DataFrames, one for each table found.
        # The HTML is wrapped in StringIO because passing a literal string
        # is deprecated since pandas 2.1.
        tables = pd.read_html(StringIO(response.text))
        if not tables:
            print("\nError: No data tables were found on the report page.")
            print("Please check that the URL is correct and the report is loading properly in your browser.")
            return

        print(f"Found {len(tables)} data table(s) on the page. Combining them...")
        all_dataframes = []
        for i, table_df in enumerate(tables):
            print(f"--- Processing table {i+1} of {len(tables)} ---")
            processed_df = table_df.copy()
            processed_df.rename(columns=COLUMN_MAPPING, inplace=True)
            # Keep only the mapped columns that actually exist in this table;
            # yearly tables may not all share the same headers.
            required_columns = list(COLUMN_MAPPING.values())
            final_columns = [col for col in required_columns if col in processed_df.columns]
            if not final_columns:
                print(f"Warning: No mappable columns found in table {i+1}. Skipping this table.")
                continue
            all_dataframes.append(processed_df[final_columns])

        if not all_dataframes:
            print("\nError: After processing all tables, no valid data could be extracted.")
            print("Please double-check your 'COLUMN_MAPPING' configuration.")
            return

        # Concatenate all the processed dataframes into one.
        df_final = pd.concat(all_dataframes, ignore_index=True)
        print("\nSuccessfully combined all tables.")

        # Data Cleaning: Remove summary rows like 'Total' or 'Average'
        # (matched against the first column, whatever it was named).
        df_final = df_final[~df_final.iloc[:, 0].astype(str).str.contains('Total|Average', na=False)]

        # Convert the date column: parse, sort newest-first while the values
        # are still real datetimes, then serialize to a uniform string format.
        if 'Date' in df_final.columns:
            try:
                df_final['Date'] = pd.to_datetime(df_final['Date'])
                df_final.sort_values(by='Date', ascending=False, inplace=True)
                df_final['Date'] = df_final['Date'].dt.strftime('%Y-%m-%d %H:%M:%S')
            except Exception as e:
                print(f"\nWarning: Could not automatically format the 'Date' column. It may contain unexpected values. Error: {e}")

        # QUOTE_ALL keeps fields with commas/newlines safe in the output CSV.
        df_final.to_csv(OUTPUT_FILE, index=False, quoting=csv.QUOTE_ALL)
        print(f"\nSuccess! Scraped {len(df_final)} records and saved them to '{OUTPUT_FILE}'.")
        print(f"Final columns in CSV: {df_final.columns.tolist()}")
    except requests.exceptions.RequestException as e:
        print(f"\nError: Failed to fetch the URL. Please check the REPORT_URL and your internet connection. Details: {e}")
    except Exception as e:
        print(f"\nAn unexpected error occurred during processing: {e}")
if __name__ == "__main__":
    # Run the scraper only when executed as a script, not on import.
    main()