import os
import pandas as pd
from openpyxl import load_workbook
def consolidate_excel_files(input_folder, output_file):
# List to hold data from all Excel files
all_data = []
# Loop through all files in the input folder
for file_name in os.listdir(input_folder):
# Check if the file is an Excel file
if file_name.endswith('.xlsx') or file_name.endswith('.xls'):
# Create the full file path
file_path = os.path.join(input_folder, file_name)
try:
# Load the workbook using openpyxl to get table names
wb = load_workbook(file_path, data_only=True)
for sheet_name in wb.sheetnames:
sheet = wb[sheet_name]
# Check for tables in the sheet
if sheet.tables:
for table_name, table_obj in sheet.tables.items():
# Define the range of the table
data_range = sheet[table_obj.ref]
data = [[cell.value for cell in row] for row in data_range]
# Convert to DataFrame
df_table = pd.DataFrame(data)
# Set the first row as the header
df_table.columns = df_table.iloc[0]
df_table = df_table[1:]
# Add metadata columns
df_table['Source File'] = file_name
df_table['Sheet Name'] = sheet_name
df_table['Table Name'] = table_name
# Append the DataFrame to the list
all_data.append(df_table)
else:
print(f"No tables found in {file_name} - {sheet_name}")
except Exception as e:
print(f"Error processing {file_name}: {e}")
if all_data:
# Concatenate all data into a single DataFrame
consolidated_df = pd.concat(all_data, ignore_index=True)
# Write the consolidated data to a new Excel file
consolidated_df.to_excel(output_file, index=False)
print(f"Consolidated Excel file saved as {output_file}")
else:
print("No data to consolidate.")
# Usage example:
input_folder = 'path_to_your_folder' # Replace with your folder path
output_file = 'consolidated_output.xlsx' # Replace with your desired output file name
consolidate_excel_files(input_folder, output_file)