csv_comparison / app.py
johnnydevriese's picture
Create app.py
002b754 verified
raw
history blame
2.31 kB
import html

import gradio as gr
import numpy as np
import pandas as pd
# Flexoki colors for highlighting; entry ``i`` marks the value taken from the
# i-th uploaded file (the palette is reused cyclically past its length).
HIGHLIGHT_COLORS = [
    "#D14D41",  # Red
    "#879A39",  # Green
    "#66A1DC",  # Blue
    "#D0A215",  # Yellow
    "#8E5F26",  # Brown
]


def highlight_diff(*values):
    """Render one comparison-table cell from the same cell of every file.

    Values are compared by their ``str()`` form. A missing value (anything
    ``pd.isna`` reports as missing) is treated as a value of its own, so a
    cell that is present in one file and empty in another is shown as a
    difference instead of being silently reported as identical.

    Args:
        *values: The cell's value in each file, in file order.

    Returns:
        An HTML fragment:
        * ``""`` when no values are given or every value is missing;
        * the HTML-escaped common text when all files agree;
        * otherwise one colored ``<span>`` per file joined by ``" | "``,
          with missing values rendered as ``NaN``.
    """
    # ``None`` stands for "missing" so it can never collide with real text
    # such as the literal string "nan".
    texts = [None if pd.isna(value) else str(value) for value in values]

    if all(text is None for text in texts):
        return ""

    if len(set(texts)) == 1:
        # The result is embedded with ``escape=False`` by the caller, so
        # file content must be escaped here.
        return html.escape(texts[0])

    spans = []
    for position, text in enumerate(texts):
        # Wrap around instead of raising IndexError when there are more
        # values than palette entries.
        color = HIGHLIGHT_COLORS[position % len(HIGHLIGHT_COLORS)]
        label = "NaN" if text is None else html.escape(text)
        spans.append(
            f'<span style="background-color: {color}; color: white;">{label}</span>'
        )
    return " | ".join(spans)
def compare_csvs(*files):
    """Compare several uploaded CSV files cell by cell.

    Args:
        *files: One entry per upload slot. ``None`` entries (empty slots)
            are ignored. Each remaining entry is either an object exposing
            the file path as ``.name`` or a plain path string.

    Returns:
        A ``(summary, html_table)`` tuple. When the inputs cannot be
        compared (fewer than two files, different shapes, different column
        names) ``summary`` is an explanatory message and ``html_table`` is
        ``None``. Otherwise ``summary`` reports the cell counts and
        ``html_table`` is the rendered comparison table.

        "Different cells" counts, for every file after the first, the cells
        that differ from the first file; the percentage is relative to the
        number of such pairwise comparisons.
    """
    valid_files = [f for f in files if f is not None]
    if len(valid_files) < 2:
        return "Please upload at least two CSV files.", None

    # Accept both upload wrappers carrying the path in ``.name`` and plain
    # path strings (which have no ``.name`` attribute).
    dataframes = [pd.read_csv(getattr(f, "name", f)) for f in valid_files]
    reference = dataframes[0]
    others = dataframes[1:]

    # Check if all dataframes have the same shape
    if len({df.shape for df in dataframes}) > 1:
        return (
            "The CSV files have different shapes. Please ensure they all have the same number of rows and columns.",
            None,
        )

    # Same shape does not imply same header; mismatched labels would fail
    # with KeyError/ValueError in the column lookups and comparison below.
    if any(not df.columns.equals(reference.columns) for df in others):
        return (
            "The CSV files have different column names. Please ensure they all have the same header row.",
            None,
        )

    # Create a combined dataframe for comparison
    combined_df = reference.copy()
    for col in combined_df.columns:
        combined_df[col] = [
            highlight_diff(*values) for values in zip(*(df[col] for df in dataframes))
        ]
    # The table is rendered with ``escape=False`` so the highlight markup
    # survives; header text from the files must therefore be escaped here.
    combined_df.columns = [html.escape(str(col)) for col in combined_df.columns]

    # Calculate summary
    total_cells = reference.size
    reference_missing = reference.isna()
    different_cells = 0
    for df in others:
        # ``NaN != NaN`` is True, so cells that are empty in both files are
        # masked out explicitly rather than counted as differences.
        mismatch = (reference != df) & ~(reference_missing & df.isna())
        different_cells += int(mismatch.to_numpy().sum())

    comparisons = total_cells * len(others)
    # Header-only files have zero cells; avoid dividing by zero.
    diff_percentage = (different_cells / comparisons) * 100 if comparisons else 0.0

    summary = f"Total cells: {total_cells}\n"
    summary += f"Different cells: {different_cells}\n"
    summary += f"Percentage of differences: {diff_percentage:.2f}%"
    return summary, combined_df.to_html(escape=False)
# Build the UI: five optional upload slots, a button that runs the comparison,
# and two outputs (text summary and HTML table with the highlighted cells).
with gr.Blocks() as app:
    # Heading emoji restored from mojibake (UTF-8 bytes of U+1F495 that had
    # been decoded with a single-byte code page).
    gr.Markdown("# 💕 Kindness - CSV Comparison")
    with gr.Row():
        files = [gr.File(label=f"Upload CSV {i+1}") for i in range(5)]
    compare_btn = gr.Button("Compare CSVs")
    summary = gr.Textbox(label="Summary")
    diff_output = gr.HTML(label="Differences")
    # Each upload slot is passed to compare_csvs as one positional argument.
    compare_btn.click(compare_csvs, inputs=files, outputs=[summary, diff_output])

# Start the server only when executed as a script, so importing this module
# (e.g. to reuse compare_csvs) does not launch the app as a side effect.
if __name__ == "__main__":
    app.launch()