File size: 2,312 Bytes
002b754
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import html
import os

import gradio as gr
import numpy as np
import pandas as pd

# Flexoki colors for highlighting
# Background colours for differing values; the text on top is drawn in white.
# highlight_diff picks the colour by the position of a value among its
# arguments, so entry 0 marks the first file, entry 1 the second, and so on.
# The five entries match the five upload slots offered by the UI.
HIGHLIGHT_COLORS = [
    "#D14D41",  # Red
    "#879A39",  # Green
    "#66A1DC",  # Blue
    "#D0A215",  # Yellow
    "#8E5F26",  # Brown
]


def highlight_diff(*values):
    """Render one cell of the comparison table as an HTML fragment.

    Each positional argument is the content of the same cell in one of the
    compared files, in upload order. Values are compared by their ``str()``
    form; values for which ``pd.notna`` is false are treated as missing.

    All text is HTML-escaped because the result is placed into a table that
    is rendered without further escaping.

    Args:
        *values: The cell's value in each file.

    Returns:
        str: One of

        * the escaped text of the value, when every file holds the same
          non-missing value;
        * an empty string, when the value is missing everywhere (or no
          values were passed);
        * otherwise the non-missing values, each wrapped in a ``<span>``
          whose background colour identifies the file it came from, joined
          with ``" | "``. A file whose value is missing contributes no span.
    """
    present = [(i, str(v)) for i, v in enumerate(values) if pd.notna(v)]
    if not present:
        return ""

    distinct = {text for _, text in present}
    # Only "all equal" when no file is missing the value; a value that exists
    # in some files but not in others is a difference and must be highlighted.
    if len(distinct) == 1 and len(present) == len(values):
        return html.escape(present[0][1])

    palette_size = len(HIGHLIGHT_COLORS)
    highlighted = [
        # Cycle through the palette so more values than colours cannot raise
        # an IndexError.
        f'<span style="background-color: {HIGHLIGHT_COLORS[i % palette_size]}; '
        f'color: white;">{html.escape(text)}</span>'
        for i, text in present
    ]
    return " | ".join(highlighted)


def compare_csvs(*files):
    """Compare uploaded CSV files cell by cell and report the differences.

    Args:
        *files: One entry per upload slot. ``None`` entries are ignored.
            Every other entry is either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing the path as ``.name``.

    Returns:
        tuple: ``(summary, table_html)``.

        On success ``summary`` is a three-line text report and
        ``table_html`` is an HTML table in which every cell was rendered by
        ``highlight_diff``. A cell counts as different when at least one
        file disagrees with the first file; cells that are missing (NaN) in
        every file count as equal.

        When the inputs cannot be compared (fewer than two files, unreadable
        CSV, different shapes or different column names) ``summary`` is a
        human-readable message and ``table_html`` is ``None``.
    """
    valid_files = [f for f in files if f is not None]
    if len(valid_files) < 2:
        return "Please upload at least two CSV files.", None

    # Uploads may arrive as plain paths or as file wrappers carrying ``.name``.
    paths = [
        f if isinstance(f, (str, os.PathLike)) else f.name for f in valid_files
    ]

    # Read CSV files; report unreadable uploads instead of crashing.
    try:
        dataframes = [pd.read_csv(path) for path in paths]
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as exc:
        return f"Could not read the CSV files: {exc}", None

    # Check if all dataframes have the same shape
    if len({df.shape for df in dataframes}) > 1:
        return (
            "The CSV files have different shapes. Please ensure they all have the same number of rows and columns.",
            None,
        )

    reference = dataframes[0]

    # Equal shapes do not imply equal headers, and both the per-column lookup
    # and the element-wise comparison below need identical column labels.
    if any(not df.columns.equals(reference.columns) for df in dataframes[1:]):
        return (
            "The CSV files have different column names. Please ensure they all share the same header row.",
            None,
        )

    # Create a combined dataframe for comparison
    combined_df = reference.copy()
    for col in combined_df.columns:
        combined_df[col] = [
            highlight_diff(*cells) for cells in zip(*(df[col] for df in dataframes))
        ]
    # Headers come from the uploads as well and to_html(escape=False) emits
    # them verbatim, so escape them here.
    combined_df.columns = [html.escape(str(col)) for col in combined_df.columns]

    # Calculate summary
    total_cells = reference.size
    differs = np.zeros(reference.shape, dtype=bool)
    for other in dataframes[1:]:
        # NaN != NaN is True, so exclude cells that are missing on both sides.
        both_missing = reference.isna() & other.isna()
        differs |= ((reference != other) & ~both_missing).to_numpy(dtype=bool)
    different_cells = int(differs.sum())
    # Header-only files have no cells; avoid dividing by zero.
    diff_percentage = (different_cells / total_cells) * 100 if total_cells else 0.0

    summary = f"Total cells: {total_cells}\n"
    summary += f"Different cells: {different_cells}\n"
    summary += f"Percentage of differences: {diff_percentage:.2f}%"

    return summary, combined_df.to_html(escape=False)


# Build the Gradio interface. Every component below is created inside the
# ``gr.Blocks`` context, in the order written here.
with gr.Blocks() as app:
    gr.Markdown("# 💕 Kindness - CSV Comparison")

    # Five file inputs inside one gr.Row, labelled "Upload CSV 1" through
    # "Upload CSV 5". compare_csvs ignores entries that are None
    # (presumably what Gradio passes for a slot left empty -- confirm).
    with gr.Row():
        files = [gr.File(label=f"Upload CSV {i+1}") for i in range(5)]

    compare_btn = gr.Button("Compare CSVs")

    # Output components for the two values compare_csvs returns: the text
    # summary and the HTML comparison table.
    summary = gr.Textbox(label="Summary")
    diff_output = gr.HTML(label="Differences")

    # Clicking the button calls compare_csvs with the five file inputs as
    # positional arguments and sends its results to the two outputs above.
    compare_btn.click(compare_csvs, inputs=files, outputs=[summary, diff_output])


# NOTE(review): there is no ``if __name__ == "__main__":`` guard, so the app
# is launched whenever this module is executed, including on import.
app.launch()