File size: 7,822 Bytes
632338f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f07ca0
632338f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f07ca0
632338f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
import gradio as gr
import pandas as pd
import os
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import HfApi
from uploads import add_new_eval

# Label and BibTeX entry shown in the "Citation" accordion of the UI.
# The text is a raw string so the BibTeX is passed to the textbox verbatim.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""@inproceedings{kumar-etal-2024-booksql,
    title = "BookSQL: A Large Scale Text-to-SQL Dataset for Accounting Domain",
    author = "Kumar, Rahul and Raja, Amar and Harsola, Shrutendra and Subrahmaniam, Vignesh and Modi, Ashutosh",
    booktitle = "Proceedings of the 2024 Conference of the North American Chapter of the Association for Computational Linguistics",
    month = "march",
    year = "2024",
    address = "Mexico City, Mexico",
    publisher = "Association for Computational Linguistics"
}"""

# Hugging Face Hub client plus the credentials/target used by restart_space().
# TOKEN is read from the environment and is None when the variable is unset.
# NOTE(review): the UI text links to "Exploration-Lab/BookSQL"; confirm that
# this repo id really is the Space that should be restarted.
LEADERBOARD_PATH = "Exploration-lab/BookSQL-Leaderboard"
TOKEN = os.environ.get("TOKEN")
api = HfApi()


def restart_space():
    """Request a restart of the Space named by ``LEADERBOARD_PATH``.

    Uses the module-level ``api`` client and authenticates with ``TOKEN``.
    Returns nothing; any error raised by the Hub client propagates.
    """
    api.restart_space(
        token=TOKEN,
        repo_id=LEADERBOARD_PATH,
    )


def baseline_load_data(tasks, file_path="submissions/baseline/baseline.csv"):
    """Load the baseline results CSV and keep only the requested metric columns.

    Args:
        tasks: Collection of metric names to display (any of "EMA", "EX",
            "BLEU-4", "ROUGE-L"). ``None`` is treated as "no metrics
            selected" instead of dropping into the debugger.
        file_path: Path of the CSV to read. Defaults to the baseline
            submissions file shipped with the app.

    Returns:
        A DataFrame with the columns "Method" and "Submitted By" followed by
        the selected metrics in their canonical order, with duplicate
        "Method" rows removed (the first occurrence is kept).

    Raises:
        KeyError: If the CSV lacks one of the required columns.
    """
    selected = tasks if tasks is not None else ()

    # Identity columns are always shown; metrics keep a fixed display order
    # regardless of the order in which they were ticked.
    metric_columns = ("EMA", "EX", "BLEU-4", "ROUGE-L")
    column_names = ["Method", "Submitted By"]
    column_names.extend(metric for metric in metric_columns if metric in selected)

    df = pd.read_csv(file_path)
    df = df[column_names]
    return df.drop_duplicates(subset=["Method"], keep="first")


def load_data(tasks):
    """Return the leaderboard table for the given metric selection.

    Currently this is just the baseline table produced by
    ``baseline_load_data(tasks)``.
    """
    return baseline_load_data(tasks)


def search_leaderboard(df, query):
    """Filter the leaderboard to rows whose "Method" contains ``query``.

    Args:
        df: DataFrame with a "Method" column.
        query: Text typed into the search bar. An empty string or ``None``
            means "no filter".

    Returns:
        ``df`` itself when there is no query, otherwise the subset of rows
        whose "Method" contains the query text (case-sensitive).
    """
    if not query:
        return df

    # The query comes straight from user input, so match it literally:
    # as a regex, text such as "(" or "+" would raise re.error.
    # na=False keeps the mask strictly boolean when a method name is missing.
    mask = df["Method"].str.contains(query, regex=False, na=False)
    return df[mask]


def change_version(tasks):
    """Event handler: reload the leaderboard table for the selected tasks.

    Delegates to ``load_data(tasks)`` and returns its DataFrame unchanged.
    """
    return load_data(tasks)


# Initialize Gradio app

# Metric columns offered in the task selector and shown in the initial table.
_ALL_TASKS = ["EMA", "EX", "BLEU-4", "ROUGE-L"]


def _refresh_leaderboard(tasks, query):
    """Rebuild the table from the source data, then apply the search query.

    Both the task selector and the search bar use this handler so that the
    table always reflects the current selection AND the current query.
    Filtering the already-filtered table instead would make searches
    cumulative: rows hidden by an earlier query could never reappear.
    """
    return search_leaderboard(load_data(tasks), query)


demo = gr.Blocks()

with demo:
    # Introductory text at the top of the page.
    gr.Markdown(
        """
    ## 🥇 BookSQL Leaderboard
    Given the importance and wide prevalence of business databases across the world, the proposed dataset, [BookSQL](https://arxiv.org/abs/2406.07860) focuses on the finance and accounting domain. Accounting databases are used across a wide spectrum of industries like construction, healthcare, retail, educational services, insurance, restaurant, real estate, etc. Business in these industries arranges their financial transactions into their own different set of categories (called a chart of accounts Industry Details in accounting terminology.
    Text-to-SQL system developed on BookSQL will be robust at handling various types of accounting databases. The total size of the dataset is 1 million. The dataset is prepared under financial experts' supervision, and the dataset's statistics are provided in below table. The dataset consists of 27 businesses, and each business has around 35k - 40k transactions
    Read more at [https://exploration-lab.github.io/BookSQL/](https://exploration-lab.github.io/BookSQL/).
    Please follow this format for uploading prediction file (https://huggingface.co/spaces/Exploration-Lab/BookSQL/blob/main/sample_prediction.csv)
    """
    )

    # Collapsed-by-default accordion holding the copyable BibTeX entry.
    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
                show_copy_button=True,
            )

    with gr.Tabs():
        with gr.TabItem("Leaderboard"):

            with gr.Row():
                tasks_checkbox = gr.CheckboxGroup(
                    label="Select Tasks",
                    choices=list(_ALL_TASKS),
                    value=list(_ALL_TASKS),
                )

            with gr.Row():
                search_bar = gr.Textbox(
                    placeholder="Search for methods...",
                    show_label=False,
                )

            # Initial table: every metric selected, no search filter.
            leaderboard_table = gr.components.Dataframe(
                value=load_data(list(_ALL_TASKS)),
                interactive=True,
                visible=True,
            )

            # Either control changing triggers a full rebuild of the table
            # from the current task selection and the current search text.
            search_bar.change(
                _refresh_leaderboard,
                inputs=[tasks_checkbox, search_bar],
                outputs=leaderboard_table,
            )

            tasks_checkbox.change(
                _refresh_leaderboard,
                inputs=[tasks_checkbox, search_bar],
                outputs=leaderboard_table,
            )

    # Submission form; add_new_eval receives the field values in the order
    # listed below and its return value is rendered as Markdown.
    with gr.Accordion("Submit a new model for evaluation"):
        with gr.Row():
            with gr.Column():
                method_name_textbox = gr.Textbox(label="Method name")
                url_textbox = gr.Textbox(label="Url to model information")
            with gr.Column():
                organisation = gr.Textbox(label="Organisation")
                mail = gr.Textbox(label="Contact email")
                file_output = gr.File()

        submit_button = gr.Button("Submit Eval")
        submission_result = gr.Markdown()
        submit_button.click(
            add_new_eval,
            [
                method_name_textbox,
                url_textbox,
                file_output,
                organisation,
                mail,
            ],
            submission_result,
        )

    gr.Markdown(
        """
    ## Quick Links

    - [**GitHub Repository**](https://github.com/exploration-lab/BookSQL): Access the source code, fine-tuning scripts, and additional resources for the BookSQL dataset.
    - [**arXiv Paper**](https://arxiv.org/abs/2406.07860): Detailed information about the BookSQL dataset and its significance in Text-to-SQL tasks.
    - [**Dataset on Hugging Face**](https://huggingface.co/datasets/Exploration-Lab/BookSQL): Direct link to download the BookSQL dataset.


    """
    )

# Schedule restart_space() to run once every hour (3600 seconds) on a
# background scheduler, then start serving the UI.
scheduler = BackgroundScheduler()
scheduler.add_job(
    func=restart_space,
    trigger="interval",
    seconds=60 * 60,
)
scheduler.start()

# Start the Gradio server with share=True.
demo.launch(share=True)