Spaces:
Sleeping
Sleeping
RohitMidha23
committed on
Commit
•
6abee0e
1
Parent(s):
40fc826
init
Browse files- app.py +23 -0
- requirements.txt +3 -0
- sample.csv +6 -0
- src/constants.py +10 -0
- src/inference.py +48 -0
app.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.inference import infer
import gradio as gr


def main():
    """Build and launch the Gradio UI for asking questions about a CSV file."""
    description = "A quick and easy way to understand and talk to your data!"

    # Declare the UI components up front, then wire them into the Interface.
    query_box = gr.Textbox(label="Query")
    csv_upload = gr.File(label="CSV file")
    result_view = gr.JSON(label="Result")
    table_view = gr.Dataframe(label="Data")

    iface = gr.Interface(
        fn=infer,
        inputs=[query_box, csv_upload],
        outputs=[result_view, table_view],
        examples=[["Who scored the highest?", "sample.csv"]],
        title="Talk to your Data!",
        description=description,
        allow_flagging='never',
    )
    # Queueing serialises requests so concurrent users don't overload the model.
    iface.launch(enable_queue=True)


if __name__ == "__main__":
    main()
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
transformers==4.41.2
|
2 |
+
torch==2.3.0
|
3 |
+
gradio
|
sample.csv
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ID,Name,Age,Score
|
2 |
+
1,Alice,23,85.0
|
3 |
+
2,Bob,35,90.5
|
4 |
+
3,Charlie,45,78.0
|
5 |
+
4,David,25,88.5
|
6 |
+
5,Eve,30,92.0
|
src/constants.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from enum import Enum


class Aggregation(Enum):
    """Aggregation operators, indexed by the model's predicted aggregation id."""

    NONE = 0
    SUM = 1
    AVERAGE = 2
    COUNT = 3


# Lookup table: predicted aggregation index -> human-readable operator name.
id2aggregation = {member.value: member.name for member in Aggregation}
src/inference.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from functools import lru_cache
from typing import Dict, List, Tuple

import pandas as pd
from transformers import TapasTokenizer, TapasForQuestionAnswering

# FIX: app.py imports this module as `src.inference`, so a bare
# `from constants import ...` would fail at runtime — use the package path.
from src.constants import id2aggregation


@lru_cache(maxsize=2)
def _load_model(model_name: str) -> Tuple["TapasForQuestionAnswering", "TapasTokenizer"]:
    """Load and cache the TAPAS model/tokenizer pair for *model_name*.

    Caching avoids re-initialising (and potentially re-downloading) the model
    on every request — the original reloaded it inside each `infer` call.
    """
    model = TapasForQuestionAnswering.from_pretrained(model_name)
    tokenizer = TapasTokenizer.from_pretrained(model_name)
    return model, tokenizer


def infer(query: str, file_name: str,
          model_name: str = "google/tapas-base-finetuned-wtq"
          ) -> Tuple[Dict[str, str], pd.DataFrame]:
    """Answer *query* against the CSV table in *file_name* using TAPAS.

    Args:
        query: Natural-language question about the table.
        file_name: Path to a CSV file readable by pandas.
        model_name: Hugging Face id of a TAPAS QA checkpoint.

    Returns:
        A ``({"query": ..., "answer": ...}, table)`` pair. The second element
        is the parsed DataFrame — app.py declares TWO Gradio outputs
        (gr.JSON and gr.Dataframe), so returning a single dict (as the
        original did) would break the UI.
    """
    # TAPAS requires every cell to be a string.
    table = pd.read_csv(file_name, delimiter=",").astype(str)

    model, tokenizer = _load_model(model_name)

    # Tokenise the (table, question) pair and run the model.
    queries: List[str] = [query]
    inputs = tokenizer(table=table, queries=queries,
                       padding="max_length", return_tensors="pt")
    outputs = model(**inputs)

    # predicted_answer_coordinates: (row, col) cells answering each query;
    # predicted_aggregation_indices: aggregation operator index per query.
    predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
        inputs, outputs.logits.detach(), outputs.logits_aggregation.detach()
    )
    aggregation_predictions_string = [id2aggregation[idx]
                                      for idx in predicted_aggregation_indices]

    # Resolve cell coordinates back to cell values.
    answers = []
    for coordinates in predicted_answer_coordinates:
        if len(coordinates) == 1:
            # Single cell answer.
            answers.append(table.iat[coordinates[0]])
        else:
            # Multiple cells (also covers the empty case -> "").
            answers.append(", ".join(table.iat[coordinate]
                                     for coordinate in coordinates))

    # Combine the cell answer with the predicted aggregation operator.
    answer_str = ""
    for query, answer, predicted_agg in zip(queries, answers,
                                            aggregation_predictions_string):
        if predicted_agg == "NONE":
            answer_str = answer
        else:
            answer_str = f"{predicted_agg} : {answer}"

    return {"query": query, "answer": answer_str}, table