Spaces:
Sleeping
Sleeping
RohitMidha23
committed on
Commit
•
6abee0e
1
Parent(s):
40fc826
init
Browse files- app.py +23 -0
- requirements.txt +3 -0
- sample.csv +6 -0
- src/constants.py +10 -0
- src/inference.py +48 -0
app.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from src.inference import infer
import gradio as gr


def main():
    """Build and launch the Gradio UI for asking questions about a CSV file."""
    description = "A quick and easy way to understand and talk to your data!"

    # Declare the UI components up front, then wire them into the Interface.
    query_box = gr.Textbox(label="Query")
    csv_upload = gr.File(label="CSV file")
    result_view = gr.JSON(label="Result")
    table_view = gr.Dataframe(label="Data")

    iface = gr.Interface(
        fn=infer,
        inputs=[query_box, csv_upload],
        outputs=[result_view, table_view],
        examples=[["Who scored the highest?", "sample.csv"]],
        title="Talk to your Data!",
        description=description,
        allow_flagging='never',
    )
    # Queueing serialises requests so concurrent users don't overload the model.
    iface.launch(enable_queue=True)


if __name__ == "__main__":
    main()
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
transformers==4.41.2
|
2 |
+
torch==2.3.0
|
3 |
+
gradio
|
sample.csv
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ID,Name,Age,Score
|
2 |
+
1,Alice,23,85.0
|
3 |
+
2,Bob,35,90.5
|
4 |
+
3,Charlie,45,78.0
|
5 |
+
4,David,25,88.5
|
6 |
+
5,Eve,30,92.0
|
src/constants.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from enum import Enum


class Aggregation(Enum):
    """Aggregation operators, indexed by the model's predicted aggregation id."""

    NONE = 0
    SUM = 1
    AVERAGE = 2
    COUNT = 3


# Lookup table: predicted aggregation index -> human-readable operator name.
id2aggregation = {member.value: member.name for member in Aggregation}
src/inference.py
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from functools import lru_cache
from typing import Dict, List, Tuple

import pandas as pd
from transformers import TapasTokenizer, TapasForQuestionAnswering

# FIX: app.py imports this module as `src.inference`, so a bare
# `from constants import ...` would fail at runtime — use the package path.
from src.constants import id2aggregation


@lru_cache(maxsize=2)
def _load_model(model_name: str) -> Tuple["TapasForQuestionAnswering", "TapasTokenizer"]:
    """Load and cache the TAPAS model/tokenizer pair for *model_name*.

    Caching avoids re-initialising (and potentially re-downloading) the model
    on every request — the original reloaded it inside each `infer` call.
    """
    model = TapasForQuestionAnswering.from_pretrained(model_name)
    tokenizer = TapasTokenizer.from_pretrained(model_name)
    return model, tokenizer


def infer(query: str, file_name: str,
          model_name: str = "google/tapas-base-finetuned-wtq"
          ) -> Tuple[Dict[str, str], pd.DataFrame]:
    """Answer *query* against the CSV table in *file_name* using TAPAS.

    Args:
        query: Natural-language question about the table.
        file_name: Path to a CSV file readable by pandas.
        model_name: Hugging Face id of a TAPAS QA checkpoint.

    Returns:
        A ``({"query": ..., "answer": ...}, table)`` pair. The second element
        is the parsed DataFrame — app.py declares TWO Gradio outputs
        (gr.JSON and gr.Dataframe), so returning a single dict (as the
        original did) would break the UI.
    """
    # TAPAS requires every cell to be a string.
    table = pd.read_csv(file_name, delimiter=",").astype(str)

    model, tokenizer = _load_model(model_name)

    # Tokenise the (table, question) pair and run the model.
    queries: List[str] = [query]
    inputs = tokenizer(table=table, queries=queries,
                       padding="max_length", return_tensors="pt")
    outputs = model(**inputs)

    # predicted_answer_coordinates: (row, col) cells answering each query;
    # predicted_aggregation_indices: aggregation operator index per query.
    predicted_answer_coordinates, predicted_aggregation_indices = tokenizer.convert_logits_to_predictions(
        inputs, outputs.logits.detach(), outputs.logits_aggregation.detach()
    )
    aggregation_predictions_string = [id2aggregation[idx]
                                      for idx in predicted_aggregation_indices]

    # Resolve cell coordinates back to cell values.
    answers = []
    for coordinates in predicted_answer_coordinates:
        if len(coordinates) == 1:
            # Single cell answer.
            answers.append(table.iat[coordinates[0]])
        else:
            # Multiple cells (also covers the empty case -> "").
            answers.append(", ".join(table.iat[coordinate]
                                     for coordinate in coordinates))

    # Combine the cell answer with the predicted aggregation operator.
    answer_str = ""
    for query, answer, predicted_agg in zip(queries, answers,
                                            aggregation_predictions_string):
        if predicted_agg == "NONE":
            answer_str = answer
        else:
            answer_str = f"{predicted_agg} : {answer}"

    return {"query": query, "answer": answer_str}, table