will33am committed on
Commit
3ccff6e
1 Parent(s): 68e3512

update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -0
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from datasets import load_dataset
3
+ import re
4
+
5
+
6
def load_dataset_demo(name):
    """Load the ``train`` split of a dataset, keeping only flagged rows.

    Args:
        name: Dataset identifier passed to ``load_dataset``.

    Returns:
        The ``train`` split. When the split has a ``flags`` column, only the
        rows whose ``flags`` value is truthy are kept; otherwise the split is
        returned unfiltered.
    """
    # Load once and test for the column explicitly instead of wrapping the
    # whole call in a blanket exception handler: a genuine load or filter
    # failure now surfaces, and the dataset is never loaded a second time.
    dataset = load_dataset(name)["train"]
    if "flags" not in dataset.column_names:
        return dataset
    return dataset.filter(lambda row: row["flags"])
12
+
13
+
14
# Dataset identifiers offered in the dropdown, in display order.
NAME_DATASETS = [
    "Self-GRIT/selfrag_dataset-embed_query_instruct-Meta-Llama-3-70B-Instruct_temp-0.01",
    "Self-GRIT/selfrag_dataset_mini-embed_query_instruct-Meta-Llama-3-70B-Instruct_temp-0.01",
    "Self-GRIT/selfrag_dataset_mini-embed_query_instruct-Meta-Llama-3-8B-Instruct",
]

# Every dataset is loaded eagerly at import time and kept keyed by its name.
DATASETS = {
    dataset_name: load_dataset_demo(dataset_name) for dataset_name in NAME_DATASETS
}

# Column names read from each example by the callback.
INSTRUCTION_COL = "instruction"
OUTPUT_COL = "output"
OUTPUT_ORIGIN = "output_origin"
24
+
25
+
26
def extract_pairs(text):
    """Collect every ``<embed>...</embed><passage>...</passage>`` pair in *text*.

    Args:
        text: String to scan.

    Returns:
        A list of ``(embed_content, passage_content)`` tuples in order of
        appearance; empty when nothing matches.
    """
    # DOTALL lets the lazy groups span newlines inside a tag body.
    pair_regex = r"<embed>(.*?)</embed><passage>(.*?)</passage>"
    return [found.groups() for found in re.finditer(pair_regex, text, re.DOTALL)]
32
+
33
+
34
def preprocess_qa_pairs(text):
    """Render the query-passage pairs found in *text* as a readable report.

    Args:
        text: String handed to ``extract_pairs``.

    Returns:
        One numbered section per pair (numbering starts at 1) with the
        stripped query and passage, or a fixed notice when no pair is found.
    """
    pairs = extract_pairs(text)
    if not pairs:
        return "No query-passage pairs found."

    sections = []
    for number, (query, passage) in enumerate(pairs, start=1):
        sections.append(
            f"========================== QP-Pair {number} =============================\n"
            f"Query:\n{query.strip()}\n"
            f"Passage:\n{passage.strip()}\n\n"
        )
    return "".join(sections)
45
+
46
+
47
def output_fn(dropdown, slider):
    """Look up one example and return the fields shown in the UI.

    Args:
        dropdown: Name of the selected dataset (a key of ``DATASETS``). A
            one-element list/tuple is also accepted and unwrapped.
        slider: Index of the example; converted with ``int``.

    Returns:
        A 4-tuple: the instruction, the ``output_origin`` text, the
        ``output`` text, and the formatted query-passage pairs extracted
        from ``output``.

    Raises:
        gr.Error: If the index is past the end of the selected dataset.
    """
    # The dropdown may deliver its value wrapped in a list, which cannot be
    # used as a dictionary key.
    if isinstance(dropdown, (list, tuple)) and dropdown:
        dropdown = dropdown[0]

    dataset = DATASETS[dropdown]
    index = int(slider)
    # The slider range is shared by all datasets, so the index can exceed the
    # size of the one that is selected; report that instead of crashing.
    if index >= len(dataset):
        raise gr.Error(
            f"Example #{index} is out of range: "
            f"the selected dataset has {len(dataset)} examples."
        )

    example = dataset[index]
    generated = example[OUTPUT_COL]
    return (
        example[INSTRUCTION_COL],
        example[OUTPUT_ORIGIN],
        generated,
        preprocess_qa_pairs(generated),
    )
56
+
57
+
58
# Build the UI: dataset and example selectors on top, the example's fields below.
with gr.Blocks() as demo:
    gr.Markdown("# Explore Self-RAG Datasets")
    with gr.Group():
        with gr.Row():
            with gr.Column():
                dropdown = gr.Dropdown(
                    NAME_DATASETS,
                    # Single-select dropdown: the default must be one string,
                    # not a list.
                    value=NAME_DATASETS[0],
                    multiselect=False,
                    label="Dataset",
                    info="Select the dataset name",
                )
            with gr.Column():
                slider = gr.Slider(
                    minimum=0,
                    # Indices are zero-based, so the last valid one is len - 1
                    # (never below the minimum of 0).
                    maximum=max(
                        max(len(dataset) for dataset in DATASETS.values()) - 1, 0
                    ),
                    step=1,
                    label="#example",
                    value=0,
                )
        button = gr.Button(value="Submit", variant="primary")
    with gr.Group():
        with gr.Row():
            output_instruction = gr.Textbox(
                label="Instruction", placeholder="Instruction", type="text"
            )
        with gr.Row():
            with gr.Row():
                output_self_rag = gr.Textbox(
                    label="SELF-RAG output", placeholder="SELF-RAG output", type="text"
                )
                output_self_grit = gr.Textbox(
                    label="SELF-GRIT output",
                    placeholder="SELF-GRIT output",
                    type="text",
                )
    with gr.Group():
        output_qps = gr.Textbox(
            label="Query-Passage Pairs", placeholder="Query-Passage Pairs", type="text"
        )
    # Fill the four text boxes from the selected dataset and example index.
    button.click(
        fn=output_fn,
        inputs=[dropdown, slider],
        outputs=[output_instruction, output_self_rag, output_self_grit, output_qps],
    )
demo.launch(share=True, debug=True)