Timothyxxx committed on
Commit
9654ebc
•
1 Parent(s): 0ab286c

Update interface

Browse files
Files changed (2) hide show
  1. app.py +45 -26
  2. resources/demo_description.md +5 -0
app.py CHANGED
@@ -15,12 +15,16 @@ from generation.generator import Generator
15
  import time
16
 
17
  ROOT_DIR = os.path.join(os.path.dirname(__file__), "./")
 
18
  EXAMPLE_TABLES = {
19
  "Estonia men's national volleyball team": (558, "what are the total number of players from france?"),
 
20
  "Highest mountain peaks of California": (5, "which is the lowest mountain?"),
 
21
  "2010–11 UAB Blazers men's basketball team": (1, "how many players come from alabama?"),
22
- "1999 European Tour": (209, "how many consecutive times was south africa the host country?"),
23
- "Nissan SR20DET": (438, "which car is the only one with more than 230 hp?"),
 
24
  }
25
 
26
 
@@ -65,7 +69,7 @@ def generate_binder_program(_args, _generator, _data_item):
65
  max_prompt_tokens = _args.max_api_total_tokens - _args.max_generation_tokens
66
  from transformers import AutoTokenizer
67
  tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=os.path.join(ROOT_DIR, "utils", "gpt2"))
68
- while len(tokenizer.tokenize(prompt)) >= max_prompt_tokens: # TODO: Add shrink rows
69
  n_shots -= 1
70
  assert n_shots >= 0
71
  few_shot_prompt = _generator.build_few_shot_prompt_from_file(
@@ -122,13 +126,8 @@ keys = [get_key()]
122
  # The title
123
  st.markdown("# Binder Playground")
124
 
125
- # Summary about Binder
126
- read_markdown('resources/summary.md')
127
-
128
- # Introduction of Binder
129
- # todo: Write Binder introduction here
130
- # read_markdown('resources/introduction.md')
131
- st.image('resources/intro.png')
132
 
133
  # Upload tables/Switch tables
134
 
@@ -141,7 +140,6 @@ with col1:
141
  "Estonia men's national volleyball team",
142
  "Highest mountain peaks of California",
143
  "2010–11 UAB Blazers men's basketball team",
144
- "1999 European Tour",
145
  "Nissan SR20DET",
146
  )
147
  )
@@ -163,7 +161,6 @@ question = st.text_input(
163
  value=EXAMPLE_TABLES[selected_table_title][1]
164
  )
165
  with col1:
166
- # todo: Why selecting language will flush the page?
167
  selected_language = st.selectbox(
168
  "Select a programming language",
169
  ("SQL", "Python"),
@@ -188,10 +185,12 @@ with st.spinner("Generating program ..."):
188
 
189
 
190
  # Do execution
191
- st.markdown("#### Binder program")
192
  if selected_language == 'SQL':
193
- with st.container():
194
- st.write(binder_program)
 
 
195
  executor = NSQLExecutor(args, keys=keys)
196
  elif selected_language == 'Python':
197
  st.code(binder_program, language='python')
@@ -204,28 +203,48 @@ try:
204
  os.makedirs('tmp_for_vis/', exist_ok=True)
205
  with st.spinner("Executing program ..."):
206
  exec_answer = executor.nsql_exec(stamp, binder_program, db)
207
- # todo: Make it more pretty!
208
- # todo: Do we need vis for Python?
209
  if selected_language == 'SQL':
210
  with open("tmp_for_vis/{}_tmp_for_vis_steps.txt".format(stamp), "r") as f:
211
  steps = json.load(f)
212
- st.markdown("#### Steps & Intermediate results")
 
 
213
  for i, step in enumerate(steps):
214
- st.markdown(step)
215
- st.text("↓")
 
 
 
 
 
 
 
 
 
216
  with st.spinner('...'):
217
  time.sleep(1)
218
  with open("tmp_for_vis/{}_result_step_{}.txt".format(stamp, i), "r") as f:
219
  result_in_this_step = json.load(f)
220
- if isinstance(result_in_this_step, Dict):
221
- st.dataframe(pd.DataFrame(pd.DataFrame(result_in_this_step["rows"], columns=result_in_this_step["header"])))
222
- else:
223
- st.markdown(result_in_this_step)
224
- st.text("↓")
 
 
 
 
 
 
 
 
225
  elif selected_language == 'Python':
226
  pass
227
  if isinstance(exec_answer, list) and len(exec_answer) == 1:
228
  exec_answer = exec_answer[0]
229
- st.markdown(f'Execution answer: {exec_answer}')
 
 
 
230
  except Exception as e:
231
  traceback.print_exc()
 
15
  import time
16
 
17
  ROOT_DIR = os.path.join(os.path.dirname(__file__), "./")
18
+ # todo: Add more binder questions, need careful cherry-picks
19
  EXAMPLE_TABLES = {
20
  "Estonia men's national volleyball team": (558, "what are the total number of players from france?"),
21
+ # 'how old is kert toobal'
22
  "Highest mountain peaks of California": (5, "which is the lowest mountain?"),
23
+ # 'which mountain is in the most north place?'
24
  "2010–11 UAB Blazers men's basketball team": (1, "how many players come from alabama?"),
25
+ # 'how many players are born after 1996?'
26
+ "Nissan SR20DET": (438, "which car has power more than 170 kw?"),
27
+ # ''
28
  }
29
 
30
 
 
69
  max_prompt_tokens = _args.max_api_total_tokens - _args.max_generation_tokens
70
  from transformers import AutoTokenizer
71
  tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=os.path.join(ROOT_DIR, "utils", "gpt2"))
72
+ while len(tokenizer.tokenize(prompt)) >= max_prompt_tokens:
73
  n_shots -= 1
74
  assert n_shots >= 0
75
  few_shot_prompt = _generator.build_few_shot_prompt_from_file(
 
126
  # The title
127
  st.markdown("# Binder Playground")
128
 
129
+ # Demo description
130
+ read_markdown('resources/demo_description.md')
 
 
 
 
 
131
 
132
  # Upload tables/Switch tables
133
 
 
140
  "Estonia men's national volleyball team",
141
  "Highest mountain peaks of California",
142
  "2010–11 UAB Blazers men's basketball team",
 
143
  "Nissan SR20DET",
144
  )
145
  )
 
161
  value=EXAMPLE_TABLES[selected_table_title][1]
162
  )
163
  with col1:
 
164
  selected_language = st.selectbox(
165
  "Select a programming language",
166
  ("SQL", "Python"),
 
185
 
186
 
187
  # Do execution
188
+ st.subheader("Binder program")
189
  if selected_language == 'SQL':
190
+ st.markdown('```sql\n' + binder_program + '\n```')
191
+ # st.markdown('```' + binder_program + '```')
192
+ # with st.container():
193
+ # st.write(binder_program)
194
  executor = NSQLExecutor(args, keys=keys)
195
  elif selected_language == 'Python':
196
  st.code(binder_program, language='python')
 
203
  os.makedirs('tmp_for_vis/', exist_ok=True)
204
  with st.spinner("Executing program ..."):
205
  exec_answer = executor.nsql_exec(stamp, binder_program, db)
 
 
206
  if selected_language == 'SQL':
207
  with open("tmp_for_vis/{}_tmp_for_vis_steps.txt".format(stamp), "r") as f:
208
  steps = json.load(f)
209
+ col1, col2, col3 = st.columns([4.7, 0.6, 4.7])
210
+ # col1.subheader('Steps')
211
+ # col3.subheader('Intermediate results')
212
  for i, step in enumerate(steps):
213
+ col1, _, _ = st.columns([4.7, 0.6, 4.7])
214
+ with col1:
215
+ st.markdown(f'**Step#{i+1}**')
216
+ col1, col2, col3 = st.columns([4.7, 0.6, 4.7])
217
+ with col1:
218
+ st.markdown('```sql\n' + step + '\n```')
219
+ # st.markdown('```' + step + '```')
220
+ # with st.container():
221
+ # st.write(step)
222
+ with col2:
223
+ st.markdown('$\\rightarrow$')
224
  with st.spinner('...'):
225
  time.sleep(1)
226
  with open("tmp_for_vis/{}_result_step_{}.txt".format(stamp, i), "r") as f:
227
  result_in_this_step = json.load(f)
228
+ with col3:
229
+ if isinstance(result_in_this_step, Dict):
230
+ rows = result_in_this_step["rows"]
231
+ header = result_in_this_step["header"]
232
+ if isinstance(header, list):
233
+ for idx in range(len(header)):
234
+ if header[idx].startswith('col_'):
235
+ header[idx] = step
236
+ st.dataframe(pd.DataFrame(pd.DataFrame(rows, columns=header)))
237
+ else:
238
+ st.markdown(result_in_this_step)
239
+ with st.spinner('...'):
240
+ time.sleep(1)
241
  elif selected_language == 'Python':
242
  pass
243
  if isinstance(exec_answer, list) and len(exec_answer) == 1:
244
  exec_answer = exec_answer[0]
245
+ # st.subheader(f'Execution answer')
246
+ st.text('')
247
+ st.markdown(f"Execution answer: {exec_answer}")
248
+ # todo: Remove tmp files
249
  except Exception as e:
250
  traceback.print_exc()
resources/demo_description.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ This is an interactive demo of Binder based on GPT3 Codex.
2
+ You can enter a question about the table (possibly one that requires external knowledge or functionality), and a Binder program will be generated and executed to derive the answer.
3
+
4
+ *Note:* Codex enforces query limits on OpenAI keys, so generation/execution may be slow when many requests arrive concurrently.
5
+ We are trying to make the demo faster and more robust; please let us know if you have any feedback!