Spaces:

hkunlp
/

Binder

Runtime error

App Files Files Community

Timothyxxx committed on Oct 12, 2022

Commit

9654ebc

•

1 Parent(s): 0ab286c

Update interface

Browse files

Files changed (2) hide show

app.py +45 -26
resources/demo_description.md +5 -0

app.py CHANGED Viewed

@@ -15,12 +15,16 @@ from generation.generator import Generator
 import time
 ROOT_DIR = os.path.join(os.path.dirname(__file__), "./")
 EXAMPLE_TABLES = {
     "Estonia men's national volleyball team": (558, "what are the total number of players from france?"),
     "Highest mountain peaks of California": (5, "which is the lowest mountain?"),
     "2010–11 UAB Blazers men's basketball team": (1, "how many players come from alabama?"),
-    "1999 European Tour": (209, "how many consecutive times was south africa the host country?"),
-    "Nissan SR20DET": (438, "which car is the only one with more than 230 hp?"),
 }
@@ -65,7 +69,7 @@ def generate_binder_program(_args, _generator, _data_item):
     max_prompt_tokens = _args.max_api_total_tokens - _args.max_generation_tokens
     from transformers import AutoTokenizer
     tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=os.path.join(ROOT_DIR, "utils", "gpt2"))
-    while len(tokenizer.tokenize(prompt)) >= max_prompt_tokens:  # TODO: Add shrink rows
         n_shots -= 1
         assert n_shots >= 0
         few_shot_prompt = _generator.build_few_shot_prompt_from_file(
@@ -122,13 +126,8 @@ keys = [get_key()]
 # The title
 st.markdown("# Binder Playground")
-# Summary about Binder
-read_markdown('resources/summary.md')
-# Introduction of Binder
-# todo: Write Binder introduction here
-# read_markdown('resources/introduction.md')
-st.image('resources/intro.png')
 # Upload tables/Switch tables
@@ -141,7 +140,6 @@ with col1:
             "Estonia men's national volleyball team",
             "Highest mountain peaks of California",
             "2010–11 UAB Blazers men's basketball team",
-            "1999 European Tour",
             "Nissan SR20DET",
         )
     )
@@ -163,7 +161,6 @@ question = st.text_input(
     value=EXAMPLE_TABLES[selected_table_title][1]
 )
 with col1:
-    # todo: Why selecting language will flush the page?
     selected_language = st.selectbox(
         "Select a programming language",
         ("SQL", "Python"),
@@ -188,10 +185,12 @@ with st.spinner("Generating program ..."):
 # Do execution
-st.markdown("#### Binder program")
 if selected_language == 'SQL':
-    with st.container():
-        st.write(binder_program)
     executor = NSQLExecutor(args, keys=keys)
 elif selected_language == 'Python':
     st.code(binder_program, language='python')
@@ -204,28 +203,48 @@ try:
     os.makedirs('tmp_for_vis/', exist_ok=True)
     with st.spinner("Executing program ..."):
         exec_answer = executor.nsql_exec(stamp, binder_program, db)
-    # todo: Make it more pretty!
-    # todo: Do we need vis for Python?
     if selected_language == 'SQL':
         with open("tmp_for_vis/{}_tmp_for_vis_steps.txt".format(stamp), "r") as f:
             steps = json.load(f)
-        st.markdown("#### Steps & Intermediate results")
         for i, step in enumerate(steps):
-            st.markdown(step)
-            st.text("↓")
             with st.spinner('...'):
                 time.sleep(1)
             with open("tmp_for_vis/{}_result_step_{}.txt".format(stamp, i), "r") as f:
                 result_in_this_step = json.load(f)
-            if isinstance(result_in_this_step, Dict):
-                st.dataframe(pd.DataFrame(pd.DataFrame(result_in_this_step["rows"], columns=result_in_this_step["header"])))
-            else:
-                st.markdown(result_in_this_step)
-            st.text("↓")
     elif selected_language == 'Python':
         pass
     if isinstance(exec_answer, list) and len(exec_answer) == 1:
         exec_answer = exec_answer[0]
-    st.markdown(f'Execution answer: {exec_answer}')
 except Exception as e:
     traceback.print_exc()

 import time
 ROOT_DIR = os.path.join(os.path.dirname(__file__), "./")
+# todo: Add more Binder example questions; they need to be carefully cherry-picked
 EXAMPLE_TABLES = {
     "Estonia men's national volleyball team": (558, "what are the total number of players from france?"),
+    # 'how old is kert toobal'
     "Highest mountain peaks of California": (5, "which is the lowest mountain?"),
+    # 'which mountain is in the most north place?'
     "2010–11 UAB Blazers men's basketball team": (1, "how many players come from alabama?"),
+    # 'how many players are born after 1996?'
+    "Nissan SR20DET": (438, "which car has power more than 170 kw?"),
+    # ''
 }
     max_prompt_tokens = _args.max_api_total_tokens - _args.max_generation_tokens
     from transformers import AutoTokenizer
     tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=os.path.join(ROOT_DIR, "utils", "gpt2"))
+    while len(tokenizer.tokenize(prompt)) >= max_prompt_tokens:
         n_shots -= 1
         assert n_shots >= 0
         few_shot_prompt = _generator.build_few_shot_prompt_from_file(
 # The title
 st.markdown("# Binder Playground")
+# Demo description
+read_markdown('resources/demo_description.md')
 # Upload tables/Switch tables
             "Estonia men's national volleyball team",
             "Highest mountain peaks of California",
             "2010–11 UAB Blazers men's basketball team",
             "Nissan SR20DET",
         )
     )
     value=EXAMPLE_TABLES[selected_table_title][1]
 )
 with col1:
     selected_language = st.selectbox(
         "Select a programming language",
         ("SQL", "Python"),
 # Do execution
+st.subheader("Binder program")
 if selected_language == 'SQL':
+    st.markdown('```sql\n' + binder_program + '\n```')
+    # st.markdown('```' + binder_program + '```')
+    # with st.container():
+    #     st.write(binder_program)
     executor = NSQLExecutor(args, keys=keys)
 elif selected_language == 'Python':
     st.code(binder_program, language='python')
     os.makedirs('tmp_for_vis/', exist_ok=True)
     with st.spinner("Executing program ..."):
         exec_answer = executor.nsql_exec(stamp, binder_program, db)
     if selected_language == 'SQL':
         with open("tmp_for_vis/{}_tmp_for_vis_steps.txt".format(stamp), "r") as f:
             steps = json.load(f)
+        col1, col2, col3 = st.columns([4.7, 0.6, 4.7])
+        # col1.subheader('Steps')
+        # col3.subheader('Intermediate results')
         for i, step in enumerate(steps):
+            col1, _, _ = st.columns([4.7, 0.6, 4.7])
+            with col1:
+                st.markdown(f'**Step#{i+1}**')
+            col1, col2, col3 = st.columns([4.7, 0.6, 4.7])
+            with col1:
+                st.markdown('```sql\n' + step + '\n```')
+                # st.markdown('```' + step + '```')
+                # with st.container():
+                #     st.write(step)
+            with col2:
+                st.markdown('$\\rightarrow$')
             with st.spinner('...'):
                 time.sleep(1)
             with open("tmp_for_vis/{}_result_step_{}.txt".format(stamp, i), "r") as f:
                 result_in_this_step = json.load(f)
+            with col3:
+                if isinstance(result_in_this_step, Dict):
+                    rows = result_in_this_step["rows"]
+                    header = result_in_this_step["header"]
+                    if isinstance(header, list):
+                        for idx in range(len(header)):
+                            if header[idx].startswith('col_'):
+                                header[idx] = step
+                    st.dataframe(pd.DataFrame(pd.DataFrame(rows, columns=header)))
+                else:
+                    st.markdown(result_in_this_step)
+            with st.spinner('...'):
+                time.sleep(1)
     elif selected_language == 'Python':
         pass
     if isinstance(exec_answer, list) and len(exec_answer) == 1:
         exec_answer = exec_answer[0]
+    # st.subheader(f'Execution answer')
+    st.text('')
+    st.markdown(f"Execution answer: {exec_answer}")
+    # todo: Remove tmp files
 except Exception as e:
     traceback.print_exc()

resources/demo_description.md ADDED Viewed

	@@ -0,0 +1,5 @@

+This is an interactive demo of Binder, based on GPT-3 Codex.
+You can enter a question about the table (possibly one that requires external knowledge or functionality), and a Binder program will be generated and executed to derive the answer.
+*Note:* Codex has query limits for OpenAI API keys, so generation/execution may be slow when there are many concurrent requests.
+We are working to make the demo faster and more robust; please let us know if you have any feedback!