Spaces:

taka-yamakoshi
/

tokenizer-demo

Running

taka-yamakoshi committed on Jul 16, 2022

Commit

dc80c0d

•

1 Parent(s): ed9112c

minor update on instructions

Files changed (1) hide show

app.py CHANGED Viewed

@@ -98,7 +98,8 @@ if __name__=='__main__':
     st.markdown(generate_markdown('quick and easy way to explore how tokenizers work',size=24), unsafe_allow_html=True)
     # Select and load the tokenizer
-    tokenizer_name = st.sidebar.selectbox('1. Choose the tokenizer from below',
                                             ('bert-base-uncased','bert-large-cased',
                                             'gpt2','gpt2-large',
                                             'roberta-base','roberta-large',
@@ -106,11 +107,11 @@ if __name__=='__main__':
     tokenizer = load_model(tokenizer_name)
     st.sidebar.write('2. Optional settings')
-    st.sidebar.write(f'"Compare two texts" compares # tokens for two pieces of text '\
-                        +f'and "de-tokenize" converts a list of tokenized indices back to strings.')
-    st.sidebar.write(f'For "de-tokenize", make sure to type in integers, separated by single spaces')
     comparison_mode = st.sidebar.checkbox('Compare two texts')
     detokenize = st.sidebar.checkbox('de-tokenize')
     if comparison_mode:
         sent_cols = st.columns(2)
         num_tokens = {}

     st.markdown(generate_markdown('quick and easy way to explore how tokenizers work',size=24), unsafe_allow_html=True)
     # Select and load the tokenizer
+    st.sidebar.write('1. Choose the tokenizer from below')
+    tokenizer_name = st.sidebar.selectbox('',
                                             ('bert-base-uncased','bert-large-cased',
                                             'gpt2','gpt2-large',
                                             'roberta-base','roberta-large',
     tokenizer = load_model(tokenizer_name)
     st.sidebar.write('2. Optional settings')
     comparison_mode = st.sidebar.checkbox('Compare two texts')
     detokenize = st.sidebar.checkbox('de-tokenize')
+    st.sidebar.write(f'"Compare two texts" compares # tokens for two pieces of text '\
+                        +f'and "de-tokenize" converts a list of tokenized indices back to strings.')
+    st.sidebar.write(f'For "de-tokenize", make sure to type in integers, separated by single spaces.')
     if comparison_mode:
         sent_cols = st.columns(2)
         num_tokens = {}