Spaces:
Running
Running
taka-yamakoshi
committed on
Commit
•
dc80c0d
1
Parent(s):
ed9112c
minor update on instructions
Browse files
app.py
CHANGED
@@ -98,7 +98,8 @@ if __name__=='__main__':
|
|
98 |
st.markdown(generate_markdown('quick and easy way to explore how tokenizers work',size=24), unsafe_allow_html=True)
|
99 |
|
100 |
# Select and load the tokenizer
|
101 |
-
|
|
|
102 |
('bert-base-uncased','bert-large-cased',
|
103 |
'gpt2','gpt2-large',
|
104 |
'roberta-base','roberta-large',
|
@@ -106,11 +107,11 @@ if __name__=='__main__':
|
|
106 |
tokenizer = load_model(tokenizer_name)
|
107 |
|
108 |
st.sidebar.write('2. Optional settings')
|
109 |
-
st.sidebar.write(f'"Compare two texts" compares # tokens for two pieces of text '\
|
110 |
-
+f'and "de-tokenize" converts a list of tokenized indices back to strings.')
|
111 |
-
st.sidebar.write(f'For "de-tokenize", make sure to type in integers, separated by single spaces')
|
112 |
comparison_mode = st.sidebar.checkbox('Compare two texts')
|
113 |
detokenize = st.sidebar.checkbox('de-tokenize')
|
|
|
|
|
|
|
114 |
if comparison_mode:
|
115 |
sent_cols = st.columns(2)
|
116 |
num_tokens = {}
|
|
|
98 |
st.markdown(generate_markdown('quick and easy way to explore how tokenizers work',size=24), unsafe_allow_html=True)
|
99 |
|
100 |
# Select and load the tokenizer
|
101 |
+
st.sidebar.write('1. Choose the tokenizer from below')
|
102 |
+
tokenizer_name = st.sidebar.selectbox('',
|
103 |
('bert-base-uncased','bert-large-cased',
|
104 |
'gpt2','gpt2-large',
|
105 |
'roberta-base','roberta-large',
|
|
|
107 |
tokenizer = load_model(tokenizer_name)
|
108 |
|
109 |
st.sidebar.write('2. Optional settings')
|
|
|
|
|
|
|
110 |
comparison_mode = st.sidebar.checkbox('Compare two texts')
|
111 |
detokenize = st.sidebar.checkbox('de-tokenize')
|
112 |
+
st.sidebar.write(f'"Compare two texts" compares # tokens for two pieces of text '\
|
113 |
+
+f'and "de-tokenize" converts a list of tokenized indices back to strings.')
|
114 |
+
st.sidebar.write(f'For "de-tokenize", make sure to type in integers, separated by single spaces.')
|
115 |
if comparison_mode:
|
116 |
sent_cols = st.columns(2)
|
117 |
num_tokens = {}
|