Commit 9240bf4 by taka-yamakoshi
Parent(s): 21c2f11

minor update

app.py CHANGED
@@ -106,19 +106,20 @@ if __name__=='__main__':
     tokenizer = load_model(tokenizer_name)
 
     comparison_mode = st.sidebar.checkbox('Compare two texts')
-    detokenize = st.sidebar.checkbox('de-tokenize')
+    detokenize = st.sidebar.checkbox('de-tokenize (make sure to type in integers separated by single spaces)')
     if comparison_mode:
         sent_cols = st.columns(2)
         num_tokens = {}
         sents = {}
         for sent_id, sent_col in enumerate(sent_cols):
             with sent_col:
-                sentence = st.text_input(f'Text {sent_id+1}')
-                sents[f'sent_{sent_id+1}'] = sentence
                 if detokenize:
+                    sentence = st.text_input(f'Tokenized IDs {sent_id+1}')
                     num_tokens[f'sent_{sent_id+1}'] = DeTokenizeText(sentence)
                 else:
+                    sentence = st.text_input(f'Text {sent_id+1}')
                     num_tokens[f'sent_{sent_id+1}'] = TokenizeText(sentence,tokenizer_name)
+                sents[f'sent_{sent_id+1}'] = sentence
 
         if len(sents['sent_1'])>0 and len(sents['sent_2'])>0:
             st.markdown(generate_markdown('Result: ',size=16), unsafe_allow_html=True)
@@ -128,8 +129,9 @@ if __name__=='__main__':
             st.markdown(generate_markdown('Not Matched... ',color='Salmon'), unsafe_allow_html=True)
 
     else:
-        sentence = st.text_input(f'Text')
         if detokenize:
+            sentence = st.text_input(f'Tokenized IDs')
             num_tokens = DeTokenizeText(sentence)
         else:
+            sentence = st.text_input(f'Text')
             num_tokens = TokenizeText(sentence,tokenizer_name)