HeshamHaroon committed
Commit 4c91389 • 1 Parent(s): 3b0ce68
Update app.py
app.py
CHANGED
@@ -36,6 +36,7 @@ def compare_tokenizers(tokenizer_name, text):
     if tokenizer_name in ["FreedomIntelligence/AceGPT-13B", "FreedomIntelligence/AceGPT-7B", "inception-mbzuai/jais-13b"]:
         tokenizer = tokenizers[tokenizer_name]()
         tokens = tokenizer.tokenize(text)
+        tokens_arabic = [token.encode('utf-8').decode('utf-8') for token in tokens]
         encoded_output = tokenizer.encode(text, add_special_tokens=True, return_tensors="pt")
         decoded_text = tokenizer.decode(encoded_output[0], skip_special_tokens=True)
     else:
@@ -57,7 +58,7 @@ inputs_component = [
 ]
 
 outputs_component = gr.Dataframe(
-    headers=["Tokenizer", "Tokens", "Encoded
+    headers=["Tokenizer", "Tokens", "Encoded", "Decoded"],
     label="Results",
     type="pandas"
 )
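For context, a minimal sketch of how the changed lines could fit together in app.py. The lazy-loading tokenizers dict, the dropdown/textbox inputs, and the single-row DataFrame return value are assumptions that are not shown in this commit; only the lines in the diff above come from the actual file.

# Sketch only, not the Space's actual code: assumes a lazy-loading
# `tokenizers` dict and a one-row-per-call DataFrame result.
import gradio as gr
import pandas as pd
from transformers import AutoTokenizer

# Hypothetical lazy-loader dict keyed by model name.
tokenizers = {
    name: (lambda n=name: AutoTokenizer.from_pretrained(n, trust_remote_code=True))
    for name in [
        "FreedomIntelligence/AceGPT-13B",
        "FreedomIntelligence/AceGPT-7B",
        "inception-mbzuai/jais-13b",
    ]
}

def compare_tokenizers(tokenizer_name, text):
    if tokenizer_name in tokenizers:
        tokenizer = tokenizers[tokenizer_name]()
        tokens = tokenizer.tokenize(text)
        # Round-trip through UTF-8, as in the commit, so Arabic tokens render cleanly.
        tokens_arabic = [token.encode('utf-8').decode('utf-8') for token in tokens]
        encoded_output = tokenizer.encode(text, add_special_tokens=True, return_tensors="pt")
        decoded_text = tokenizer.decode(encoded_output[0], skip_special_tokens=True)
        # One row matching the four Dataframe headers.
        return pd.DataFrame(
            [[tokenizer_name, tokens_arabic, encoded_output[0].tolist(), decoded_text]],
            columns=["Tokenizer", "Tokens", "Encoded", "Decoded"],
        )
    return pd.DataFrame(columns=["Tokenizer", "Tokens", "Encoded", "Decoded"])

inputs_component = [
    gr.Dropdown(choices=list(tokenizers), label="Tokenizer"),
    gr.Textbox(label="Text"),
]

outputs_component = gr.Dataframe(
    headers=["Tokenizer", "Tokens", "Encoded", "Decoded"],
    label="Results",
    type="pandas",
)

gr.Interface(fn=compare_tokenizers, inputs=inputs_component, outputs=outputs_component).launch()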