# Hugging Face Spaces status shown for this app: Runtime error
from functools import lru_cache

import gradio as gr
from transformers import AutoTokenizer
# Tokenizers offered for English and Arabic.
# Each key is a human-readable label; each value is the Hugging Face Hub
# model id that is handed to AutoTokenizer.from_pretrained().
tokenizers = {
    "English - BERT (bert-base-uncased)": "bert-base-uncased",
    "Arabic - CAMeL BERT (bert-base-arabic-camelbert-ca)": "CAMeL-Lab/bert-base-arabic-camelbert-ca",
    "Arabic - AraBERT (asafaya/bert-base-arabic)": "asafaya/bert-base-arabic",
}
@lru_cache(maxsize=None)
def _load_tokenizer(model_id):
    """Return the tokenizer for ``model_id``, loading it only on first use.

    The cache is unbounded on purpose: ``tokenize_text`` only ever passes
    values taken from the fixed ``tokenizers`` mapping, so the number of
    cached entries cannot grow beyond the number of configured models.
    """
    return AutoTokenizer.from_pretrained(model_id)


def tokenize_text(text, model_name):
    """Tokenize ``text`` with the tokenizer selected by ``model_name``.

    Args:
        text: The sentence to tokenize.
        model_name: A key of the module-level ``tokenizers`` mapping.

    Returns:
        A pair of display strings: ``"Tokens: [...]"`` with the token list
        and ``"Number of tokens: N"`` with its length.

    Raises:
        KeyError: If ``model_name`` is not a key of ``tokenizers``.
    """
    # Reuse an already-loaded tokenizer instead of calling from_pretrained()
    # again on every request.
    tokenizer = _load_tokenizer(tokenizers[model_name])
    tokens = tokenizer.tokenize(text)
    return f"Tokens: {tokens}", f"Number of tokens: {len(tokens)}"
# Gradio input components: a dropdown listing every configured tokenizer
# label (pre-selecting the English BERT entry) and a free-text box.
model_choice = gr.Dropdown(
    choices=list(tokenizers),
    label="Select Tokenizer",
    value="English - BERT (bert-base-uncased)",
)
text_input = gr.Textbox(label="Enter a sentence to tokenize")
# Predefined example rows. Each row is [sentence, tokenizer label]; the label
# must match one of the tokenizer names offered in the dropdown.
examples = [
    # English sentence with the English tokenizer.
    ["The quick brown fox jumps over the lazy dog.", "English - BERT (bert-base-uncased)"],
    # Arabic sentence ("The moon is beautiful in the sky.") with an Arabic tokenizer.
    ["القمر جميل في السماء.", "Arabic - CAMeL BERT (bert-base-arabic-camelbert-ca)"],
]
# Assemble the Gradio interface. The order of `inputs` matches both the
# parameter order of tokenize_text(text, model_name) and the column order of
# each row in `examples`.
demo = gr.Interface(
    fn=tokenize_text,
    inputs=[text_input, model_choice],
    outputs=[gr.Textbox(label="Tokens"), gr.Textbox(label="Number of Tokens")],
    title="Hugging Face Tokenizer Explorer",
    description="Enter a sentence or use one of the example sentences below to see how different tokenizers work.",
    examples=examples,
    # Gradio documents this option as a string ("auto", "manual" or "never"),
    # not a boolean; "never" disables flagging.
    allow_flagging="never",
)