"""Gradio app that loads a Hugging Face tokenizer and shows its key details."""

import gradio as gr
from transformers import AutoTokenizer


def _render_sample_chat(tokenizer):
    """Render a fixed sample conversation through the tokenizer's chat template.

    Returns the rendered prompt as text (``tokenize=False``), or a placeholder
    describing the failure, so that a chat-template problem does not hide the
    rest of the tokenizer details.
    """
    messages = [
        {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
        {"role": "user", "content": "Who are you?"},
    ]
    try:
        return tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=False,
        )
    except Exception as exc:
        # Best-effort: report the template failure inline instead of
        # discarding every other detail of a tokenizer that loaded fine.
        return f"<unavailable: {type(exc).__name__}: {exc}>"


def load_tokenizer(repo_path: str) -> str:
    """Load a tokenizer from ``repo_path`` and describe it as display text.

    Args:
        repo_path: Hugging Face repository path (e.g. ``user/repo``).
            Surrounding whitespace is ignored.

    Returns:
        One ``"key: value"`` line per detail (name, vocabulary size, model max
        length, special tokens, rendered sample chat prompt). If the input is
        empty or loading fails, a human-readable error message is returned
        instead; this function does not raise, because its result is shown
        directly in the UI textbox.
    """
    repo_path = (repo_path or "").strip()
    if not repo_path:
        return "Please enter a Hugging Face repository path (e.g., user/repo)."

    try:
        # SECURITY NOTE(review): trust_remote_code=True lets the repository's
        # own Python code run on this machine, and repo_path is user-supplied.
        # Kept because tokenizers with custom code need it; confirm this is
        # acceptable for the deployment (e.g. a sandboxed environment).
        tokenizer = AutoTokenizer.from_pretrained(repo_path, trust_remote_code=True)

        # Relevant details about the tokenizer and its chat template.
        details = {
            "Tokenizer Name": tokenizer.name_or_path,
            "Vocabulary Size": tokenizer.vocab_size,
            "Model Max Length": tokenizer.model_max_length,
            "Special Tokens": tokenizer.all_special_tokens,
            "Chat Template": _render_sample_chat(tokenizer),
        }
        return "\n".join(f"{key}: {value}" for key, value in details.items())
    except Exception as exc:
        # UI boundary: surface the failure as text. The exception type is
        # included because some exceptions carry an empty message.
        return f"{type(exc).__name__}: {exc}"


# Create the Gradio interface
iface = gr.Interface(
    fn=load_tokenizer,
    inputs=gr.Textbox(label="Hugging Face Repository Path (e.g., user/repo)"),
    outputs=gr.Textbox(label="Tokenizer Details"),
    title="Hugging Face Tokenizer Loader",
    description="Enter the Hugging Face repository path to load the tokenizer and view its details.",
)

# Launch the app only when run as a script, so importing this module
# does not start a server as a side effect.
if __name__ == "__main__":
    iface.launch()