Spaces:
Runtime error
Runtime error
import gradio as gr | |
import random | |
import time | |
from huggingface_hub import InferenceClient | |
from transformers import AutoTokenizer | |
# Tokenizer is used only for its chat template (prompt formatting); generation
# itself is delegated to the remote endpoint behind `client`.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-70B-Instruct")
client = InferenceClient(model="https://6af4-4-223-164-145.ngrok-free.app")

# First message of every conversation.
SYSTEM_COMMAND = {
    "role": "system",
    "content": "You are a knowledgeable assistant trained to provide accurate and helpful information. Please respond to the user's queries promptly and politely.",
}

# Chat-template control markers. The last entry is a prefix, not a complete
# token string.
STOP_TOKENS = [
    "<|start_header_id|>",
    "<|end_header_id|>",
    "<|eot_id|>",
    "<|reserved_special_token",
]

# Stream items that must never be shown to the user: the markers above plus None.
IGNORED_TOKENS = {None, *STOP_TOKENS}
with gr.Blocks() as demo:
    # Two parallel histories are kept:
    #   * `chatbot`     - [user_text, assistant_text] pairs rendered in the UI
    #   * `tfs_history` - role/content dicts passed to the chat template
    gr.Markdown("This demo is currently turned off")
    tfs_history = gr.State([SYSTEM_COMMAND])
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Prompt")
    clear = gr.Button("Clear")

    def user(user_message, history, dict_history):
        """Record a submitted prompt in both histories.

        Args:
            user_message: Text taken from the prompt textbox.
            history: Chatbot pairs ``[user_text, assistant_text]``.
            dict_history: Role/content message dicts; appended to in place.

        Returns:
            A 3-tuple ``("", pairs, dict_history)``: an empty string for the
            textbox, the pairs extended with ``[user_message, None]``, and
            the updated message list.
        """
        dict_history.append({"role": "user", "content": user_message})
        return "", history + [[user_message, None]], dict_history

    def bot(history, dict_history):
        """Stream the assistant's reply into the last chatbot pair.

        Args:
            history: Chatbot pairs; the last pair's reply slot is filled
                incrementally as tokens arrive.
            dict_history: Role/content message dicts used to build the prompt.
                The finished (or partial) assistant message is appended to it
                in place when the generator ends.

        Yields:
            ``history`` after each displayed token.
        """
        history[-1][1] = ""
        response = {"role": "assistant", "content": ""}
        text_input = tokenizer.apply_chat_template(
            dict_history, tokenize=False, add_generation_prompt=True
        )
        # IGNORED_TOKENS contains a bare prefix ("<|reserved_special_token"),
        # which exact set membership can never match against a full token, so
        # compare by prefix. None is handled separately below.
        ignored_prefixes = tuple(
            marker for marker in IGNORED_TOKENS if marker is not None
        )
        try:
            stream = client.text_generation(
                prompt=text_input,
                max_new_tokens=300,
                stop_sequences=STOP_TOKENS,
                stream=True,
            )
            for token in stream:
                if token is None or token.startswith(ignored_prefixes):
                    continue
                history[-1][1] += token
                response["content"] += token
                yield history
        finally:
            # This callback's only declared output is `chatbot`, so the message
            # list is updated by mutation. Doing it in `finally` records the
            # assistant turn even when streaming stops early.
            dict_history.append(response)

    def clear_history(tfs_history):
        """Return the message list cut down to its first entry.

        Args:
            tfs_history: Role/content message dicts.

        Returns:
            A new list holding at most the first element of ``tfs_history``.
        """
        tfs_history = tfs_history[:1]
        return tfs_history

    # Submit: store the prompt immediately (unqueued), then stream the reply.
    msg.submit(
        user,
        inputs=[msg, chatbot, tfs_history],
        outputs=[msg, chatbot, tfs_history],
        queue=False,
    ).then(
        bot,
        [chatbot, tfs_history],
        chatbot,
    )

    # Clear: empty the visible chat and reset the message list.
    clear.click(lambda: None, None, chatbot, queue=False)
    clear.click(clear_history, tfs_history, tfs_history, queue=False)
# Turn on request queuing, then start serving the app.
demo.queue().launch()