Spaces:

fxmarty
/

tgi-mi300-demo-chat

Runtime error

fxmarty

add turned off message

f4901c0 5 months ago

2.31 kB

	import gradio as gr
	import random
	import time

	from huggingface_hub import InferenceClient
	from transformers import AutoTokenizer

	# Tokenizer for Llama 3 70B Instruct; `bot` uses it to render the chat
	# history into a single prompt string via its chat template.
	LLAMA3_MODEL_ID = "meta-llama/Meta-Llama-3-70B-Instruct"
	tokenizer = AutoTokenizer.from_pretrained(LLAMA3_MODEL_ID)

	# Client for the remote text-generation endpoint (an ngrok tunnel URL).
	ENDPOINT_URL = "https://6af4-4-223-164-145.ngrok-free.app"
	client = InferenceClient(model=ENDPOINT_URL)

	# System message that seeds every conversation's message list.
	SYSTEM_COMMAND = {
	    "role": "system",
	    "content": "You are a knowledgeable assistant trained to provide accurate and helpful information. Please respond to the user's queries promptly and politely.",
	}

	# Llama 3 special-token markers, passed as stop sequences for generation.
	# The pipes must NOT be backslash-escaped: "\|" keeps the backslash in the
	# string, so the marker would never match what the model emits.
	# The last entry is deliberately a prefix (no closing "|>").
	STOP_TOKENS = ["<|start_header_id|>", "<|end_header_id|>", "<|eot_id|>", "<|reserved_special_token"]

	# Streamed items that must not be shown to the user: None plus every marker
	# above. Derived from STOP_TOKENS so the two collections cannot drift apart.
	IGNORED_TOKENS = {None, *STOP_TOKENS}

	with gr.Blocks() as demo:
	    # Banner rendered at the top of the page.
	    gr.Markdown("This demo is currently turned off")

	    # Message list in chat-template form (role/content dicts), seeded with
	    # the system message. Kept separately from the Chatbot's display history.
	    tfs_history = gr.State([SYSTEM_COMMAND])
	    chatbot = gr.Chatbot()
	    msg = gr.Textbox(label="Prompt")
	    clear = gr.Button("Clear")

	    def user(user_message, history, dict_history):
	        """Record a submitted prompt in both histories and clear the textbox.

	        Args:
	            user_message: Text submitted through the prompt textbox.
	            history: Chatbot display history, a list of ``[user, bot]`` pairs.
	            dict_history: Role/content message list; appended to in place.

	        Returns:
	            A 3-tuple: an empty string (new textbox value), a new display
	            history with ``[user_message, None]`` appended, and
	            ``dict_history`` itself.
	        """
	        dict_history.append({"role": "user", "content": user_message})
	        return "", history + [[user_message, None]], dict_history

	    def bot(history, dict_history):
	        """Stream the model's reply into the last turn of the display history.

	        Args:
	            history: Chatbot display history; its last pair is filled in
	                incrementally as tokens arrive.
	            dict_history: Role/content message list used to build the prompt;
	                the assistant message is appended to it in place.

	        Yields:
	            ``history`` after each streamed item, so the UI can refresh.
	        """
	        history[-1][1] = ""
	        response = {"role": "assistant", "content": ""}
	        text_input = tokenizer.apply_chat_template(dict_history, tokenize=False, add_generation_prompt=True)

	        try:
	            for token in client.text_generation(prompt=text_input, max_new_tokens=300, stop_sequences=STOP_TOKENS, stream=True):
	                if token not in IGNORED_TOKENS:
	                    history[-1][1] += token
	                    response["content"] += token
	                yield history
	        finally:
	            # Runs even if streaming raises or the generator is closed early,
	            # so the message list always gains an assistant entry holding
	            # whatever text was produced so far.
	            # NOTE(review): only `chatbot` is an output of this step, so this
	            # relies on the State list being mutated in place - confirm.
	            dict_history.append(response)

	    def clear_history(tfs_history):
	        """Return the message list trimmed to its first entry.

	        The state is seeded with the system message, so this keeps that
	        message and discards the rest of the conversation.
	        """
	        return tfs_history[:1]

	    # Submitting a prompt records it first (unqueued), then streams the reply.
	    msg.submit(
	        user,
	        inputs=[msg, chatbot, tfs_history],
	        outputs=[msg, chatbot, tfs_history],
	        queue=False,
	    ).then(
	        bot,
	        [chatbot, tfs_history],
	        chatbot,
	    )

	    # "Clear" empties the visible chat and resets the message list.
	    clear.click(lambda: None, None, chatbot, queue=False)
	    clear.click(clear_history, tfs_history, tfs_history, queue=False)

	# Turn on the event queue, then start serving the app.
	demo.queue().launch()