# (Scraped HuggingFace Spaces page header, preserved as a comment:
#  "Spaces: Runtime error" — the Space was failing at scrape time.)
# Standard library
import time

# Third-party
import gradio as gr

# Local
from llm import Gemma2B

# Instantiate the Gemma-2B wrapper once at module load so the model weights
# are loaded a single time and shared across all chat requests.
llm = Gemma2B()
def inference(message, history, system_prompt, tokens):
    """Stream a Gemma-2B reply for a Gradio ``ChatInterface``.

    Args:
        message: The latest user message.
        history: List of ``(user, model)`` message pairs from earlier turns.
        system_prompt: System instruction from the UI textbox.
            TODO(review): currently unused — Gemma's chat template has no
            "system" role; decide whether to fold it into the first user turn.
        tokens: UI slider value; upper bound on how many characters of the
            response are streamed back.

    Yields:
        Progressively longer prefixes of the model response, simulating
        token-by-token streaming (one character every 0.05 s).
    """
    # Rebuild the full conversation.  BUG FIX: the original *reassigned*
    # chat_template on every loop iteration, so only the last history turn
    # ever reached the model; accumulate with extend() instead.
    chat_template = []
    for user, model in history:
        chat_template.extend([
            {"role": "user", "content": user},
            {"role": "model", "content": model},
        ])
    chat_template.append({"role": "user", "content": message})

    raw = llm.inference_cpu(chat_template)
    # Keep only the final turn of the templated output.  BUG FIX: the
    # original used str.strip("model")/strip("<eos>"), which strips *any*
    # of those characters from the ends and can eat real response text
    # (e.g. "Hello" -> "Hell"); removeprefix/removesuffix trim the exact
    # marker strings only.
    response = (
        raw.split("<start_of_turn>")[-1]
        .removeprefix("model")
        .removesuffix("<eos>")
        .strip()
    )

    # BUG FIX: the original used max(), which made the Tokens slider a
    # no-op and padded the stream with idle duplicate yields; min() caps
    # the streamed length as the slider intends.
    for i in range(min(len(response), int(tokens))):
        time.sleep(0.05)
        yield response[: i + 1]
# Build the chat UI.  The two extra widgets are forwarded to `inference`
# positionally as its `system_prompt` and `tokens` arguments, in order.
demo = gr.ChatInterface(
    inference,
    additional_inputs=[
        gr.Textbox("You are helpful AI.", label="System Prompt"),
        gr.Slider(10, 200, 100, label="Tokens"),
    ],
)

# queue() is required for generator (streaming) handlers; debug=True keeps
# the process attached and prints tracebacks to the console.
demo.queue().launch(debug=True)