# (Scraped HuggingFace Spaces page header, preserved as a comment:
#  "Spaces: Runtime error" — the Space was failing at scrape time.)
# Standard library
import time

# Third-party
import gradio as gr

# Local
from llm import Gemma2B

# Instantiate the Gemma-2B wrapper once at module load so the model weights
# are loaded a single time and shared across all chat requests.
llm = Gemma2B()
def inference(message, history, system_prompt, tokens):
    """Stream a Gemma-2B reply for a Gradio ``ChatInterface``.

    Args:
        message: The latest user message.
        history: List of ``(user, model)`` message pairs from earlier turns.
        system_prompt: System instruction from the UI textbox.
            TODO(review): currently unused — Gemma's chat template has no
            "system" role; decide whether to fold it into the first user turn.
        tokens: UI slider value; upper bound on how many characters of the
            response are streamed back.

    Yields:
        Progressively longer prefixes of the model response, simulating
        token-by-token streaming (one character every 0.05 s).
    """
    # Rebuild the full conversation.  BUG FIX: the original *reassigned*
    # chat_template on every loop iteration, so only the last history turn
    # ever reached the model; accumulate with extend() instead.
    chat_template = []
    for user, model in history:
        chat_template.extend([
            {"role": "user", "content": user},
            {"role": "model", "content": model},
        ])
    chat_template.append({"role": "user", "content": message})

    raw = llm.inference_cpu(chat_template)
    # Keep only the final turn of the templated output.  BUG FIX: the
    # original used str.strip("model")/strip("<eos>"), which strips *any*
    # of those characters from the ends and can eat real response text
    # (e.g. "Hello" -> "Hell"); removeprefix/removesuffix trim the exact
    # marker strings only.
    response = (
        raw.split("<start_of_turn>")[-1]
        .removeprefix("model")
        .removesuffix("<eos>")
        .strip()
    )

    # BUG FIX: the original used max(), which made the Tokens slider a
    # no-op and padded the stream with idle duplicate yields; min() caps
    # the streamed length as the slider intends.
    for i in range(min(len(response), int(tokens))):
        time.sleep(0.05)
        yield response[: i + 1]
# Build the chat UI.  The two extra widgets are forwarded to `inference`
# positionally as its `system_prompt` and `tokens` arguments, in order.
demo = gr.ChatInterface(
    inference,
    additional_inputs=[
        gr.Textbox("You are helpful AI.", label="System Prompt"),
        gr.Slider(10, 200, 100, label="Tokens"),
    ],
)

# queue() is required for generator (streaming) handlers; debug=True keeps
# the process attached and prints tracebacks to the console.
demo.queue().launch(debug=True)