import gradio as gr
import time
from llm import Gemma2B
llm = Gemma2B()
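# Assumed interface of the local `llm` module (not shown in this file):
# Gemma2B.inference_cpu(messages) takes a list of {"role", "content"} dicts
# and returns the raw decoded string, including Gemma's "<start_of_turn>"
# turn markers and trailing "<eos>" token, which inference() strips below.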
def inference(message, history, system_prompt, tokens):
    # Rebuild the running conversation in Gemma's two-role chat format.
    # Gemma's chat template has no "system" role, so system_prompt is
    # collected by the UI but not forwarded to the model here.
    chat_template = []
    for user, model in history:
        chat_template += [
            {"role": "user", "content": user},
            {"role": "model", "content": model},
        ]
    raw = llm.inference_cpu(
        chat_template + [{"role": "user", "content": message}]
    )
    # Keep only the model's final turn, then drop the role marker and the
    # end-of-sequence token as substrings (str.strip removes characters,
    # not substrings, and would eat letters off the reply itself).
    response = raw.split("<start_of_turn>")[-1].strip()
    response = response.removeprefix("model").removesuffix("<eos>").strip()
    # Stream the reply one character at a time, capped at the Tokens slider value.
    for i in range(min(len(response), int(tokens))):
        time.sleep(0.05)
        yield response[: i + 1]
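# For a one-turn history, the message list handed to inference_cpu looks like
# (contents here are illustrative):
# [
#     {"role": "user", "content": "Hi"},
#     {"role": "model", "content": "Hello! How can I help?"},
#     {"role": "user", "content": "<the new message>"},
# ]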
demo = gr.ChatInterface(
    inference,
    additional_inputs=[
        gr.Textbox("You are a helpful AI.", label="System Prompt"),
        gr.Slider(10, 200, 100, label="Tokens"),
    ],
)
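# Gradio streams generator output through its request queue, so queue() is
# enabled before launch; without it, partial yields would not reach the client.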
demo.queue().launch(debug=True)