import gradio as gr
import torch
from transformers import pipeline
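# Load Dolly v2 3B as a Hugging Face text-generation pipeline; device_map="auto" places the model on a GPU if one is available, otherwise on CPU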
generate_text = pipeline(model="databricks/dolly-v2-3b", torch_dtype=torch.bfloat16, trust_remote_code=True, device_map="auto")
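# Soft theme with a custom red palette (11 shades, lightest to darkest) used for the primary accent color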
theme = gr.themes.Soft(
primary_hue=gr.themes.Color("#ededed", "#fee2e2", "#fecaca", "#fca5a5", "#f87171", "#ef4444", "#dc2626", "#b91c1c", "#991b1b", "#7f1d1d", "#6c1e1e"),
neutral_hue="red",
)
title = """
Chat with the awesome Dolly V2 3B model
🏃‍♂️💨 Streaming with Transformers & Gradio 💪
"""
with gr.Blocks(theme=theme) as demo:
    gr.HTML(title)
    gr.HTML("This demo runs on CPU, so it might be a bit slow. Also, the model is great with language but may fail at questions involving numbers. If you want to skip the waiting time, just clone the app.")
    # Chat window (500 px tall), message input box, and a button to clear the conversation
    Dolly = gr.Chatbot().style(height=500)
    msg = gr.Textbox()
    clear = gr.Button("Clear")
    def respond(message, chat_history):
        # Run the user's message through the Dolly pipeline and take the generated text
        res = generate_text(message)
        bot_message = res[0]["generated_text"]
        # Append the (user, bot) pair to the history, clear the textbox, and refresh the Chatbot
        chat_history.append((message, bot_message))
        return "", chat_history
    # Submitting the textbox sends the message; the Clear button resets the chat history
    msg.submit(respond, [msg, Dolly], [msg, Dolly])
    clear.click(lambda: None, None, Dolly, queue=False)
demo.launch(debug=True)