File size: 1,506 Bytes
4e19582
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2fb30fd
4e19582
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import functools

import panel as pn
from langchain.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager
from langchain_core.prompts import PromptTemplate

from setup import OLMO_MODEL

# OLMo's instruct chat format wraps the user's message in an <|user|> turn
# and expects generation to continue after an <|assistant|> tag.  The
# previous template closed with a second <|user|> tag, which cues the model
# to write *another user turn* instead of answering the question.
template = """<|user|>
{contents}
<|assistant|>
"""
prompt = PromptTemplate.from_template(template)

# Global Panel page settings: Bootstrap design, widgets stretch to fill width.
pn.extension(design="bootstrap", sizing_mode="stretch_width")

# Model identity used for both loading the weights and labelling chat turns.
model_path = OLMO_MODEL
model_name = "OLMo"
model_avatar = "🌳"

@functools.cache
def _load_llm():
    """Build the LlamaCpp model exactly once.

    Loading the GGUF weights from disk is by far the most expensive step;
    previously a fresh model was constructed for every chat message.  The
    cached instance is shared across calls, and per-message callbacks are
    supplied at invoke time instead of at construction time.
    """
    return LlamaCpp(
        model_path=str(model_path),
        temperature=0.8,
        max_tokens=512,
        verbose=False,
        echo=False,
    )

def callback(contents, user, instance):
    """Panel ChatInterface callback: stream OLMo's reply into the chat feed.

    Parameters
    ----------
    contents : str
        The user's message, substituted into the prompt template.
    user : str
        Display name of the sender (unused here).
    instance : pn.chat.ChatInterface
        The chat widget that the streamed tokens are rendered into.
    """
    handler = pn.chat.langchain.PanelCallbackHandler(
        instance, user=model_name, avatar=model_avatar
    )
    # Do not re-emit the full result at the end of generation — the tokens
    # have already been streamed, so this prevents the reply being repeated.
    handler.on_llm_end = lambda response, *args, **kwargs: None
    chain = prompt | _load_llm()
    # Passing the handler via the run config streams this message's tokens
    # into *this* chat instance without rebuilding the model per message.
    chain.invoke({"contents": contents}, config={"callbacks": [handler]})

# Wire the chat widget to the model callback.
chat_interface = pn.chat.ChatInterface(callback=callback)

# Dashboard layout: a heading plus the chat feed in a fixed-height,
# scrollable column.
main = pn.WidgetBox(
    f"##### {model_avatar} Chat with {model_name} with the interface below!",
    pn.Column(chat_interface, height=500, scroll=True),
)

# Serve everything inside a Bootstrap page template.
title = f"{model_name} Chat Demo"
dashboard = pn.template.BootstrapTemplate(
    title=title,
    main=main,
    main_max_width="100%",
)
dashboard.servable(title=title)