import panel as pn
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager
from langchain_core.prompts import PromptTemplate

from setup import OLMO_MODEL

# OLMo's chat format: wrap the user's message in a user turn, then cue
# the model to generate the assistant turn.
template = """<|user|>\n{contents}\n<|assistant|>\n"""
prompt = PromptTemplate.from_template(template)

pn.extension(design="bootstrap", sizing_mode="stretch_width")

model_path = OLMO_MODEL
model_name = "OLMo"
model_avatar = "🌳"


def callback(contents, user, instance):
    # Stream the model's tokens straight into the chat interface.
    callback_handler = pn.chat.langchain.PanelCallbackHandler(
        instance, user=model_name, avatar=model_avatar
    )
    # Override on_llm_end with a no-op so the handler does not post the
    # full response a second time after it has already been streamed.
    callback_handler.on_llm_end = lambda response, *args, **kwargs: None
    # Callbacks support token-wise streaming
    callback_manager = CallbackManager([callback_handler])
    # The model is instantiated inside the callback so each reply streams
    # through its own handler; note this reloads the weights on every message.
    llm = LlamaCpp(
        model_path=str(model_path),
        callback_manager=callback_manager,
        temperature=0.8,
        max_tokens=512,
        verbose=False,
        echo=False,
    )
    llm_chain = prompt | llm
    llm_chain.invoke({"contents": contents})


chat_interface = pn.chat.ChatInterface(callback=callback)

# Create the dashboard layout
main = pn.WidgetBox(
    f"##### {model_avatar} Chat with {model_name} using the interface below!",
    pn.Column(chat_interface, height=500, scroll=True),
)

title = f"{model_name} Chat Demo"
pn.template.BootstrapTemplate(
    title=title,
    main=main,
    main_max_width="100%",
).servable(title=title)
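# Usage (a sketch; the filename "app.py" is an assumption, not given in this
# script). Panel serves any object marked .servable() when the script is run
# with the `panel serve` CLI, on localhost port 5006 by default:
#
#   panel serve app.py
#
# then open the printed URL (e.g. http://localhost:5006/app) in a browser.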