from time import perf_counter
from jinja2 import Template
from backend.semantic_search import qd_retriever
# Jinja2 prompt templates for retrieval-augmented generation (RAG).
# `template_string` is the plain-text prompt sent to the LLM; it inlines the
# full content of every retrieved document, separated by `---`, followed by
# the user's query.
template_string = """
Instructions: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
Context:
---
{% for doc in documents %}
{{ doc.content }}
---
{% endfor %}
Query: {{ query }}
"""

# Markdown-flavoured variant displayed in the UI's prompt-preview panel: each
# document is numbered (`loop.index`) and prefixed with a 50-character excerpt
# before its full content.
md_template_string = """
Instructions:
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Context:
{% for doc in documents %}
Doc {{ loop.index }}
{{ doc.content[:50] }}...
{{ doc.content }}
{% endfor %}
Query: {{ query }}
"""

# Compile both templates once at import time so each bot() call only renders.
template = Template(template_string)
md_template = Template(md_template_string)
import gradio as gr
from backend.query_llm import generate
def add_text(history, text):
    """Append the user's message to the chat history and lock the input box.

    Returns the extended history — a new list with the user turn added and no
    bot reply yet — plus a cleared, non-interactive Textbox so further input
    is disabled while the bot is generating.
    """
    updated = list(history or [])
    updated.append((text, None))
    return updated, gr.Textbox(value="", interactive=False)
def bot(history, system_prompt=""):
    """Answer the latest user message with retrieval-augmented generation.

    Retrieves the top-k documents for the query (the last user turn in
    `history`), renders them into the LLM prompt and the markdown preview,
    then streams the LLM's response into the last history entry.

    Args:
        history: chat history; the last entry holds the pending user query.
        system_prompt: unused; kept for interface compatibility with callers.

    Yields:
        (history, md_prompt) pairs as the response streams in, so the Gradio
        UI updates the chatbot and the prompt-preview panel incrementally.
    """
    top_k = 5
    query = history[-1][0]

    # Retrieve documents relevant to the query, timing the retrieval.
    document_start = perf_counter()
    documents = qd_retriever.retrieve(query, top_k=top_k)
    # Bug fix: elapsed time is now - start; the original computed
    # start - now, which is always negative.
    # NOTE(review): currently measured but not surfaced anywhere — wire it
    # into the UI or a logger if retrieval latency should be reported.
    document_time = perf_counter() - document_start

    # Render the LLM prompt and the markdown preview shown in the UI.
    prompt = template.render(documents=documents, query=query)
    md_prompt = md_template.render(documents=documents, query=query)

    # Stream the generated answer into the last history turn. `generate`
    # appears to yield progressively longer strings (each assignment replaces
    # the reply) — confirm against backend.query_llm.generate.
    history[-1][1] = ""
    for character in generate(prompt, history[:-1]):
        history[-1][1] = character
        yield history, md_prompt
# Gradio UI: an "Application" tab with the chatbot, a text-input row, and an
# HTML panel that displays the rendered retrieval prompt for inspection.
with gr.Blocks() as demo:
    with gr.Tab("Application"):
        chatbot = gr.Chatbot(
            [],
            elem_id="chatbot",
            avatar_images=('examples/lama.jpeg', 'examples/lama2.jpeg'),
            bubble_full_width=False,
            show_copy_button=True,
            show_share_button=True,
        )
        with gr.Row():
            txt = gr.Textbox(
                scale=3,
                show_label=False,
                placeholder="Enter text and press enter",
                container=False,
            )
            txt_btn = gr.Button(value="Submit text", scale=1)
        # Shows the markdown prompt rendered by bot() for the last query.
        prompt_md = gr.HTML()

    # Button click: add the user text (and disable the textbox), then stream
    # the bot response into the chatbot and the prompt-preview panel.
    txt_msg = txt_btn.click(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
        bot, chatbot, [chatbot, prompt_md])
    # Turn interactivity back on once generation finishes.
    txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)

    # Same wiring for pressing Enter in the textbox.
    txt_msg = txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
        bot, chatbot, [chatbot, prompt_md])
    # Turn interactivity back on once generation finishes.
    txt_msg.then(lambda: gr.Textbox(interactive=True), None, [txt], queue=False)

    # Clickable example queries that populate the textbox.
    gr.Examples(['What is the largest city on earth?', 'Who has the record for the fastest mile?'], txt)

# Enable queuing (required for streaming generators) and launch the app.
demo.queue()
demo.launch(debug=True)