import re
import logging
import json

from langchain.schema import (
    HumanMessage,
    SystemMessage,
)


def save_logs(scheduler, JSON_DATASET_PATH, logs) -> None:
    """
    Append the question/answer log of every interaction with the app to a
    JSONL file. The logs are used to gather usage statistics and to evaluate
    model performance.
    """
    with scheduler.lock:
        with JSON_DATASET_PATH.open("a") as f:
            json.dump(logs, f)
            f.write("\n")
        print("logging done")


def get_message_template(type, SYSTEM_PROMPT, USER_PROMPT):
    """Build the chat messages in the format expected by the selected backend type."""
    if type == 'NVIDIA':
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": USER_PROMPT},
        ]
    elif type == 'DEDICATED':
        messages = [
            SystemMessage(content=SYSTEM_PROMPT),
            HumanMessage(content=USER_PROMPT),
        ]
    else:
        messages = None

    return messages


def make_html_source(source, i):
    """
    Take a retrieved source document and convert it into an HTML card for
    display in the "Sources" side tab.
    """
    meta = source.metadata
    content = source.page_content.strip()

    name = meta['filename']
    # NOTE: the original card markup was stripped during extraction; this is a
    # minimal reconstruction. The id="doc{i}" anchor is assumed to match the
    # "#doc..." links produced by parse_output_llm_with_sources below.
    card = f"""
    <div class="card" id="doc{i}">
        <div class="card-content">
            <h2>Doc {i} - {meta['filename']} - Page {int(meta['page'])}</h2>
            <p>{content}</p>
        </div>
    </div>
""" return card def parse_output_llm_with_sources(output): # Split the content into a list of text and "[Doc X]" references content_parts = re.split(r'\[(Doc\s?\d+(?:,\s?Doc\s?\d+)*)\]', output) parts = [] for part in content_parts: if part.startswith("Doc"): subparts = part.split(",") subparts = [subpart.lower().replace("doc","").strip() for subpart in subparts] subparts = [f"""{subpart}""" for subpart in subparts] parts.append("".join(subparts)) else: parts.append(part) content_parts = "".join(parts) return content_parts