Almaatla commited on
Commit
ae5ea44
1 Parent(s): 9e87c9c

Updated chunk size and number of retrieved docs; switched the streaming model to Llama 3

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -31,8 +31,8 @@ def tiktoken_len(text):
31
  return len(tokens)
32
 
33
  text_splitter = RecursiveCharacterTextSplitter(
34
- chunk_size=800,
35
- chunk_overlap=400,
36
  length_function=tiktoken_len,
37
  separators=["\n\n", "\n", " ", ""]
38
  )
@@ -257,7 +257,7 @@ def ask_llm(system, user_input):
257
  client = Groq(api_key=os.environ["GROQ_KEY"])
258
  chat_completion = client.chat.completions.create(
259
  messages=messages,
260
- model='mixtral-8x7b-32768',
261
  )
262
  return chat_completion.choices[0].message.content
263
 
@@ -307,7 +307,7 @@ def ask_gpt(query, ui_session_id, history):
307
  print(f"SESSION: {session_id} database does not exist")
308
  return f"SESSION: {session_id} database does not exist","",""
309
 
310
- docs = db.similarity_search(query, k=5)
311
 
312
  documents = "\n\n*-*-*-*-*-*\n\n".join(f"Content: {doc.page_content}\n" for doc in docs)
313
  system = f"# Instructions\nTake a deep breath and resonate step by step.\nYou are a helpful standard assistant. Your have only one mission and that consists in answering to the user input based on the **provided documents**. If the answer to the question that is asked by the user isn't contained in the **provided documents**, say so but **don't make up an answer**. I chose you because you can say 'I don't know' so please don't do like the other LLMs and don't define acronyms that aren\'t present in the following **PROVIDED DOCUMENTS** double check if it is present before answering. If some of the information can be useful for the user you can tell him.\nFinish your response by **ONE** follow up question that the provided documents could answer.\n\nThe documents are separated by the string \'*-*-*-*-*-*\'. Do not provide any explanations or details.\n\n# **Provided documents**: {documents}."
 
31
  return len(tokens)
32
 
33
  text_splitter = RecursiveCharacterTextSplitter(
34
+ chunk_size=750,
35
+ chunk_overlap=350,
36
  length_function=tiktoken_len,
37
  separators=["\n\n", "\n", " ", ""]
38
  )
 
257
  client = Groq(api_key=os.environ["GROQ_KEY"])
258
  chat_completion = client.chat.completions.create(
259
  messages=messages,
260
+ model="llama3-70b-8192",#'mixtral-8x7b-32768',
261
  )
262
  return chat_completion.choices[0].message.content
263
 
 
307
  print(f"SESSION: {session_id} database does not exist")
308
  return f"SESSION: {session_id} database does not exist","",""
309
 
310
+ docs = db.similarity_search(query, k=4)
311
 
312
  documents = "\n\n*-*-*-*-*-*\n\n".join(f"Content: {doc.page_content}\n" for doc in docs)
313
  system = f"# Instructions\nTake a deep breath and resonate step by step.\nYou are a helpful standard assistant. Your have only one mission and that consists in answering to the user input based on the **provided documents**. If the answer to the question that is asked by the user isn't contained in the **provided documents**, say so but **don't make up an answer**. I chose you because you can say 'I don't know' so please don't do like the other LLMs and don't define acronyms that aren\'t present in the following **PROVIDED DOCUMENTS** double check if it is present before answering. If some of the information can be useful for the user you can tell him.\nFinish your response by **ONE** follow up question that the provided documents could answer.\n\nThe documents are separated by the string \'*-*-*-*-*-*\'. Do not provide any explanations or details.\n\n# **Provided documents**: {documents}."