Spaces:

sfarrukh
/

qualchat

Build error

App Files Files Community

sfarrukh committed on Oct 9

Commit

5da247a

•

1 Parent(s): 6eec715

recovered

Browse files

Files changed (5) hide show

app.py +82 -4
data/processed/ccr_qual.json +0 -0
data/processed/final_data_for_vectorstore.json +0 -0
data/processed/text_chunks.json +0 -0
requirements.txt +10 -0

app.py CHANGED Viewed

@@ -1,7 +1,85 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

 import gradio as gr
+import os
+from dotenv import load_dotenv
+load_dotenv()
+# Use followin json data to feed to Chroma
+import json
+with open("final_data_for_vectorstore.json",'r') as file:
+    data4chroma= json.load(file)
+# Initiate vector store
+from langchain_community.vectorstores import Chroma
+from langchain_huggingface import HuggingFaceEmbeddings
+embedding_function=HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')
+vectorstore=Chroma.from_texts(texts=data4chroma['chunks'],
+                             embedding=embedding_function,
+                             ids=data4chroma["chunk_ids"],
+                             metadatas=data4chroma["chunk_metadatas"],
+                             collection_name='qual_books',
+                             )
+# Prompt template for the RAG chain. It tells the model to answer only from the
+# supplied context, to say so when the answer is not there, to rewrite the
+# source text in clear English, and to use bullet points.
+# The {context} and {question} placeholders are filled from the dict that
+# respond() passes to the chain.
+from langchain_core.prompts import ChatPromptTemplate
+template="""You are a helpful AI assistant. Please answer the query based on provided context.\
+         *Do not make any assumptions if you don't know the answer. In that case just respond by saying\
+          the answer of query cannot be found in the given context.
+         *The English of the provided text is not well-structured. You should respond with the same content but in improved, clear, and correct English, without simply copying the original text.
+         *Also provide the response in bullet points but in detail where necessary.
+    Context: {context}
+    Query: {question}
+    Answer:
+         """
+prompt= ChatPromptTemplate.from_template(template)
+# Remote LLM served through the Hugging Face endpoint API, configured with a
+# near-zero temperature and a 3000-token generation budget.
+# NOTE(review): the API token is read from the lowercase environment variable
+# "huggingfacehub_api_token" (load_dotenv() has been called earlier in this
+# script) — confirm the secret is defined under exactly this name; os.getenv
+# returns None when it is missing.
+from langchain_huggingface import HuggingFaceEndpoint
+llm=HuggingFaceEndpoint(repo_id="meta-llama/Meta-Llama-3.1-70B-Instruct",
+                               max_new_tokens=3000,
+                              top_k=20,
+                                top_p=0.95,
+                                typical_p=0.95,
+                                temperature=0.001,
+                                repetition_penalty=1.03,
+                                huggingfacehub_api_token=os.getenv("huggingfacehub_api_token")
+                                 )
+# Compose prompt and model: the chain is invoked with a dict holding
+# "context" and "question" and yields the model's output.
+chain = prompt | llm
+def respond(
+            query: str,
+            data_type: str = "Preprocessed doc",
+            llm_chain = chain,
+            vectorstore=vectorstore
+            ):
+    """
+    Generate a response to a user query using document retrieval and language model
+    completion
+    Parameters:
+    chatbot (List): List representing the chatbot's conversation history.
+    message (str): The user's query.
+    data_type (str): Type of data used for document retrieval
+    temperature (float);
+    Returns:
+    Tuple: A tuple containing an empty string, the updated chat history,
+    and reference from retrieved documents
+    """
+    # Retrieve embedding function from code env resources
+    if data_type=="Preprocessed doc":
+        retriever=vectorstore.as_retriever(search_type="mmr",
+                              search_kwargs={"k":10,"fetch_k":100})
+        retrieved_docs=retriever.invoke(query)
+        input_2_chain={"context": retrieved_docs, "question":query}
+        response=llm_chain.invoke(input_2_chain)
+    return  response
+# Expose respond() through a single text-in / text-out Gradio interface.
+# Only one text input is wired up, so respond()'s remaining parameters keep
+# their default values.
+demo = gr.Interface(fn=respond, inputs="text", outputs="text")
+# NOTE(review): share=True asks Gradio for a public share link; presumably this
+# is unnecessary when the app is hosted on a Space — confirm and drop if so.
+demo.launch(share=True)

data/processed/ccr_qual.json ADDED Viewed

The diff for this file is too large to render. See raw diff

data/processed/final_data_for_vectorstore.json ADDED Viewed

The diff for this file is too large to render. See raw diff

data/processed/text_chunks.json ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+chromadb
+langchain
+langchain_community
+langchain-huggingface
+langchain_chroma
+gradio
+gradio_client
+python-dotenv
+sentence-transformers
+huggingface