sfarrukh committed on
Commit
5da247a
1 Parent(s): 6eec715
app.py CHANGED
@@ -1,7 +1,85 @@
1
  import gradio as gr
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import os
3
+ from dotenv import load_dotenv
4
+ load_dotenv()
5
 
 
 
6
 
7
+
8
+
9
# Load the pre-chunked corpus that is fed to Chroma.
import json
from pathlib import Path

# The data file is looked up in the working directory first (the original
# behaviour) and then under data/processed/, where the repository stores it.
# If neither exists, the original path is kept so the resulting
# FileNotFoundError names the file the app has always asked for.
_DATA_FILE_NAME = "final_data_for_vectorstore.json"
_data_path = next(
    (
        candidate
        for candidate in (
            Path(_DATA_FILE_NAME),
            Path("data") / "processed" / _DATA_FILE_NAME,
        )
        if candidate.is_file()
    ),
    Path(_DATA_FILE_NAME),
)

# Explicit encoding so the load does not depend on the host's locale.
with open(_data_path, "r", encoding="utf-8") as file:
    data4chroma = json.load(file)

# Initiate vector store
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

embedding_function = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')

# The JSON is expected to provide three parallel entries: "chunks" (texts),
# "chunk_ids" and "chunk_metadatas"; they are passed straight to Chroma.
vectorstore = Chroma.from_texts(
    texts=data4chroma['chunks'],
    embedding=embedding_function,
    ids=data4chroma["chunk_ids"],
    metadatas=data4chroma["chunk_metadatas"],
    collection_name='qual_books',
)
24
+
25
+
26
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFaceEndpoint

# Prompt with two placeholders, {context} and {question}, filled in per query.
template="""You are a helpful AI assistant. Please answer the query based on provided context.\
*Do not make any assumptions if you don't know the answer. In that case just respond by saying\
the answer of query cannot be found in the given context.
*The English of the provided text is not well-structured. You should respond with the same content but in improved, clear, and correct English, without simply copying the original text.
*Also provide the response in bullet points but in detail where necessary.
Context: {context}
Query: {question}

Answer:
"""
prompt = ChatPromptTemplate.from_template(template)

# Generation settings for the hosted model, grouped in one place.
# NOTE(review): the near-zero temperature presumably aims at stable,
# repeatable answers -- confirm before tuning.
_generation_settings = {
    "max_new_tokens": 3000,
    "top_k": 20,
    "top_p": 0.95,
    "typical_p": 0.95,
    "temperature": 0.001,
    "repetition_penalty": 1.03,
}

# The API token is read from the environment (populated by load_dotenv()).
llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3.1-70B-Instruct",
    huggingfacehub_api_token=os.getenv("huggingfacehub_api_token"),
    **_generation_settings,
)

# Pipe the filled-in prompt into the model.
chain = prompt | llm
50
+
51
+
52
def respond(
    query: str,
    data_type: str = "Preprocessed doc",
    llm_chain=chain,
    vectorstore=vectorstore,
):
    """Answer a user query from document chunks retrieved out of the vector store.

    Parameters:
        query (str): The user's question.
        data_type (str): Type of data used for document retrieval. Only
            "Preprocessed doc" is supported.
        llm_chain: Object whose ``invoke`` receives a dict with the keys
            "context" and "question". Defaults to the module-level chain.
        vectorstore: Store that provides ``as_retriever``. Defaults to the
            module-level Chroma store.

    Returns:
        Whatever ``llm_chain.invoke`` returns (with the default chain this is
        presumably the generated answer text -- confirm against the endpoint).

    Raises:
        ValueError: If ``data_type`` is not a supported value.
    """
    # Fail with a clear message instead of hitting an unbound local further
    # down when an unknown data type is requested.
    if data_type != "Preprocessed doc":
        raise ValueError(
            f"Unsupported data_type {data_type!r}; expected 'Preprocessed doc'."
        )

    # MMR search: pick 10 chunks out of the 100 fetched candidates.
    retriever = vectorstore.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 10, "fetch_k": 100},
    )
    retrieved_docs = retriever.invoke(query)

    # Hand the model the chunk texts only; interpolating the Document objects
    # themselves would put their repr (metadata, field names) into the prompt.
    context = "\n\n".join(doc.page_content for doc in retrieved_docs)

    return llm_chain.invoke({"context": context, "question": query})
82
+
83
+
84
# Minimal UI: one text input for the query, one text output for the answer,
# both wired to respond().
demo = gr.Interface(
    fn=respond,
    inputs="text",
    outputs="text",
)
demo.launch(share=True)
data/processed/ccr_qual.json ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/final_data_for_vectorstore.json ADDED
The diff for this file is too large to render. See raw diff
 
data/processed/text_chunks.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ chromadb
2
+ langchain
3
+ langchain_community
4
+ langchain-huggingface
5
+ langchain_chroma
6
+ gradio
7
+ gradio_client
8
+ python-dotenv
9
+ sentence-transformers
10
+ huggingface