# qualchat / app.py
import gradio as gr
import os
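# Load environment variables (e.g. the Hugging Face API token) from a local .env file.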
from dotenv import load_dotenv
load_dotenv()
# Load the preprocessed JSON data that will be fed into Chroma.
import json

with open("data/processed/final_data_for_vectorstore.json", "r") as file:
    data4chroma = json.load(file)
# Initiate the vector store.
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = Chroma.from_texts(
    texts=data4chroma["chunks"],
    embedding=embedding_function,
    ids=data4chroma["chunk_ids"],
    metadatas=data4chroma["chunk_metadatas"],
    collection_name="qual_books",
)
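# Optional sanity check (hypothetical query string): confirm that retrieval
# returns sensible chunks before wiring up the chain.
# docs = vectorstore.similarity_search("what is qualitative coding?", k=3)
# print(docs[0].page_content[:200])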
from langchain_core.prompts import ChatPromptTemplate

template = """You are a helpful AI assistant. Please answer the query based on the provided context.
*Do not make assumptions if you don't know the answer; in that case, simply respond that the answer to the query cannot be found in the given context.
*The English of the provided text is not well-structured. Respond with the same content in improved, clear, and correct English, without simply copying the original text.
*Provide the response in bullet points, in detail where necessary.
Context: {context}
Query: {question}
Answer:
"""
prompt = ChatPromptTemplate.from_template(template)
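# Example (hypothetical placeholder values): render the template to inspect the
# final prompt string the LLM will receive.
# print(prompt.format(context="<retrieved chunks>", question="<user query>"))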
from langchain_huggingface import HuggingFaceEndpoint

# Note: the conventional environment-variable name is HUGGINGFACEHUB_API_TOKEN;
# here the token is read from the lowercase key defined in this repo's .env.
llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3.1-70B-Instruct",
    max_new_tokens=3000,
    top_k=20,
    top_p=0.95,
    typical_p=0.95,
    temperature=0.001,
    repetition_penalty=1.03,
    huggingfacehub_api_token=os.getenv("huggingfacehub_api_token"),
)
# LCEL pipeline: the formatted prompt is piped into the LLM.
chain = prompt | llm
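# Example (hypothetical inputs): the chain can be invoked directly with a dict
# matching the template's variables.
# answer = chain.invoke({"context": "Some passage...", "question": "What does it say?"})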
def respond(
    query: str,
    data_type: str = "Preprocessed doc",
    llm_chain=chain,
    vectorstore=vectorstore,
):
    """
    Generate a response to a user query using document retrieval and
    language-model completion.

    Parameters:
        query (str): The user's query.
        data_type (str): Type of data used for document retrieval.
        llm_chain: The prompt-to-LLM chain used to generate the answer.
        vectorstore: The Chroma vector store used for retrieval.

    Returns:
        str: The model's answer, based on the retrieved documents.
    """
    # Retrieve relevant chunks from the vector store.
    if data_type == "Preprocessed doc":
        # Maximal-marginal-relevance search: fetch 100 candidates, keep the 10
        # most relevant yet diverse chunks.
        retriever = vectorstore.as_retriever(
            search_type="mmr", search_kwargs={"k": 10, "fetch_k": 100}
        )
        retrieved_docs = retriever.invoke(query)
    else:
        # Without this guard, retrieved_docs would be undefined below.
        raise ValueError(f"Unsupported data_type: {data_type}")
    input_2_chain = {"context": retrieved_docs, "question": query}
    response = llm_chain.invoke(input_2_chain)
    return response
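# Example (hypothetical query): call respond directly, without the UI.
# print(respond("How is thematic analysis performed?"))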
# Minimal Gradio UI: one text box in, one text box out. Note that share=True
# is ignored when the app runs on Hugging Face Spaces.
demo = gr.Interface(fn=respond, inputs="text", outputs="text")
demo.launch(share=True)