|
import gradio as gr |
|
import os |
|
from dotenv import load_dotenv |
|
load_dotenv() |
|
|
|
|
|
|
|
|
|
|
|
import json

# Load the pre-chunked corpus produced by the preprocessing pipeline.
# The dict is expected to carry parallel lists under "chunks" (texts),
# "chunk_ids" and "chunk_metadatas" — consumed below when building Chroma.
# encoding="utf-8" is explicit: the platform default (e.g. cp1252 on Windows)
# could otherwise mis-decode the corpus.
with open("data/processed/final_data_for_vectorstore.json", "r", encoding="utf-8") as file:
    data4chroma = json.load(file)
|
|
|
|
|
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

# Embed each chunk with a compact sentence-transformer model and index the
# whole corpus into an in-memory Chroma collection for similarity retrieval.
embedding_function = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')

vectorstore = Chroma.from_texts(
    texts=data4chroma['chunks'],
    embedding=embedding_function,
    ids=data4chroma["chunk_ids"],
    metadatas=data4chroma["chunk_metadatas"],
    collection_name='qual_books',
)
|
|
|
|
|
from langchain_core.prompts import ChatPromptTemplate

# RAG prompt contract: answer strictly from the retrieved {context}, refuse
# when the answer is not present, rewrite the (poorly structured) source
# English cleanly, and format the response as bullet points.
# NOTE: the trailing backslashes inside the triple-quoted string are line
# continuations — they splice adjacent lines into one logical sentence.
template="""You are a helpful AI assistant. Please answer the query based on provided context.\
*Do not make any assumptions if you don't know the answer. In that case just respond by saying\
the answer of query cannot be found in the given context.
*The English of the provided text is not well-structured. You should respond with the same content but in improved, clear, and correct English, without simply copying the original text.
*Also provide the response in bullet points but in detail where necessary.
Context: {context}
Query: {question}

Answer:
"""
# Compile the raw template into a LangChain prompt with {context}/{question}
# input variables, ready to be piped into the LLM.
prompt= ChatPromptTemplate.from_template(template)
|
|
|
from langchain_huggingface import HuggingFaceEndpoint

# Remote Llama-3.1-70B-Instruct served through the Hugging Face Inference API.
# Near-zero temperature plus a mild repetition penalty biases the model toward
# deterministic, grounded answers for this RAG use case.
# NOTE(review): the token is read from the lowercase env var
# "huggingfacehub_api_token" — unconventional casing; confirm it matches .env.
llm=HuggingFaceEndpoint(repo_id="meta-llama/Meta-Llama-3.1-70B-Instruct",
                        max_new_tokens=3000,
                        top_k=20,
                        top_p=0.95,
                        typical_p=0.95,
                        temperature=0.001,
                        repetition_penalty=1.03,
                        huggingfacehub_api_token=os.getenv("huggingfacehub_api_token")
                        )
# LCEL composition: prompt formatting piped into the LLM call.
chain = prompt | llm
|
|
|
|
|
def respond(
    query: str,
    data_type: str = "Preprocessed doc",
    llm_chain=chain,
    vectorstore=vectorstore,
):
    """Answer a user query with retrieval-augmented generation.

    Retrieves the most relevant chunks from the vectorstore via MMR search
    and feeds them, together with the query, through the prompt|LLM chain.

    Parameters:
        query (str): The user's question.
        data_type (str): Retrieval source; only "Preprocessed doc" is
            currently supported.
        llm_chain: Runnable (prompt | llm) that produces the final answer.
        vectorstore: Chroma index to retrieve context chunks from.

    Returns:
        str: The LLM's answer grounded in the retrieved context.

    Raises:
        ValueError: If ``data_type`` is not a supported retrieval source.
    """
    if data_type != "Preprocessed doc":
        # Bug fix: previously an unsupported data_type fell through and the
        # function crashed with a NameError on `retrieved_docs`. Fail fast
        # with an explicit, actionable error instead.
        raise ValueError(
            f"Unsupported data_type: {data_type!r}; expected 'Preprocessed doc'."
        )

    # MMR retrieval: pull 100 candidates, keep the 10 most relevant/diverse.
    retriever = vectorstore.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 10, "fetch_k": 100},
    )
    retrieved_docs = retriever.invoke(query)

    # Keys must match the prompt template's {context} and {question} slots.
    input_2_chain = {"context": retrieved_docs, "question": query}

    return llm_chain.invoke(input_2_chain)
|
|
|
|
|
# Minimal Gradio UI: a single text box in, a single text box out, wired to
# `respond` (extra parameters keep their defaults).
demo = gr.Interface(fn=respond, inputs="text", outputs="text")
# share=True publishes a temporary public *.gradio.live URL in addition to
# the local server.
demo.launch(share=True)