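# app.py for a Gradio Space: a RAG assistant that indexes an uploaded PDF
# into a Chroma vectorstore and answers questions over it with ChatOpenAI,
# using multi-query retrieval plus contextual compression.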
import gradio as gr
import os

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.chat_models import ChatOpenAI
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.retrievers import ContextualCompressionRetriever
from langchain.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_community.vectorstores import Chroma
# ChatOpenAI reads the OPENAI_API_KEY environment variable; set it as a
# Space secret before launching.
chat = ChatOpenAI()

# BGE embeddings are meant to be normalized so similarity search behaves
# like cosine similarity.
embedding_function = HuggingFaceEmbeddings(
    model_name="BAAI/bge-large-en-v1.5",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)
def add_docs(path):
    """Load an uploaded PDF, split it into chunks, and index it in Chroma."""
    # gr.File may hand back a tempfile wrapper rather than a plain path
    # string, so fall back to its .name attribute when needed.
    file_path = path if isinstance(path, str) else path.name
    loader = PyPDFLoader(file_path=file_path)
    docs = loader.load_and_split(
        text_splitter=RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=100,
            length_function=len,
            is_separator_regex=False,
        )
    )
    db = Chroma.from_documents(
        documents=docs,
        embedding=embedding_function,
        persist_directory="output/general_knowledge",
    )
    return db
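# A minimal sketch of building the index outside the UI (the path below is
# hypothetical; assumes the PDF exists on disk):
#
#   db = add_docs("example.pdf")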
def answer_query(message, chat_history):
    """Answer a question using the persisted index and append to the chat."""
    # MultiQueryRetriever rewrites the question into several variants to
    # widen recall; ContextualCompressionRetriever then uses the
    # LLMChainExtractor to strip each hit down to the relevant passages.
    base_compressor = LLMChainExtractor.from_llm(chat)
    db = Chroma(
        persist_directory="output/general_knowledge",
        embedding_function=embedding_function,
    )
    base_retriever = db.as_retriever()
    mq_retriever = MultiQueryRetriever.from_llm(retriever=base_retriever, llm=chat)
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=base_compressor, base_retriever=mq_retriever
    )
    matched_docs = compression_retriever.get_relevant_documents(query=message)
    context = "\n\n".join(doc.page_content for doc in matched_docs)

    template = """
Answer the following question only by using the context given below in the triple backticks; do not use any other information to answer the question.
If you can't answer the given question with the given context, you can return an empty string ('').

Context: ```{context}```
----------------------------
Question: {query}
----------------------------
Answer: """
    human_message_prompt = HumanMessagePromptTemplate.from_template(template=template)
    chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
    prompt = chat_prompt.format_prompt(query=message, context=context)
    response = chat(messages=prompt.to_messages()).content
    chat_history.append((message, response))
    return "", chat_history
with gr.Blocks() as demo:
    gr.HTML("<h1 align='center'>Smart Assistant</h1>")
    gr.HTML("<h2 align='center'>Upload any PDF and ask your questions.</h2>")
    with gr.Row():
        upload_files = gr.File(label="Upload a PDF", file_types=[".pdf"], file_count="single")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Enter your question here")
    upload_files.upload(add_docs, upload_files)
    msg.submit(answer_query, [msg, chatbot], [msg, chatbot])

if __name__ == "__main__":
    demo.launch()
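# A requirements.txt sketch for this Space (package list inferred from the
# imports above; version pins are left to the reader):
#
#   gradio
#   langchain
#   langchain-community
#   openai
#   chromadb
#   pypdf
#   sentence-transformers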