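"""Smart Assistant: a Gradio Space that indexes an uploaded PDF into a Chroma
vector store and answers questions about it through a RAG pipeline
(multi-query retrieval plus LLM contextual compression over BGE embeddings)."""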
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.chat_models import ChatOpenAI
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.retrievers import ContextualCompressionRetriever
from langchain.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate
from langchain_community.vectorstores import Chroma
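
# The Space needs an OPENAI_API_KEY secret for ChatOpenAI. As an assumption
# (no requirements file is shown here), it also needs roughly these packages:
# gradio, langchain, langchain-community, chromadb, pypdf,
# sentence-transformers, openai.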

# ChatOpenAI reads the API key from the OPENAI_API_KEY environment variable.
chat = ChatOpenAI()

# BGE embeddings should be normalized so retrieval uses cosine similarity.
embedding_function = HuggingFaceEmbeddings(
    model_name="BAAI/bge-large-en-v1.5",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)

def add_docs(path):
    """Load a PDF, split it into overlapping chunks, and index them in Chroma."""
    loader = PyPDFLoader(file_path=path)
    docs = loader.load_and_split(
        text_splitter=RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=100,
            length_function=len,
            is_separator_regex=False,
        )
    )
    db = Chroma.from_documents(
        documents=docs,
        embedding=embedding_function,
        persist_directory="output/general_knowledge",
    )
    return db

def answer_query(message, chat_history):
    """Answer a question using only content retrieved from the indexed PDF."""
    # Reload the index persisted by add_docs().
    db = Chroma(
        persist_directory="output/general_knowledge",
        embedding_function=embedding_function,
    )
    base_retriever = db.as_retriever()
    # Rephrase the question several ways to improve recall ...
    mq_retriever = MultiQueryRetriever.from_llm(retriever=base_retriever, llm=chat)
    # ... then keep only the passages of each hit that are relevant to the query.
    base_compressor = LLMChainExtractor.from_llm(chat)
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=base_compressor, base_retriever=mq_retriever
    )
    matched_docs = compression_retriever.get_relevant_documents(query=message)
    context = "\n\n".join(doc.page_content for doc in matched_docs)

    template = """
Answer the following question using only the context given below in triple backticks; do not use any other information.
If you can't answer the question from the given context, return an empty string ('').

Context: ```{context}```
----------------------------
Question: {query}
----------------------------
Answer: """
    human_message_prompt = HumanMessagePromptTemplate.from_template(template=template)
    chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
    prompt = chat_prompt.format_prompt(query=message, context=context)
    response = chat.invoke(prompt.to_messages()).content
    chat_history.append((message, response))
    # Clear the textbox and return the updated chat history.
    return "", chat_history

with gr.Blocks() as demo:
    gr.HTML("<h1 align='center'>Smart Assistant</h1>")
    gr.HTML("<h2 align='center'>Upload any PDF and ask your questions.</h2>")
    with gr.Row():
        upload_files = gr.File(label="Upload a PDF", file_types=[".pdf"], file_count="single")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Enter your question here")
    # Index the uploaded PDF; afterwards, questions are answered from the index.
    upload_files.upload(add_docs, upload_files)
    msg.submit(answer_query, [msg, chatbot], [msg, chatbot])
if __name__ == "__main__":
    demo.launch()