Spaces:

NanBags
/

Gen_AI_RAG_PDF_CHATBOT

Running

App Files Files Community

NanBags committed on May 21

Commit

c9a97bb

•

1 Parent(s): ae2b92b

Create app.py

Browse files

This is the app.py file

Files changed (1) hide show

app.py +128 -0

app.py ADDED Viewed

	@@ -0,0 +1,128 @@

+import os
+import shutil
+import streamlit as st
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_community.vectorstores import FAISS
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+from langchain_community.llms import Together
+from langchain_community.document_loaders import UnstructuredPDFLoader
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.embeddings import HuggingFaceEmbeddings
+os.environ["TOGETHER_API_KEY"] = os.getenv("TOGETHER_API_KEY")
+def inference(chain, input_query):
+    """Invoke the processing chain with the input query."""
+    result = chain.invoke(input_query)
+    return result
+def create_chain(retriever, prompt, model):
+    """Compose the processing chain with the specified components."""
+    chain = (
+        {"context": retriever, "question": RunnablePassthrough()}
+        | prompt
+        | model
+        | StrOutputParser()
+    )
+    return chain
+def generate_prompt():
+    """Define the prompt template for question answering."""
+    template = """<s>[INST] Answer the question in a simple sentence based only on the following context:
+                  {context}
+                  Question: {question} [/INST]
+               """
+    return ChatPromptTemplate.from_template(template)
+def configure_model():
+    """Configure the language model with specified parameters."""
+    return Together(
+        model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+        temperature=0.1,
+        max_tokens=3000,
+        top_k=50,
+        top_p=0.7,
+        repetition_penalty=1.1,
+    )
+def configure_retriever(pdf_loader):
+    """Configure the retriever with embeddings and a FAISS vector store."""
+    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+    vector_db = FAISS.from_documents(pdf_loader, embeddings)
+    return vector_db.as_retriever()
+def load_documents(path):
+    """Load and preprocess documents from PDF files located at the specified path."""
+    pdf_loader = []
+    for file in os.listdir(path):
+        if file.endswith('.pdf'):
+            filepath = os.path.join(path, file)
+            loader = UnstructuredPDFLoader(filepath)
+            documents = loader.load()
+            text_splitter = CharacterTextSplitter(chunk_size=18000, chunk_overlap=10)
+            docs = text_splitter.split_documents(documents)
+            pdf_loader.extend(docs)
+    return pdf_loader
+def process_document(path, input_query):
+    """Process the document by setting up the chain and invoking it with the input query."""
+    pdf_loader = load_documents(path)
+    llm_model = configure_model()
+    prompt = generate_prompt()
+    retriever = configure_retriever(pdf_loader)
+    chain = create_chain(retriever, prompt, llm_model)
+    response = inference(chain, input_query)
+    return response
+def main():
+    """Main function to run the Streamlit app."""
+    tmp_folder = '/tmp/1'
+    os.makedirs(tmp_folder,exist_ok=True)
+    st.title("Q&A PDF AI RAG Chatbot")
+    uploaded_files = st.sidebar.file_uploader("Choose PDF files", accept_multiple_files=True, type='pdf')
+    if uploaded_files:
+        for file in uploaded_files:
+            with open(os.path.join(tmp_folder, file.name), 'wb') as f:
+                f.write(file.getbuffer())
+        st.success('File successfully uploaded. Start prompting!')
+    if 'chat_history' not in st.session_state:
+        st.session_state.chat_history = []
+    if uploaded_files:
+        with st.form(key='question_form'):
+            user_query = st.text_input("Ask a question:", key="query_input")
+            if st.form_submit_button("Ask") and user_query:
+                response = process_document(tmp_folder, user_query)
+                st.session_state.chat_history.append({"question": user_query, "answer": response})
+        if st.button("Clear Chat History"):
+            st.session_state.chat_history = []
+        for chat in st.session_state.chat_history:
+            st.markdown(f"**Q:** {chat['question']}")
+            st.markdown(f"**A:** {chat['answer']}")
+            st.markdown("---")
+    else:
+        st.success('Upload Document to Start Process !')
+    if st.sidebar.button("REMOVE UPLOADED FILES"):
+        document_count = os.listdir(tmp_folder)
+        if len(document_count) > 0:
+            shutil.rmtree(tmp_folder)
+            st.sidebar.write("FILES DELETED SUCCESSFULLY !!!")
+        else:
+            st.sidebar.write("NO DOCUMENT FOUND TO DELETE !!! PLEASE UPLOAD DOCUMENTS TO START PROCESS !! ")
+# Entry point: start the app only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    main()