Update app.py
app.py CHANGED
@@ -1,29 +1,34 @@
 import gradio as gr
 import os
 
-from
+from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
-from
+from langchain_community.vectorstores import Chroma
 from langchain.chains import ConversationalRetrievalChain
-from
-from
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.llms import HuggingFacePipeline
 from langchain.chains import ConversationChain
 from langchain.memory import ConversationBufferMemory
-from
+from langchain_community.llms import HuggingFaceEndpoint
 
 from pathlib import Path
 import chromadb
+from unidecode import unidecode
 
 from transformers import AutoTokenizer
 import transformers
 import torch
 import tqdm
 import accelerate
+import re
+
 
 
 # default_persist_directory = './chroma_HF/'
-list_llm = ["mistralai/
-"
+list_llm = ["mistralai/Mistral-7B-Instruct-v0.2", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.1", \
+    "google/gemma-7b-it","google/gemma-2b-it", \
+    "HuggingFaceH4/zephyr-7b-beta", "HuggingFaceH4/zephyr-7b-gemma-v0.1", \
+    "meta-llama/Llama-2-7b-chat-hf", "microsoft/phi-2", \
     "TinyLlama/TinyLlama-1.1B-Chat-v1.0", "mosaicml/mpt-7b-instruct", "tiiuae/falcon-7b-instruct", \
     "google/flan-t5-xxl"
 ]
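
Note on the import changes above: the loaders, vector store, embeddings, and LLM wrappers now come from the split-out langchain_community package, and unidecode plus re are pulled in for the collection-name helper added further down. A small, optional availability check, purely illustrative and not part of this commit, if you are reproducing the Space locally:

# Quick sanity check that the presumed extra dependencies are importable
# (module names are assumptions based on the imports above; exact versions are unknown).
from importlib.util import find_spec

for module in ("langchain_community", "unidecode", "chromadb", "gradio", "transformers"):
    print(f"{module}: {'found' if find_spec(module) else 'MISSING'}")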
@@ -98,32 +103,58 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     # Warning: langchain issue
     # URL: https://github.com/langchain-ai/langchain/issues/6080
     if llm_model == "mistralai/Mixtral-8x7B-Instruct-v0.1":
-        llm =
+        llm = HuggingFaceEndpoint(
+            repo_id=llm_model,
+            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "load_in_8bit": True}
+            temperature = temperature,
+            max_new_tokens = max_tokens,
+            top_k = top_k,
+            load_in_8bit = True,
+        )
+    elif llm_model == "HuggingFaceH4/zephyr-7b-gemma-v0.1":
+        raise gr.Error("zephyr-7b-gemma-v0.1 is too large to be loaded automatically on free inference endpoint")
+        llm = HuggingFaceEndpoint(
             repo_id=llm_model,
-            model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "load_in_8bit": True}
+            temperature = temperature,
+            max_new_tokens = max_tokens,
+            top_k = top_k,
         )
     elif llm_model == "microsoft/phi-2":
         raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
-        llm =
+        llm = HuggingFaceEndpoint(
             repo_id=llm_model,
-            model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
+            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
+            temperature = temperature,
+            max_new_tokens = max_tokens,
+            top_k = top_k,
+            trust_remote_code = True,
+            torch_dtype = "auto",
         )
     elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
-        llm =
+        llm = HuggingFaceEndpoint(
             repo_id=llm_model,
-            model_kwargs={"temperature": temperature, "max_new_tokens": 250, "top_k": top_k}
+            # model_kwargs={"temperature": temperature, "max_new_tokens": 250, "top_k": top_k}
+            temperature = temperature,
+            max_new_tokens = 250,
+            top_k = top_k,
         )
     elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
         raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
-        llm =
+        llm = HuggingFaceEndpoint(
             repo_id=llm_model,
-            model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
+            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
+            temperature = temperature,
+            max_new_tokens = max_tokens,
+            top_k = top_k,
         )
     else:
-        llm =
+        llm = HuggingFaceEndpoint(
             repo_id=llm_model,
             # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
-            model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
+            # model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
+            temperature = temperature,
+            max_new_tokens = max_tokens,
+            top_k = top_k,
         )
 
     progress(0.75, desc="Defining buffer memory...")
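
The hunk above replaces the old model_kwargs={...} dictionary with direct keyword arguments on langchain_community's HuggingFaceEndpoint. A minimal standalone sketch of that call pattern, with illustrative values only (the repo id, sampling settings, and the HUGGINGFACEHUB_API_TOKEN environment variable are assumptions, not part of this commit):

import os
from langchain_community.llms import HuggingFaceEndpoint

# Generation settings are passed as top-level keyword arguments, not via model_kwargs.
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.2",   # any entry from list_llm
    temperature=0.7,
    max_new_tokens=256,
    top_k=3,
    huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),  # assumed to be set
)

print(llm.invoke("In one sentence, what does a PDF question-answering assistant do?"))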
@@ -149,18 +180,36 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
     return qa_chain
 
 
+# Generate collection name for vector database
+# - Use filepath as input, ensuring unicode text
+def create_collection_name(filepath):
+    # Extract filename without extension
+    collection_name = Path(filepath).stem
+    # Fix potential issues from naming convention
+    ## Remove space
+    collection_name = collection_name.replace(" ","-")
+    ## ASCII transliterations of Unicode text
+    collection_name = unidecode(collection_name)
+    ## Remove special characters
+    #collection_name = re.findall("[\dA-Za-z]*", collection_name)[0]
+    collection_name = re.sub('[^A-Za-z0-9]+', '-', collection_name)
+    ## Limit length to 50 characters
+    collection_name = collection_name[:50]
+    ## Minimum length of 3 characters
+    if len(collection_name) < 3:
+        collection_name = collection_name + 'xyz'
+    print('Filepath: ', filepath)
+    print('Collection name: ', collection_name)
+    return collection_name
+
+
 # Initialize database
 def initialize_database(list_file_obj, chunk_size, chunk_overlap, progress=gr.Progress()):
     # Create list of documents (when valid)
     list_file_path = [x.name for x in list_file_obj if x is not None]
     # Create collection_name for vector database
     progress(0.1, desc="Creating collection name...")
-    collection_name =
-    # Fix potential issues from naming convention
-    collection_name = collection_name.replace(" ","-")
-    collection_name = collection_name[:50]
-    # print('list_file_path: ', list_file_path)
-    print('Collection name: ', collection_name)
+    collection_name = create_collection_name(list_file_path[0])
     progress(0.25, desc="Loading document...")
     # Load document and create splits
     doc_splits = load_doc(list_file_path, chunk_size, chunk_overlap)
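
As a quick illustration of the new create_collection_name() helper, here is a trace with a made-up file path (the path is hypothetical; the helper and its imports come from app.py above):

name = create_collection_name("/tmp/Étude de cas 2024.pdf")
# Path(...).stem                     -> 'Étude de cas 2024'
# .replace(" ", "-")                 -> 'Étude-de-cas-2024'
# unidecode(...)                     -> 'Etude-de-cas-2024'
# re.sub('[^A-Za-z0-9]+', '-', ...)  -> 'Etude-de-cas-2024'  (runs of other characters become a single '-')
# [:50] and the minimum-length check leave it unchanged
print(name)  # Etude-de-cas-2024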
@@ -195,19 +244,23 @@ def conversation(qa_chain, message, history):
     # Generate response using QA chain
     response = qa_chain({"question": message, "chat_history": formatted_chat_history})
     response_answer = response["answer"]
+    if response_answer.find("Helpful Answer:") != -1:
+        response_answer = response_answer.split("Helpful Answer:")[-1]
     response_sources = response["source_documents"]
     response_source1 = response_sources[0].page_content.strip()
     response_source2 = response_sources[1].page_content.strip()
+    response_source3 = response_sources[2].page_content.strip()
     # Langchain sources are zero-based
     response_source1_page = response_sources[0].metadata["page"] + 1
     response_source2_page = response_sources[1].metadata["page"] + 1
+    response_source3_page = response_sources[2].metadata["page"] + 1
     # print ('chat response: ', response_answer)
     # print('DB source', response_sources)
 
     # Append user message and response to chat history
     new_history = history + [(message, response_answer)]
     # return gr.update(value=""), new_history, response_sources[0], response_sources[1]
-    return qa_chain, gr.update(value=""), new_history, response_source1, response_source1_page, response_source2, response_source2_page
+    return qa_chain, gr.update(value=""), new_history, response_source1, response_source1_page, response_source2, response_source2_page, response_source3, response_source3_page
 
 
 def upload_file(file_obj):
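
The change above unpacks a third source document, so conversation() now returns nine values and the retriever must surface at least three documents for response_sources[2] to exist. A purely illustrative defensive variant of the same unpacking, not part of the commit:

def unpack_sources(response_sources, n=3):
    """Mirror the unpacking above, but tolerate fewer than n returned documents."""
    texts, pages = [], []
    for i in range(n):
        if i < len(response_sources):
            texts.append(response_sources[i].page_content.strip())
            pages.append(response_sources[i].metadata["page"] + 1)  # Langchain pages are zero-based
        else:
            texts.append("")
            pages.append(0)
    return texts, pages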
@@ -274,6 +327,9 @@ def demo():
             with gr.Row():
                 doc_source2 = gr.Textbox(label="Reference 2", lines=2, container=True, scale=20)
                 source2_page = gr.Number(label="Page", scale=1)
+            with gr.Row():
+                doc_source3 = gr.Textbox(label="Reference 3", lines=2, container=True, scale=20)
+                source3_page = gr.Number(label="Page", scale=1)
             with gr.Row():
                 msg = gr.Textbox(placeholder="Type message", container=True)
             with gr.Row():
@@ -287,23 +343,23 @@ def demo():
             outputs=[vector_db, collection_name, db_progress])
         qachain_btn.click(initialize_LLM, \
             inputs=[llm_btn, slider_temperature, slider_maxtokens, slider_topk, vector_db], \
-            outputs=[qa_chain, llm_progress]).then(lambda:[None,"",0,"",0], \
+            outputs=[qa_chain, llm_progress]).then(lambda:[None,"",0,"",0,"",0], \
             inputs=None, \
-            outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page], \
+            outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], \
             queue=False)
 
         # Chatbot events
         msg.submit(conversation, \
             inputs=[qa_chain, msg, chatbot], \
-            outputs=[qa_chain, msg, chatbot], \
+            outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], \
             queue=False)
         submit_btn.click(conversation, \
             inputs=[qa_chain, msg, chatbot], \
-            outputs=[qa_chain, msg, chatbot], \
+            outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], \
             queue=False)
-        clear_btn.click(lambda:[None,"",0,"",0], \
+        clear_btn.click(lambda:[None,"",0,"",0,"",0], \
             inputs=None, \
-            outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page], \
+            outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], \
             queue=False)
     demo.queue().launch(debug=True)
 
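
One detail worth noting in the event wiring above: each callback must return exactly one value per output component, which is why the clearing lambda grows from five to seven values once the third reference box and its page number are added. A small sketch of the correspondence, using the component names from the diff:

# Seven outputs -> seven reset values, in the same order as the outputs list.
reset_values = [None,    # chatbot
                "", 0,   # doc_source1, source1_page
                "", 0,   # doc_source2, source2_page
                "", 0]   # doc_source3, source3_page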