Parth211 commited on
Commit
ae2357f
1 Parent(s): 7b25a58

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -0
app.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import HuggingFaceHub
# from htmlTemplates import css,user_template,bot_template
from langchain.embeddings import HuggingFaceEmbeddings
13
+
14
+
15
+
16
def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF.

    Args:
        pdf_docs: iterable of file-like objects (Streamlit uploads) that
            PyPDF2's PdfReader can open.

    Returns:
        str: all page text joined into one string ("" for no docs/pages).
    """
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() may return None (e.g. image-only pages);
            # guard so we never concatenate None onto the accumulator.
            text += page.extract_text() or ""
    return text
24
+
25
def get_text_chunks(text):
    """Split raw document text into overlapping chunks for embedding.

    Args:
        text: the full concatenated document text.

    Returns:
        list[str]: chunks of roughly 1000 characters with 200-char overlap.
    """
    text_splitter = CharacterTextSplitter(
        separator='\n',
        chunk_size=1000,
        chunk_overlap=200,
        # Measure chunk size in characters. The original commented-out line
        # was misspelled ("length_fucntion") and passed len(text) — an int —
        # where a callable is required; len matches the splitter's default.
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    return chunks
34
+
35
+
36
+
37
def get_vectorstore(text_chunks):
    """Build a FAISS index over *text_chunks* using MiniLM sentence embeddings."""
    embedder = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2'
    )
    return FAISS.from_texts(texts=text_chunks, embedding=embedder)
41
+
42
+
43
def get_conversations_chain(vectorstores):
    """Create a conversational retrieval chain backed by *vectorstores*.

    Args:
        vectorstores: a FAISS vector store (see get_vectorstore).

    Returns:
        ConversationalRetrievalChain wired to a Mistral-7B-Instruct LLM and
        a buffer memory keyed 'chat_history'.
    """
    # BUG FIX: the original passed an undefined global API_KEY, which raised
    # NameError at runtime.  Read the token from the environment instead —
    # main() calls load_dotenv(), so a .env entry is picked up here.
    llm = HuggingFaceHub(
        repo_id='mistralai/Mistral-7B-Instruct-v0.3',
        model_kwargs={"temperature": 0.5, "max_length": 512},
        huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
    )

    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True
    )
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstores.as_retriever(),
        memory=memory,
    )
    return conversation_chain
57
+
58
+
59
+
60
def handle_userinput(user_question):
    """Send *user_question* through the conversation chain and render the chat.

    Reads st.session_state.conversation (populated by main()) and stores the
    updated message history in st.session_state.chat_history.
    """
    response = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    # BUG FIX: the original used user_template/bot_template from the
    # commented-out htmlTemplates import (NameError at runtime).  Render
    # with Streamlit's built-in chat elements instead.  Even-indexed
    # messages are the user's turns, odd-indexed are the assistant's.
    for i, message in enumerate(st.session_state.chat_history):
        role = "user" if i % 2 == 0 else "assistant"
        with st.chat_message(role):
            st.write(message.content)
72
+
73
+
74
+
75
+
76
+
77
def main():
    """Streamlit entry point.

    Configures the page, initialises session state, wires the sidebar
    PDF-processing flow (extract -> chunk -> embed -> chain), and routes
    user questions to the conversation chain.
    """
    load_dotenv()
    st.set_page_config(page_title="Chat with multiple PDFs",
                       page_icon=":books:")
    # st.write(css, unsafe_allow_html=True)

    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with multiple PDFs :books:")
    user_question = st.text_input("Ask a question about your documents:")
    if user_question:
        # Guard: asking before any PDFs are processed would otherwise call
        # None as the conversation chain and raise TypeError.
        if st.session_state.conversation is None:
            st.warning("Please upload and process your PDFs first.")
        else:
            handle_userinput(user_question)

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'",
            accept_multiple_files=True)
        if st.button("Process"):
            # Guard: Process with no files would index an empty corpus.
            if not pdf_docs:
                st.warning("Please upload at least one PDF before processing.")
            else:
                with st.spinner("Processing"):
                    # get pdf text
                    raw_text = get_pdf_text(pdf_docs)

                    # get the text chunks
                    text_chunks = get_text_chunks(raw_text)

                    # create vector store
                    vectorstore = get_vectorstore(text_chunks)

                    # create conversation chain
                    st.session_state.conversation = get_conversations_chain(
                        vectorstore)
111
+
112
+
113
# Run the app only when this file is executed directly (not when imported).
if __name__ == '__main__':
    main()