Silence1412 committed on
Commit
1ac37c2
1 Parent(s): fa0f821

Create Chat_with_pdf_LLM.py

Browse files
Files changed (1) hide show
  1. Chat_with_pdf_LLM.py +60 -0
Chat_with_pdf_LLM.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from langchain.text_splitter import CharacterTextSplitter
4
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
5
+ from langchain.vectorstores import FAISS
6
+ from langchain.chains.question_answering import load_qa_chain
7
+ from langchain.llms import OpenAI
8
+ from langchain.callbacks import get_openai_callback
9
+ import os
10
+ from streamlit_chat import message
11
+
12
def LLM_pdf(model='google/flan-t5-large'):
    """Render a Streamlit "chat with your PDF" widget.

    Shows a PDF uploader. Once a file is uploaded, its text is split into
    overlapping chunks and indexed in an in-memory FAISS store. Each
    question typed by the user is answered by a "stuff" question-answering
    chain over the most similar chunks, and the question/answer history
    kept in ``st.session_state`` is rendered newest-first.

    Args:
        model: Hugging Face Hub repo id of the LLM used to answer questions.

    Returns:
        None. All output is drawn through Streamlit / streamlit_chat.
    """
    # Imported locally because the file's top-level imports do not bring
    # HuggingFaceHub into scope.
    # NOTE(review): HuggingFaceHub presumably needs a Hugging Face API token
    # in the environment -- confirm the deployment provides one.
    from langchain.llms import HuggingFaceHub

    # upload file
    pdf = st.file_uploader("Upload your PDF", type="pdf")
    if pdf is None:
        # Nothing to index or chat about until a file is provided.
        return

    # extract the text; `or ""` guards against a page yielding no text
    pdf_reader = PdfReader(pdf)
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    # split into chunks
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    if not chunks:
        # e.g. a scanned/image-only PDF: there is nothing to embed, so stop
        # here instead of handing an empty list to the vector store.
        st.warning("No extractable text was found in this PDF.")
        return

    # create embeddings
    embeddings = HuggingFaceEmbeddings()
    knowledge_base = FAISS.from_texts(chunks, embeddings)

    # Chat history lives in session state so it survives Streamlit reruns.
    for history_key in ('generated', 'past'):
        if history_key not in st.session_state:
            st.session_state[history_key] = []

    # show user input
    user_question = st.text_input("Ask a question about your PDF:")
    if user_question:
        docs = knowledge_base.similarity_search(user_question)

        # NOTE(review): temperature=5 is kept from the original; it looks
        # unusually high for question answering -- confirm it is intended.
        llm = HuggingFaceHub(
            repo_id=model,
            model_kwargs={"temperature": 5, "max_length": 64},
        )
        chain = load_qa_chain(llm, chain_type="stuff")
        response = chain.run(input_documents=docs, question=user_question)

        st.session_state.past.append(user_question)
        st.session_state.generated.append(response)

    # Render the conversation newest-first. The index is part of each widget
    # key so every message gets a distinct key.
    answers = st.session_state['generated']
    questions = st.session_state['past']
    for i in reversed(range(len(answers))):
        message(answers[i], key=str(i))
        message(questions[i], is_user=True, key=str(i) + '_user')