import os
import tempfile

import streamlit as st
# PdfReader replaces the deprecated PdfFileReader (removed in PyPDF2 3.0).
from PyPDF2 import PdfReader
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import GooglePalmEmbeddings
from langchain.llms import GooglePalm
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# GooglePalm and GooglePalmEmbeddings pick up the API key from this environment variable.
os.environ['GOOGLE_API_KEY'] = 'YOUR_API_KEY'

# Reference URLs (exam pages, prospectuses, and FAQ pages) indexed alongside the uploaded PDFs.
direct_links = [
    "https://zollege.in/exams/comedk-uget",
    "https://zollege.in/exams/comedk-uget/cutoff",
    "https://www.iimrohtak.ac.in/panel/assets/images/prospectus/16881265522765.pdf",
    "https://www.iimrohtak.ac.in/panel/assets/images/lor/16884755042121.pdf",
    "https://www.iimrohtak.ac.in/dpm.php",
    "https://www.iimrohtak.ac.in/dpm-admission.php",
    "https://www.iimrohtak.ac.in/areas-of-specialisation.php",
    "https://www.iimrohtak.ac.in/financial-assistance.php",
    "https://www.iimrohtak.ac.in/panel/assets/images/prospectus/16903487969776.pdf",
    "https://www.iimrohtak.ac.in/faqs-for-dpm.php",
    "https://www.iimrohtak.ac.in/dpm-student.php",
    "https://www.iimrohtak.ac.in/publication.php",
    "https://www.iimrohtak.ac.in/dpm-contact.php",
    "https://www.iimrohtak.ac.in/ipm.php",
]

# Default local sample PDFs; the sidebar upload flow in main() builds its own file list instead.
pdf_files = ["sample.pdf", "sample2.pdf"]


def get_data(direct_links, pdf_files):
    """Load the reference URLs and local PDFs and return their combined text."""
    direct_link_loader = UnstructuredURLLoader(urls=direct_links)
    direct_link_data = direct_link_loader.load()
    # load() returns Document objects, so join their page_content into a single
    # string before appending the PDF text.
    url_text = "\n".join(doc.page_content for doc in direct_link_data)
    pdf_data = get_pdf_text(pdf_files)
    return url_text + "\n" + pdf_data


def get_pdf_text(pdf_files):
    """Extract and concatenate the text of every page of the given PDF files."""
    text = ""
    for pdf_file in pdf_files:
        with open(pdf_file, "rb") as file:
            pdf_reader = PdfReader(file)
            # pages / extract_text() is the current PyPDF2 API; extract_text()
            # can return None for pages with no extractable text.
            for page in pdf_reader.pages:
                text += page.extract_text() or ""
    return text
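

# Split the combined text into ~1000-character chunks with a small overlap so each
# chunk fits comfortably in the embedding model's input and retrieval stays precise.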
def get_text_chunks(text):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)
    chunks = text_splitter.split_text(text)
    return chunks
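

# Embed each chunk with Google PaLM embeddings and store the vectors in an
# in-memory FAISS index for similarity search.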
def get_vector_store(text_chunks):
    embeddings = GooglePalmEmbeddings()
    vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
    return vector_store
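

# Wire the PaLM LLM, the FAISS retriever, and a conversation buffer into a
# ConversationalRetrievalChain; the memory key must match the chain's expected
# "chat_history" input.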
def get_conversational_chain(vector_store):
    llm = GooglePalm()
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm, retriever=vector_store.as_retriever(), memory=memory
    )
    return conversation_chain
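

# Run the user's question through the chain and render the accumulated chat
# history, alternating between the human and bot messages.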
def user_input(user_question):
    # Guard against questions asked before the documents have been processed.
    if st.session_state.conversation is None:
        st.warning("Please upload and process your documents first.")
        return
    response = st.session_state.conversation({'question': user_question})
    st.session_state.chatHistory = response['chat_history']
    for i, message in enumerate(st.session_state.chatHistory):
        if i % 2 == 0:
            st.write("Human: ", message.content)
        else:
            st.write("Bot: ", message.content)
def main():
    st.set_page_config("Chat with Multiple PDFs")
    st.header("Chat with Multiple PDFs 💬")
    user_question = st.text_input("Ask a Question from the PDF Files")
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chatHistory" not in st.session_state:
        st.session_state.chatHistory = None
    if user_question:
        user_input(user_question)
    with st.sidebar:
        st.title("Settings")
        st.subheader("Upload your Documents")
        pdf_uploads = st.file_uploader("Upload your PDF Files and Click on the Process Button", accept_multiple_files=True)
        if st.button("Process"):
            with st.spinner("Processing"):
                # Write each upload to a temporary file so get_pdf_text() can open it by path.
                pdf_files = [tempfile.NamedTemporaryFile(delete=False).name for _ in pdf_uploads]
                for uploaded_file, pdf_file in zip(pdf_uploads, pdf_files):
                    with open(pdf_file, "wb") as file:
                        file.write(uploaded_file.read())
                combined_text = get_data(direct_links, pdf_files)
                text_chunks = get_text_chunks(combined_text)
                vector_store = get_vector_store(text_chunks)
                st.session_state.conversation = get_conversational_chain(vector_store)
                st.success("Done")


if __name__ == "__main__":
    main()
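
# To launch the app (assuming this file is saved as app.py):
#     streamlit run app.py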