"""Streamlit app: upload a PDF and ask questions about its content.

The uploaded PDF's text is split into overlapping chunks, embedded with
HuggingFace embeddings, indexed in a FAISS vector store (cached on disk as a
pickle named after the PDF), and queried through a LangChain
question-answering chain.
"""

import os
import pickle

import streamlit as st
from dotenv import load_dotenv
from streamlit_extras.add_vertical_space import add_vertical_space
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain import HuggingFaceHub
from langchain.chains.question_answering import load_qa_chain

with st.sidebar:
    st.title('LLM PDF Chats')
    # The link URL must not be wrapped in quotes: Markdown would treat the
    # quotes as part of the destination and render a broken link.
    st.markdown('''
    ## about
    - This is LLM power chatbot
    - By [Prathamesh Shete](https://www.linkedin.com/in/prathameshshete)
    ''')
    add_vertical_space(5)
    st.write('Made By Prathamesh')

# Load variables from a local .env file into the environment.
# NOTE(review): presumably this supplies the Hugging Face Hub API token used
# by HuggingFaceHub below -- confirm against the deployment's .env.
load_dotenv()


def _extract_pdf_text(pdf_reader) -> str:
    """Return the text of every page of *pdf_reader*, separated by newlines."""
    # `or ''` guards against a page yielding no text (e.g. scanned images).
    return '\n'.join(page.extract_text() or '' for page in pdf_reader.pages)


def _load_or_build_vector_store(store_name: str, chunks: list):
    """Return the FAISS store for *chunks*, cached in ``<store_name>.pkl``.

    If the cache file exists it is loaded and *chunks* is ignored; otherwise
    the chunks are embedded, indexed, and the resulting store is pickled.
    """
    cache_path = f'{store_name}.pkl'
    if os.path.exists(cache_path):
        # SECURITY: pickle.load executes arbitrary code embedded in the file.
        # Only safe while the cache files are written exclusively by this app
        # in a trusted working directory.
        # NOTE(review): the cache is keyed by file name only, so a different
        # PDF with the same name will reuse a stale index.
        with open(cache_path, 'rb') as f:
            return pickle.load(f)

    embeddings = HuggingFaceEmbeddings()
    vector_store = FAISS.from_texts(chunks, embedding=embeddings)
    with open(cache_path, 'wb') as f:
        pickle.dump(vector_store, f)
    return vector_store


def main() -> None:
    """Render the page: PDF upload, indexing, and the question/answer box."""
    st.header('Chat With PDF')
    pdf = st.file_uploader('Upload Your PDF', type='pdf')
    if pdf is None:
        return

    # Collect text from ALL pages (assigning inside the loop would keep only
    # the last page).
    text = _extract_pdf_text(PdfReader(pdf))

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text=text)
    if not chunks:
        # Nothing to index; building a FAISS store from no texts would fail.
        st.warning('No extractable text was found in this PDF.')
        return

    # Cache name is the uploaded file's name without directory or extension.
    store_name = os.path.splitext(os.path.basename(pdf.name))[0]
    vector_store = _load_or_build_vector_store(store_name, chunks)

    # Accept the user's query.
    ask_query = st.text_input('Ask question about PDF : ')
    if not ask_query:
        return

    # Retrieve the 3 chunks most similar to the question as context.
    docs = vector_store.similarity_search(query=ask_query, k=3)
    llm = HuggingFaceHub(
        repo_id="google/flan-t5-xl",
        model_kwargs={"temperature": 0, "max_length": 64},
    )
    # 'stuff' places all retrieved documents into a single prompt.
    chain = load_qa_chain(llm=llm, chain_type='stuff')
    response = chain.run(input_documents=docs, question=ask_query)
    st.write(response)


if __name__ == "__main__":
    main()