# streamlit_app.py
import os

import chromadb
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.callbacks.manager import get_openai_callback
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# The LangChain OpenAI integrations read the API key from the
# OPENAI_API_KEY environment variable, so it only needs to be set there.


def process_pdf(uploaded_file):
    """Save the uploaded PDF to disk, load it, and split it into overlapping chunks."""
    temp_file = "./temp.pdf"
    with open(temp_file, "wb") as file:
        file.write(uploaded_file.getvalue())

    loader = PyPDFLoader(temp_file)
    documents = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(documents)
    return splits


def create_vectorstore(splits):
    """Embed the chunks into an in-memory Chroma collection and return a retriever."""
    embeddings = OpenAIEmbeddings()
    client = chromadb.Client()  # in-memory Chroma client
    vectorstore = Chroma.from_documents(splits, embeddings, client=client)
    return vectorstore.as_retriever()


def summarize_document(docs, llm):
    """Summarize all chunks with a 'stuff' documents chain and track token usage."""
    prompt = ChatPromptTemplate.from_messages(
        [("system", "Write a concise summary of the following:\n\n{context}")]
    )
    chain = create_stuff_documents_chain(llm, prompt)
    with get_openai_callback() as cb:
        summary = chain.invoke({"context": docs})
    return summary, cb


def ask_question(query, retriever, llm):
    """Answer a question over the retrieved context and track token usage."""
    prompt_template = PromptTemplate.from_template(
        """
        You are an assistant for question-answering tasks.
        Use the following pieces of retrieved context to answer the question.
        If you don't know the answer, just say that you don't know.
        Use three sentences maximum and keep the answer concise.

        {context}

        Question: {question}

        Answer:
        """
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": prompt_template},
    )
    with get_openai_callback() as cb:
        result = qa_chain.invoke({"query": query})
    return result, cb


st.title("Research Paper Summarization & Question Answering")
st.write(
    "Upload a research paper (PDF), summarize its content, "
    "or ask specific questions related to the document."
)

uploaded_file = st.file_uploader("Upload a PDF Document", type="pdf")

if uploaded_file is not None:
    with st.spinner("Processing the document..."):
        docs = process_pdf(uploaded_file)
    st.success("Document processed successfully!")

    llm = ChatOpenAI(model_name="gpt-4o-mini")

    if st.button("Summarize Document"):
        with st.spinner("Summarizing the document..."):
            summary, cb = summarize_document(docs, llm)
        st.subheader("Summary:")
        st.write(summary)
        st.write(f"Tokens Used: {cb.total_tokens}, Total Cost (USD): ${cb.total_cost:.5f}")

    query = st.text_input("Ask a question related to the document:")
    if st.button("Get Answer"):
        with st.spinner("Retrieving the answer..."):
            # Note: the document is re-embedded on every question.
            retriever = create_vectorstore(docs)
            answer, cb = ask_question(query, retriever, llm)
        st.subheader("Answer:")
        st.write(answer["result"])
        st.write(f"Tokens Used: {cb.total_tokens}, Total Cost (USD): ${cb.total_cost:.5f}")

st.sidebar.title("Instructions")
st.sidebar.write("""
1. Upload a research paper in PDF format.
2. Choose to either summarize the entire document or ask a specific question about its content.
3. For summarization, click the 'Summarize Document' button.
4. For Q&A, type your question in the input box and click 'Get Answer'.
5. Wait a few seconds for the response.
""")
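
# --- Running the app (a minimal sketch; exact package set may vary with your
# LangChain version, the list below just mirrors the imports used above) ---
#   pip install streamlit langchain langchain-community langchain-openai chromadb pypdf
#   export OPENAI_API_KEY="sk-..."
#   streamlit run streamlit_app.py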