File size: 3,587 Bytes
65d7891
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# streamlit_app.py

import streamlit as st
import os
import openai
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import ChatOpenAI
from langchain.callbacks import get_openai_callback

# Set the openai module's API key from the OPENAI_API_KEY environment
# variable; os.getenv yields None when the variable is not set.
openai.api_key = os.getenv("OPENAI_API_KEY")

def process_pdf(file):
    """Load a PDF and split it into overlapping text chunks.

    Args:
        file: Either a filesystem path / URL (``str`` or ``os.PathLike``) or a
            binary file-like object such as the object returned by
            ``st.file_uploader``.

    Returns:
        The list of document chunks produced by
        ``RecursiveCharacterTextSplitter`` (chunk_size=1000, chunk_overlap=200).
    """
    import tempfile

    if isinstance(file, (str, os.PathLike)):
        pdf_path = os.fspath(file)
        is_temp = False
    else:
        # PyPDFLoader only accepts a path, so an in-memory upload has to be
        # spooled to disk first. getvalue() reads the whole buffer regardless
        # of the current read position; fall back to read() otherwise.
        data = file.getvalue() if hasattr(file, "getvalue") else file.read()
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp.write(data)
        pdf_path = tmp.name
        is_temp = True

    try:
        documents = PyPDFLoader(pdf_path).load()
    finally:
        # The temporary copy is only needed while the loader parses it.
        if is_temp:
            os.remove(pdf_path)

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return text_splitter.split_documents(documents)

def create_vectorstore(splits):
    """Index the given document chunks in Chroma and return a retriever.

    The chunks are embedded with ``OpenAIEmbeddings`` (default settings) and
    the resulting store is exposed through ``as_retriever()``.
    """
    store = Chroma.from_documents(splits, OpenAIEmbeddings())
    return store.as_retriever()

def summarize_document(docs, llm):
    """Summarize the given document chunks with a single LLM call.

    Args:
        docs: Iterable of document chunks. Each item's ``page_content`` is
            used when present; otherwise the item is converted with ``str``.
        llm: The chat model used to produce the summary.

    Returns:
        A ``(summary, cb)`` tuple: the summary text and the OpenAI callback
        handler holding token/cost totals for the call.
    """
    prompt = """
    Write a concise summary of the following:

    {context}
    """
    summary_prompt = PromptTemplate.from_template(prompt)

    # All chunks are placed into one prompt.
    # NOTE(review): a very long PDF may exceed the model's context window —
    # confirm whether a map-reduce style summary is needed.
    full_text = "\n\n".join(
        str(getattr(doc, "page_content", doc)) for doc in docs
    )

    chain = summary_prompt | llm
    with get_openai_callback() as cb:
        response = chain.invoke({"context": full_text})

    # Chat models return a message object; plain LLMs return a string.
    summary = getattr(response, "content", response)
    return summary, cb

def ask_question(query, retriever, llm):
    """Answer a question about the document with a retrieval QA chain.

    Args:
        query: The user's question.
        retriever: Retriever used to fetch the context chunks.
        llm: The chat model that writes the answer.

    Returns:
        A ``(result, cb)`` tuple: the chain's output (requested with
        ``return_source_documents=True``) and the OpenAI callback handler
        holding token/cost totals for the call.
    """
    prompt_template = PromptTemplate.from_template(
        """
        You are an assistant for question-answering tasks.
        Use the following pieces of retrieved context to answer the question.
        If you don't know the answer, just say that you don't know.
        Use three sentences maximum and keep the answer concise.

        {context}
        Question: {question}
        Answer:
        """
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        # Without this the chain falls back to its default prompt and the
        # instructions above are never sent to the model.
        chain_type_kwargs={"prompt": prompt_template},
    )
    with get_openai_callback() as cb:
        result = qa_chain.invoke(query)
    return result, cb

# --- Page layout -----------------------------------------------------------
st.title("Research Paper Summarization & Question Answering")
st.write("Upload a research paper (PDF), summarize its content, or ask specific questions related to the document.")

uploaded_file = st.file_uploader("Upload a PDF Document", type="pdf")

if uploaded_file is not None:
    # Streamlit re-executes this script on every widget interaction, so the
    # parsed chunks (and later the retriever) are kept in session state and
    # only rebuilt when a different file is uploaded.
    file_key = (uploaded_file.name, uploaded_file.size)
    if "docs" not in st.session_state or st.session_state.get("doc_key") != file_key:
        with st.spinner("Processing the document..."):
            st.session_state["docs"] = process_pdf(uploaded_file)
        st.session_state["doc_key"] = file_key
        # A retriever built for a previous file must not be reused.
        st.session_state.pop("retriever", None)
    docs = st.session_state["docs"]
    st.success("Document processed successfully!")

    llm = ChatOpenAI(model_name="gpt-4o-mini")

    if st.button("Summarize Document"):
        with st.spinner("Summarizing the document..."):
            summary, cb = summarize_document(docs, llm)
            st.subheader("Summary:")
            st.write(summary)
            st.write(f"Tokens Used: {cb.total_tokens}, Total Cost (USD): ${cb.total_cost:.5f}")

    query = st.text_input("Ask a question related to the document:")
    if st.button("Get Answer"):
        if not query.strip():
            # Avoid a paid model call for an empty question.
            st.warning("Please enter a question first.")
        else:
            with st.spinner("Retrieving the answer..."):
                # Embedding the document costs API calls; do it once per file.
                if "retriever" not in st.session_state:
                    st.session_state["retriever"] = create_vectorstore(docs)
                retriever = st.session_state["retriever"]
                answer, cb = ask_question(query, retriever, llm)
                st.subheader("Answer:")
                # The QA chain returns a dict; show only the answer text when
                # it is available.
                if isinstance(answer, dict) and "result" in answer:
                    st.write(answer["result"])
                else:
                    st.write(answer)
                st.write(f"Tokens Used: {cb.total_tokens}, Total Cost (USD): ${cb.total_cost:.5f}")

# --- Sidebar help ----------------------------------------------------------
st.sidebar.title("Instructions")
st.sidebar.write("""
1. Upload a research paper in PDF format.
2. Choose to either summarize the entire document or ask a specific question about its content.
3. For summarization, click the 'Summarize Document' button.
4. For Q&A, type your question in the input box and click 'Get Answer'.
5. Wait a few seconds for the response.
""")