|
|
|
|
|
import streamlit as st |
|
import os |
|
import openai |
|
from langchain.vectorstores import Chroma |
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain.chains import RetrievalQA |
|
from langchain.prompts import PromptTemplate |
|
from langchain_community.document_loaders import PyPDFLoader |
|
from langchain_openai import ChatOpenAI |
|
from langchain.callbacks import get_openai_callback |
|
|
|
# Configure the OpenAI client from the environment; the key is None when the
# OPENAI_API_KEY variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
|
|
def process_pdf(file):
    """Load a PDF and split it into overlapping text chunks.

    ``PyPDFLoader`` reads from a filesystem path, so a file-like object
    (for example the object returned by ``st.file_uploader``) is first
    copied to a temporary ``.pdf`` file that is removed after loading.

    Args:
        file: Either a path to a PDF (``str`` / ``os.PathLike``) or a binary
            file-like object exposing ``getvalue()`` or ``read()``.

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter`` using
        ``chunk_size=1000`` and ``chunk_overlap=200``.
    """
    import tempfile

    if isinstance(file, (str, os.PathLike)):
        # Already a path: hand it to the loader unchanged.
        documents = PyPDFLoader(file).load()
    else:
        data = file.getvalue() if hasattr(file, "getvalue") else file.read()
        # delete=False so the file can be closed before the loader reopens
        # it by name; it is removed explicitly in the ``finally`` below.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp.write(data)
            tmp_path = tmp.name
        try:
            documents = PyPDFLoader(tmp_path).load()
        finally:
            os.remove(tmp_path)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200
    )
    return text_splitter.split_documents(documents)
|
|
|
def create_vectorstore(splits):
    """Index the chunks in a Chroma store and return a retriever over it.

    Args:
        splits: Document chunks to embed with ``OpenAIEmbeddings``.

    Returns:
        The retriever obtained from the new store's ``as_retriever()``.
    """
    store = Chroma.from_documents(splits, OpenAIEmbeddings())
    return store.as_retriever()
|
|
|
def summarize_document(docs, llm):
    """Summarize the document chunks with a single LLM call.

    The text of every chunk is joined and substituted into the summary
    prompt, which is then sent to ``llm`` while an OpenAI callback records
    token usage and cost.

    Args:
        docs: Document chunks; each must expose ``page_content``.
        llm: Chat model used to produce the summary (called via ``invoke``).

    Returns:
        A ``(summary, cb)`` tuple: the summary text and the callback handler
        holding the usage statistics for the call.
    """
    prompt = (
        "Write a concise summary of the following:\n"
        "\n"
        "{context}\n"
    )
    context = "\n\n".join(doc.page_content for doc in docs)

    with get_openai_callback() as cb:
        response = llm.invoke(prompt.format(context=context))

    # Chat models return a message object; fall back to the raw response for
    # models that already return plain text.
    summary = getattr(response, "content", response)
    return summary, cb
|
|
|
def ask_question(query, retriever, llm):
    """Answer a question from retrieved document context.

    Builds a "stuff" ``RetrievalQA`` chain that uses the custom prompt below
    and runs it while an OpenAI callback records token usage and cost.

    Args:
        query: The user's question.
        retriever: Retriever that supplies the context documents.
        llm: Chat model that generates the answer.

    Returns:
        A ``(result, cb)`` tuple: the chain's output mapping (source
        documents are requested via ``return_source_documents=True``) and
        the callback handler holding the usage statistics.
    """
    prompt_template = PromptTemplate.from_template(
        "You are an assistant for question-answering tasks.\n"
        "Use the following pieces of retrieved context to answer the question.\n"
        "If you don't know the answer, just say that you don't know.\n"
        "Use three sentences maximum and keep the answer concise.\n"
        "\n"
        "{context}\n"
        "Question: {question}\n"
        "Answer:\n"
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        # Without this the chain silently falls back to its default prompt
        # and the template above is never used.
        chain_type_kwargs={"prompt": prompt_template},
    )
    with get_openai_callback() as cb:
        result = qa_chain.invoke({"query": query})
    return result, cb
|
|
|
# ---------------------------------------------------------------------------
# Streamlit page: upload a PDF, then summarize it or ask questions about it.
# ---------------------------------------------------------------------------
st.title("Research Paper Summarization & Question Answering")
st.write("Upload a research paper (PDF), summarize its content, or ask specific questions related to the document.")

uploaded_file = st.file_uploader("Upload a PDF Document", type="pdf")

if uploaded_file is not None:
    # Streamlit re-runs this script on every widget interaction, so the
    # parsed chunks are kept in session state and only rebuilt when a
    # different file is uploaded.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("doc_key") != file_key:
        with st.spinner("Processing the document..."):
            st.session_state["docs"] = process_pdf(uploaded_file)
        st.session_state["doc_key"] = file_key
        # A retriever built for a previous file must not be reused.
        st.session_state.pop("retriever", None)
    docs = st.session_state["docs"]
    st.success("Document processed successfully!")

    llm = ChatOpenAI(model_name="gpt-4o-mini")

    if st.button("Summarize Document"):
        with st.spinner("Summarizing the document..."):
            summary, cb = summarize_document(docs, llm)
        st.subheader("Summary:")
        st.write(summary)
        st.write(f"Tokens Used: {cb.total_tokens}, Total Cost (USD): ${cb.total_cost:.5f}")

    query = st.text_input("Ask a question related to the document:")
    if st.button("Get Answer"):
        if not query.strip():
            # Avoid a paid API call for an empty question.
            st.warning("Please enter a question first.")
        else:
            with st.spinner("Retrieving the answer..."):
                # Embed the document once per upload instead of once per
                # question.
                if "retriever" not in st.session_state:
                    st.session_state["retriever"] = create_vectorstore(docs)
                answer, cb = ask_question(query, st.session_state["retriever"], llm)
            st.subheader("Answer:")
            # The QA chain returns a mapping; show only the answer text when
            # it is available rather than the whole structure.
            if isinstance(answer, dict) and "result" in answer:
                st.write(answer["result"])
            else:
                st.write(answer)
            st.write(f"Tokens Used: {cb.total_tokens}, Total Cost (USD): ${cb.total_cost:.5f}")

st.sidebar.title("Instructions")
st.sidebar.write("""
1. Upload a research paper in PDF format.
2. Choose to either summarize the entire document or ask a specific question about its content.
3. For summarization, click the 'Summarize Document' button.
4. For Q&A, type your question in the input box and click 'Get Answer'.
5. Wait a few seconds for the response.
""")
|
|