Spaces:

chukbert
/

Research-Paper-Summarizer-and-QA

Sleeping

App Files Files Community

Research-Paper-Summarizer-and-QA / app.py

chukbert

Create app.py

65d7891 verified 14 days ago

raw

history blame

3.59 kB

	# app.py - Streamlit app that summarizes an uploaded research-paper PDF and answers questions about it.

	import streamlit as st
	import os
	import openai
	from langchain.vectorstores import Chroma
	from langchain.embeddings.openai import OpenAIEmbeddings
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.chains import RetrievalQA
	from langchain.prompts import PromptTemplate
	from langchain_community.document_loaders import PyPDFLoader
	from langchain_openai import ChatOpenAI
	from langchain.callbacks import get_openai_callback

	# Read the API key from the environment so it never has to live in the source;
	# the value is None when OPENAI_API_KEY is not set.
	openai.api_key = os.environ.get("OPENAI_API_KEY")

	def process_pdf(file):
	    """Load a PDF and split its pages into overlapping text chunks.

	    Args:
	        file: Either a filesystem path to a PDF (``str`` / ``os.PathLike``)
	            or a binary file-like object exposing ``getvalue()`` or
	            ``read()``, such as the upload object this app receives from
	            ``st.file_uploader``.

	    Returns:
	        The chunks produced by ``RecursiveCharacterTextSplitter``
	        (``chunk_size=1000``, ``chunk_overlap=200``) over the loaded pages.
	    """
	    import tempfile

	    if isinstance(file, (str, os.PathLike)):
	        documents = PyPDFLoader(str(file)).load()
	    else:
	        # PyPDFLoader works from a path, so in-memory uploads are spooled to
	        # a temporary .pdf first. getvalue() is preferred because it does not
	        # depend on the current read position of the buffer.
	        data = file.getvalue() if hasattr(file, "getvalue") else file.read()
	        tmp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
	        try:
	            # Close the handle before loading so the loader can reopen the
	            # file on platforms that forbid a second open.
	            with tmp:
	                tmp.write(data)
	            documents = PyPDFLoader(tmp.name).load()
	        finally:
	            os.remove(tmp.name)

	    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
	    return text_splitter.split_documents(documents)

	def create_vectorstore(splits):
	    """Embed the document chunks into a Chroma store and return a retriever.

	    Args:
	        splits: The chunked documents to index (as returned by
	            ``process_pdf``).

	    Returns:
	        The retriever obtained from ``vectorstore.as_retriever()``.
	    """
	    import uuid

	    # NOTE(review): without an explicit name, every call targets Chroma's
	    # default in-memory collection, which appears to be shared for the life
	    # of the process - so repeated clicks or a second upload would mix chunks
	    # from earlier calls into the results. A unique name per call isolates
	    # each index. Confirm against the installed Chroma version.
	    collection_name = f"paper-{uuid.uuid4().hex}"
	    vectorstore = Chroma.from_documents(
	        splits,
	        OpenAIEmbeddings(),
	        collection_name=collection_name,
	    )
	    return vectorstore.as_retriever()

	def summarize_document(docs, llm):
	    """Summarize the document by sending all of its chunks to the LLM at once.

	    The previous implementation built a ``RetrievalQA`` chain with
	    ``retriever=None``, never used its prompt, and invoked the chain with a
	    ``context`` key; summarization needs no retrieval step, so the prompt is
	    formatted and sent to the model directly.

	    Args:
	        docs: Chunked documents; each must expose ``page_content``.
	        llm: The chat/LLM object to call via ``invoke``.

	    Returns:
	        A ``(summary, cb)`` tuple: the summary text and the OpenAI callback
	        handler carrying the usage counters read by the caller
	        (``total_tokens``, ``total_cost``).
	    """
	    prompt = PromptTemplate.from_template(
	        "Write a concise summary of the following:\n\n{context}\n"
	    )
	    # All chunks are placed in a single prompt, so very long papers are
	    # limited by the model's context window.
	    context = "\n\n".join(doc.page_content for doc in docs)
	    with get_openai_callback() as cb:
	        response = llm.invoke(prompt.format(context=context))
	    # Chat models return a message object with `.content`; plain LLMs return
	    # a string, which is passed through unchanged.
	    summary = getattr(response, "content", response)
	    return summary, cb

	def ask_question(query, retriever, llm):
	    """Answer a question about the document with a retrieval-augmented chain.

	    Args:
	        query: The user's question.
	        retriever: Retriever over the document chunks.
	        llm: The chat/LLM object the chain should call.

	    Returns:
	        A ``(result, cb)`` tuple: the chain output (requested with
	        ``return_source_documents=True``) and the OpenAI callback handler
	        carrying the usage counters read by the caller.
	    """
	    prompt_template = PromptTemplate.from_template(
	        "You are an assistant for question-answering tasks.\n"
	        "Use the following pieces of retrieved context to answer the question.\n"
	        "If you don't know the answer, just say that you don't know.\n"
	        "Use three sentences maximum and keep the answer concise.\n"
	        "\n"
	        "{context}\n"
	        "Question: {question}\n"
	        "Answer:\n"
	    )
	    qa_chain = RetrievalQA.from_chain_type(
	        llm=llm,
	        chain_type="stuff",
	        retriever=retriever,
	        return_source_documents=True,
	        # The template was previously built but never handed to the chain,
	        # so the chain ran without these instructions.
	        chain_type_kwargs={"prompt": prompt_template},
	    )
	    with get_openai_callback() as cb:
	        result = qa_chain.invoke({"query": query})
	    return result, cb

	# ---- Streamlit page: upload a PDF, then summarize it or ask questions ----
	st.title("Research Paper Summarization & Question Answering")
	st.write("Upload a research paper (PDF), summarize its content, or ask specific questions related to the document.")

	uploaded_file = st.file_uploader("Upload a PDF Document", type="pdf")

	if uploaded_file is not None:
	    with st.spinner("Processing the document..."):
	        docs = process_pdf(uploaded_file)

	    if not docs:
	        # Nothing to summarize or index (e.g. no extractable text), so skip
	        # the actions instead of sending empty input to the model.
	        st.warning("No text could be extracted from this PDF.")
	    else:
	        st.success("Document processed successfully!")

	        llm = ChatOpenAI(model_name="gpt-4o-mini")

	        if st.button("Summarize Document"):
	            with st.spinner("Summarizing the document..."):
	                summary, cb = summarize_document(docs, llm)
	            st.subheader("Summary:")
	            # Show only the text when a chain-style dict comes back.
	            st.write(summary.get("result", summary) if isinstance(summary, dict) else summary)
	            st.write(f"Tokens Used: {cb.total_tokens}, Total Cost (USD): ${cb.total_cost:.5f}")

	        query = st.text_input("Ask a question related to the document:")
	        if st.button("Get Answer"):
	            if not query.strip():
	                # Avoid paying for embeddings and an LLM call on an empty question.
	                st.warning("Please enter a question first.")
	            else:
	                with st.spinner("Retrieving the answer..."):
	                    retriever = create_vectorstore(docs)
	                    answer, cb = ask_question(query, retriever, llm)
	                st.subheader("Answer:")
	                # The chain output also carries the query and source documents;
	                # display just the answer text.
	                st.write(answer.get("result", answer) if isinstance(answer, dict) else answer)
	                st.write(f"Tokens Used: {cb.total_tokens}, Total Cost (USD): ${cb.total_cost:.5f}")

	st.sidebar.title("Instructions")
	st.sidebar.write("""
	1. Upload a research paper in PDF format.
	2. Choose to either summarize the entire document or ask a specific question about its content.
	3. For summarization, click the 'Summarize Document' button.
	4. For Q&A, type your question in the input box and click 'Get Answer'.
	5. Wait a few seconds for the response.
	""")