chukbert committed on
Commit
65d7891
1 Parent(s): 013dc0b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -0
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # streamlit_app.py
2
+
3
+ import streamlit as st
4
+ import os
5
+ import openai
6
+ from langchain.vectorstores import Chroma
7
+ from langchain.embeddings.openai import OpenAIEmbeddings
8
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
9
+ from langchain.chains import RetrievalQA
10
+ from langchain.prompts import PromptTemplate
11
+ from langchain_community.document_loaders import PyPDFLoader
12
+ from langchain_openai import ChatOpenAI
13
+ from langchain.callbacks import get_openai_callback
14
+
15
# Copy the API key from the environment into the openai module-level setting;
# the value is None when OPENAI_API_KEY is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY")
16
+
17
def process_pdf(file):
    """Load a PDF and split it into overlapping text chunks.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``) or a binary
            file-like object exposing ``getvalue()`` or ``read()``, such as
            the object returned by ``st.file_uploader``.

    Returns:
        The list of chunks produced by ``RecursiveCharacterTextSplitter``
        (``chunk_size=1000``, ``chunk_overlap=200``).
    """
    import tempfile

    def _load_and_split(path):
        # Load the pages from disk, then cut them into overlapping chunks.
        documents = PyPDFLoader(path).load()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200
        )
        return text_splitter.split_documents(documents)

    # A real path can be handed to the loader unchanged.
    if isinstance(file, (str, os.PathLike)):
        return _load_and_split(file)

    # PyPDFLoader works from a file path, not an in-memory upload, so the
    # uploaded bytes are spilled to a temporary .pdf first.
    data = file.getvalue() if hasattr(file, "getvalue") else file.read()
    tmp = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    try:
        # Close the handle before loading so the file can be reopened by the
        # loader on every platform.
        with tmp:
            tmp.write(data)
        return _load_and_split(tmp.name)
    finally:
        os.remove(tmp.name)
23
+
24
def create_vectorstore(splits):
    """Index the chunks in a Chroma store and return a retriever over it.

    Args:
        splits: Document chunks to embed, e.g. the output of ``process_pdf``.

    Returns:
        The retriever obtained from the new store's ``as_retriever()``.
    """
    store = Chroma.from_documents(splits, OpenAIEmbeddings())
    return store.as_retriever()
29
+
30
def summarize_document(docs, llm):
    """Summarize the document chunks with a single LLM call.

    The text of every chunk is joined and inserted into the summary prompt
    ("stuff" style), then sent to ``llm`` once.

    Args:
        docs: Iterable of document chunks exposing ``page_content``.
        llm: The language model to call (the app passes a ``ChatOpenAI``).

    Returns:
        A ``(summary, cb)`` tuple: the summary text and the OpenAI callback
        handler holding token and cost totals for the call.
    """
    prompt = PromptTemplate.from_template(
        """
Write a concise summary of the following:

{context}
"""
    )
    context = "\n\n".join(doc.page_content for doc in docs)
    with get_openai_callback() as cb:
        response = llm.invoke(prompt.format(context=context))
    # Chat models return a message object with the text in ``content``;
    # fall back to the raw response for models that return a plain string.
    summary = getattr(response, "content", response)
    return summary, cb
40
+
41
def ask_question(query, retriever, llm):
    """Answer a question about the document with a retrieval QA chain.

    Args:
        query: The user's question.
        retriever: Retriever over the document chunks.
        llm: The language model used to produce the answer.

    Returns:
        A ``(result, cb)`` tuple: the chain output (source documents are
        requested via ``return_source_documents=True``) and the OpenAI
        callback handler holding token and cost totals for the call.
    """
    prompt_template = PromptTemplate.from_template(
        """
You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Use three sentences maximum and keep the answer concise.

{context}
Question: {question}
Answer:
"""
    )
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
        # Without this the chain falls back to its default prompt and the
        # instructions above are never sent to the model.
        chain_type_kwargs={"prompt": prompt_template},
    )
    with get_openai_callback() as cb:
        result = qa_chain.invoke({"query": query})
    return result, cb
63
+
64
# Main page: upload a PDF, then summarize it or ask questions about it.
st.title("Research Paper Summarization & Question Answering")
st.write("Upload a research paper (PDF), summarize its content, or ask specific questions related to the document.")

uploaded_file = st.file_uploader("Upload a PDF Document", type="pdf")

if uploaded_file is not None:
    with st.spinner("Processing the document..."):
        docs = process_pdf(uploaded_file)

    if not docs:
        # Nothing to summarize or index, so skip the model-backed actions.
        st.error("No extractable text was found in this PDF.")
    else:
        st.success("Document processed successfully!")

        llm = ChatOpenAI(model_name="gpt-4o-mini")

        if st.button("Summarize Document"):
            with st.spinner("Summarizing the document..."):
                summary, cb = summarize_document(docs, llm)
            st.subheader("Summary:")
            st.write(summary)
            st.write(f"Tokens Used: {cb.total_tokens}, Total Cost (USD): ${cb.total_cost:.5f}")

        query = st.text_input("Ask a question related to the document:")
        if st.button("Get Answer"):
            if not query.strip():
                # Avoid paying for embeddings and a completion on an empty question.
                st.warning("Please enter a question first.")
            else:
                with st.spinner("Retrieving the answer..."):
                    retriever = create_vectorstore(docs)
                    answer, cb = ask_question(query, retriever, llm)
                st.subheader("Answer:")
                # Show only the answer text when the chain returns its usual
                # dict; otherwise display whatever came back.
                st.write(answer["result"] if isinstance(answer, dict) else answer)
                st.write(f"Tokens Used: {cb.total_tokens}, Total Cost (USD): ${cb.total_cost:.5f}")
91
+
92
# Sidebar: static usage instructions for the app.
sidebar = st.sidebar
sidebar.title("Instructions")
instructions = """
1. Upload a research paper in PDF format.
2. Choose to either summarize the entire document or ask a specific question about its content.
3. For summarization, click the 'Summarize Document' button.
4. For Q&A, type your question in the input box and click 'Get Answer'.
5. Wait a few seconds for the response.
"""
sidebar.write(instructions)