SnehaAkula committed on
Commit
6728994
1 Parent(s): a94ad2b

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +127 -0
  2. requirements.txt +107 -0
app.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ import fitz
4
+ from PIL import Image
5
+ import tempfile
6
+ from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
7
+ from langchain.chains.question_answering import load_qa_chain
8
+ from langchain_openai import OpenAI
9
+ from docx import Document
10
+ import io
11
+
12
+
13
+
14
# The OpenAI API key is a secret and must never be committed to source control.
# Supply it through the OPENAI_API_KEY environment variable (for example a
# deployment secret or a local shell export). The key that used to be
# hard-coded here has been published and should be revoked.
if not os.environ.get("OPENAI_API_KEY"):
    st.error(
        "OPENAI_API_KEY is not set. Configure it as an environment variable "
        "before starting the app."
    )
    # Halt this script run: nothing below can work without credentials.
    st.stop()

# Initialize the per-session conversation history on first run only, so that
# Streamlit reruns do not wipe earlier questions and answers.
if "conversation_history" not in st.session_state:
    st.session_state.conversation_history = []
20
+
21
+ # Function to load document and perform question answering (cached)
22
+ from docx import Document
23
+
24
@st.cache_data
def process_document(uploaded_file, query):
    """Answer ``query`` against an uploaded PDF or Word document.

    The upload is copied to a temporary file because the loaders used here
    are constructed from a filesystem path. The temporary file is removed
    before returning, including when loading or the QA chain raises.

    Args:
        uploaded_file: Upload object from the Streamlit file uploader; its
            ``name`` and ``getvalue()`` are used.
        query: Question to ask about the document. A blank query skips the
            LLM call entirely and only the document text is returned.

    Returns:
        A ``(answer, document_text)`` tuple. ``answer`` is the chain's
        ``"output_text"``, or ``""`` for an unsupported file type or a blank
        query. ``document_text`` is the newline-joined paragraph text for
        ``.docx`` files and ``None`` otherwise.
    """
    file_extension = os.path.splitext(uploaded_file.name)[1].lower()

    # Reject unsupported types before touching the filesystem so no temporary
    # file is created (and leaked) for them.
    if file_extension not in (".pdf", ".docx"):
        st.error("Unsupported file format. Please upload a PDF file (.pdf) or a Word document (.docx).")
        return "", None

    # getvalue() returns the whole payload regardless of the current stream
    # position, unlike read(), which yields nothing once the stream has
    # already been consumed.
    with tempfile.NamedTemporaryFile(delete=False, suffix=file_extension) as tmp_file:
        tmp_file.write(uploaded_file.getvalue())
        tmp_path = tmp_file.name

    try:
        if file_extension == ".pdf":
            loader = PyPDFLoader(tmp_path)
            document_text = None
        else:
            loader = Docx2txtLoader(tmp_path)
            document = Document(tmp_path)
            document_text = "\n".join(paragraph.text for paragraph in document.paragraphs)

        # No question, no LLM call: avoids a paid request with an empty prompt
        # when the caller only wants the extracted text.
        if not query or not query.strip():
            return "", document_text

        documents = loader.load()
        chain = load_qa_chain(llm=OpenAI(), verbose=True)
        response = chain.invoke({"input_documents": documents, "question": query})
        return response["output_text"], document_text
    finally:
        # delete=False was needed so the loaders can reopen the file by path;
        # clean it up ourselves on every exit path.
        os.unlink(tmp_path)
55
+
56
+
57
+
58
+
59
+
60
+ # Function to update conversation history
61
def update_conversation(query, response):
    """Append one question/answer exchange to the session's conversation history."""
    exchange = {"question": query, "answer": response}
    st.session_state.conversation_history.append(exchange)
63
+
64
+ # Function to convert PDF pages to images
65
def pdf_to_images(pdf_bytes):
    """Render every page of a PDF to a PIL image.

    Args:
        pdf_bytes: Raw bytes of the PDF file.

    Returns:
        A list of ``PIL.Image.Image`` objects in page order; empty when the
        document has no pages.
    """
    images = []
    # The context manager closes the document even if rendering a page fails;
    # the original never closed it.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        for page in doc:
            pixmap = page.get_pixmap()
            # NOTE(review): "RGB" assumes get_pixmap()'s defaults produce an
            # RGB pixmap without alpha; confirm if rendering options change.
            # frombytes copies the pixel data, so the image remains valid
            # after the document is closed.
            images.append(
                Image.frombytes("RGB", (pixmap.width, pixmap.height), pixmap.samples)
            )
    return images
76
+
77
+ # Streamlit UI
78
def main():
    """Render the Streamlit UI.

    The sidebar holds the file uploader, the question box and the "Ask"
    button. The main area previews the uploaded document (text for .docx,
    a selectable page image with a download button for .pdf) and lists the
    conversation history stored in the session state.
    """
    # Set sidebar title
    st.sidebar.title("7steps.AI")
    st.sidebar.markdown("---")

    # File uploader for document in sidebar
    uploaded_file = st.sidebar.file_uploader("Upload a document", type=["pdf", "docx"])

    # Display document content or images
    if uploaded_file is not None:
        st.title("Document Content")
        file_extension = os.path.splitext(uploaded_file.name)[1].lower()
        if file_extension == ".docx":
            # Read the text straight from the in-memory upload. Going through
            # process_document() with an empty question would run the QA
            # chain (an LLM request) just to show a preview.
            document = Document(io.BytesIO(uploaded_file.getvalue()))
            document_text = "\n".join(paragraph.text for paragraph in document.paragraphs)
            st.text_area("Document Text", value=document_text, height=300)
        elif file_extension == ".pdf":
            images = pdf_to_images(uploaded_file.getvalue())
            if images:
                page_number = st.number_input("Page Number", value=1, min_value=1, max_value=len(images))
                # page_number is 1-based in the UI; the list is 0-based.
                selected_page = images[page_number - 1]
                st.image(selected_page, caption=f"Page {page_number}", use_column_width=True)

                # Download button for the currently displayed page
                img_bytes = io.BytesIO()
                selected_page.save(img_bytes, format='PNG')
                st.download_button("Download Image", img_bytes.getvalue(), f'Page_{page_number}.png')

    # Text box for new question in sidebar
    query = st.sidebar.text_input("Enter your question:")

    # "Ask" button in sidebar
    if st.sidebar.button("Ask"):
        if uploaded_file is None:
            st.sidebar.write("Please upload a document first.")
        elif not query.strip():
            # Do not send an empty prompt to the LLM.
            st.sidebar.write("Please enter a question.")
        else:
            response, _ = process_document(uploaded_file, query)
            if response:  # Only record exchanges that produced an answer
                update_conversation(query, response)

    # Display conversation history
    st.title("Conversation History")
    for item in st.session_state.conversation_history:
        st.write("You:", item["question"])
        st.write("AI:", item["answer"])
124
+
125
+ # Run the application
126
# Script entry point: build the UI only when this file is executed directly.
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.9.3
2
+ aiosignal==1.3.1
3
+ altair==5.2.0
4
+ annotated-types==0.6.0
5
+ anyio==4.3.0
6
+ async-timeout==4.0.3
7
+ attrs==23.2.0
8
+ blinker==1.7.0
9
+ cachetools==5.3.3
10
+ certifi==2024.2.2
11
+ charset-normalizer==3.3.2
12
+ ci-info==0.3.0
13
+ click==8.1.7
14
+ colorama==0.4.6
15
+ configobj==5.0.8
16
+ configparser==6.0.1
17
+ dataclasses-json==0.6.4
18
+ distro==1.9.0
19
+ docx2txt==0.8
20
+ etelemetry==0.3.1
21
+ exceptiongroup==1.2.0
22
+ filelock==3.13.1
23
+ frozenlist==1.4.1
24
+ gitdb==4.0.11
25
+ GitPython==3.1.42
26
+ greenlet==3.0.3
27
+ h11==0.14.0
28
+ httpcore==1.0.4
29
+ httplib2==0.22.0
30
+ httpx==0.27.0
31
+ idna==3.6
32
+ importlib_resources==6.1.3
33
+ isodate==0.6.1
34
+ Jinja2==3.1.3
35
+ jsonpatch==1.33
36
+ jsonpointer==2.4
37
+ jsonschema==4.21.1
38
+ jsonschema-specifications==2023.12.1
39
+ langchain==0.1.11
40
+ langchain-community==0.0.27
41
+ langchain-core==0.1.30
42
+ langchain-openai==0.0.8
43
+ langchain-text-splitters==0.0.1
44
+ langsmith==0.1.23
45
+ looseversion==1.3.0
46
+ lxml==5.1.0
47
+ markdown-it-py==3.0.0
48
+ MarkupSafe==2.1.5
49
+ marshmallow==3.21.1
50
+ mdurl==0.1.2
51
+ multidict==6.0.5
52
+ mypy-extensions==1.0.0
53
+ networkx==3.1
54
+ nibabel==5.2.1
55
+ nipype==1.8.6
56
+ numpy==1.24.4
57
+ openai==1.13.3
58
+ orjson==3.9.15
59
+ packaging==23.2
60
+ pandas==2.0.3
61
+ pathlib==1.0.1
62
+ pillow==10.2.0
63
+ pkgutil_resolve_name==1.3.10
64
+ protobuf==4.25.3
65
+ prov==2.0.0
66
+ pyarrow==15.0.1
67
+ pydantic==2.6.3
68
+ pydantic_core==2.16.3
69
+ pydeck==0.8.1b0
70
+ pydot==2.0.0
71
+ Pygments==2.17.2
72
+ PyMuPDF==1.23.26
73
+ PyMuPDFb==1.23.22
74
+ pyparsing==3.1.2
75
+ pypdf==4.1.0
76
+ python-dateutil==2.9.0.post0
77
+ python-docx==1.1.0
78
+ pytz==2024.1
79
+ pyxnat==1.6.2
80
+ PyYAML==6.0.1
81
+ rdflib==7.0.0
82
+ referencing==0.33.0
83
+ regex==2023.12.25
84
+ requests==2.31.0
85
+ rich==13.7.1
86
+ rpds-py==0.18.0
87
+ scipy==1.10.1
88
+ simplejson==3.19.2
89
+ six==1.16.0
90
+ smmap==5.0.1
91
+ sniffio==1.3.1
92
+ SQLAlchemy==2.0.28
93
+ streamlit==1.32.0
94
+ tenacity==8.2.3
95
+ tiktoken==0.6.0
96
+ toml==0.10.2
97
+ toolz==0.12.1
98
+ tornado==6.4
99
+ tqdm==4.66.2
100
+ traits==6.3.2
101
+ typing-inspect==0.9.0
102
+ typing_extensions==4.10.0
103
+ tzdata==2024.1
104
+ urllib3==2.2.1
105
+ watchdog==4.0.0
106
+ yarl==1.9.4
107
+ zipp==3.17.0