SnehaAkula committed on
Commit
b26dc5d
1 Parent(s): 1d1c23a

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -3,7 +3,7 @@ import streamlit as st
3
  import fitz
4
  from PIL import Image
5
  import tempfile
6
- from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
7
  from langchain.chains.question_answering import load_qa_chain
8
  from docx import Document
9
  import io
@@ -27,9 +27,9 @@ def process_document(uploaded_file, query):
27
  with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
28
  tmp_file.write(uploaded_file.read())
29
  file_extension = os.path.splitext(uploaded_file.name)[1].lower()
 
30
  if file_extension == ".pdf":
31
  loader = PyPDFLoader(tmp_file.name)
32
- document_text = None
33
  elif file_extension == ".docx":
34
  loader = Docx2txtLoader(tmp_file.name)
35
  document = Document(tmp_file.name)
@@ -69,7 +69,7 @@ def main():
69
  if uploaded_file is not None:
70
  st.title("Document Content")
71
  file_extension = os.path.splitext(uploaded_file.name)[1].lower()
72
- if file_extension in [".docx"]:
73
  response, document_text = process_document(uploaded_file, "")
74
  if document_text is not None:
75
  st.text_area("Document Text", value=document_text, height=300)
 
3
  import fitz
4
  from PIL import Image
5
  import tempfile
6
+ from langchain_community.document_loaders import PyPDFLoader, Docx2txtLoader
7
  from langchain.chains.question_answering import load_qa_chain
8
  from docx import Document
9
  import io
 
27
  with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
28
  tmp_file.write(uploaded_file.read())
29
  file_extension = os.path.splitext(uploaded_file.name)[1].lower()
30
+ document_text = None
31
  if file_extension == ".pdf":
32
  loader = PyPDFLoader(tmp_file.name)
 
33
  elif file_extension == ".docx":
34
  loader = Docx2txtLoader(tmp_file.name)
35
  document = Document(tmp_file.name)
 
69
  if uploaded_file is not None:
70
  st.title("Document Content")
71
  file_extension = os.path.splitext(uploaded_file.name)[1].lower()
72
+ if file_extension == ".docx":
73
  response, document_text = process_document(uploaded_file, "")
74
  if document_text is not None:
75
  st.text_area("Document Text", value=document_text, height=300)