Spaces:

notabaka
/

ASRtest

Runtime error

notabaka committed on Feb 23

Commit

f52a963

•

1 Parent(s): faa2e50

klo

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,24 +26,34 @@ st.title("Text Similarity Model")
 task = 'Given a web search query, retrieve relevant passages that answer the query'
 docs = st.sidebar.file_uploader("Upload documents", accept_multiple_files=True, type=['txt','pdf','xlsx','docx'])
 query = st.text_input("Enter search query")
 click = st.button("Search")
-import pdfplumber
-import docx2txt
 def extract_text(doc):
     if doc.type == 'text/plain':
         return doc.read().decode('utf-8')
     if doc.name.endswith(".pdf"):
-        raw = doc.read()
-        # Handle null bytes
-        raw = raw.replace(b'\x00', b'')
-        with pdfplumber.open(raw) as pdf:
             pages = [page.extract_text() for page in pdf.pages]
             return "\n".join(pages)

 task = 'Given a web search query, retrieve relevant passages that answer the query'
+UPLOAD_DIR = "uploads"
+if not os.path.exists(UPLOAD_DIR):
+    os.mkdir(UPLOAD_DIR)
+def save_upload(uploaded_file):
+    filepath = os.path.join(UPLOAD_DIR, uploaded_file.name)
+    with open(filepath,"wb") as f:
+        f.write(uploaded_file.getbuffer())
+    return filepath
 docs = st.sidebar.file_uploader("Upload documents", accept_multiple_files=True, type=['txt','pdf','xlsx','docx'])
 query = st.text_input("Enter search query")
 click = st.button("Search")
 def extract_text(doc):
     if doc.type == 'text/plain':
         return doc.read().decode('utf-8')
     if doc.name.endswith(".pdf"):
+        docPath = save_upload(doc)
+        with pdfplumber.open(docPath) as pdf:
             pages = [page.extract_text() for page in pdf.pages]
             return "\n".join(pages)