PDFAISS-2.3.3

Sleeping

App Files Files Community

YchKhan committed on Jun 19, 2023

Commit

63f6580

•

1 Parent(s): 3774c69

Update app.py

Browse files

Files changed (1) hide show

app.py +55 -1

app.py CHANGED Viewed

@@ -18,6 +18,9 @@ import tiktoken
 import secrets
 import openai
 import time
 tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo")
@@ -146,6 +149,44 @@ def add_files_to_zip(session_id):
                 arcname = os.path.relpath(file_path, session_id)
                 zipObj.write(file_path, arcname)
 ## Summary functions ##
 ## Load each doc from the vector store
@@ -321,7 +362,17 @@ with gr.Blocks() as demo:
     gr.Markdown("Upload your documents and question them.")
     with gr.Accordion("Open to enter your API key", open=False):
         apikey_input = gr.Textbox(placeholder="Type here your OpenAI API key to use Summarization and Q&A", label="OpenAI API Key",type='password')
-    with gr.Tab("Upload PDF & TXT"):
         tb_session_id = gr.Textbox(label='session id')
         docs_input = gr.File(file_count="multiple", file_types=[".txt", ".pdf",".zip",".docx"])
         db_output = gr.outputs.File(label="Download zipped database")
@@ -346,6 +397,9 @@ with gr.Blocks() as demo:
             history = gr.Textbox(label='History')
             history.style(show_copy_button=True)
     btn_generate_db.click(embed_files, inputs=[docs_input,tb_session_id], outputs=[db_output,tb_session_id])
     btn_reset_db.click(reset_database,inputs=[tb_session_id],outputs=[db_output])
     btn_summary.click(summarize_docs, inputs=[apikey_input,tb_session_id], outputs=summary_output)

 import secrets
 import openai
 import time
+from duckduckgo_search import DDGS
+import requests
+import tempfile
 tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo")
                 arcname = os.path.relpath(file_path, session_id)
                 zipObj.write(file_path, arcname)
+## Search files functions ##
+def search_docs(topic, max_references):
+  doc_list = []
+  with DDGS() as ddgs:
+    i=0
+    for r in ddgs.text('{} filetype:pdf'.format(topic), region='wt-wt', safesearch='On', timelimit='n'):
+      if i>=max_references:
+        break
+      doc_list.append("TITLE : " + r['title'] + " -- BODY : " + r['body'] + " -- URL : " + r['href'])
+      i+=1
+  return doc_list
+def store_files(references):
+    url_list=[]
+    temp_files = []
+    for ref in references:
+        url_list.append(ref.split(" ")[-1])
+    for url in url_list:
+        response = requests.get(url)
+        if response.status_code == 200:
+            filename = url.split('/')[-1]
+            if filename.split('.')[-1] == 'pdf':
+                filename = filename[:-4]
+                print('File name.pdf :', filename)
+                temp_file = tempfile.NamedTemporaryFile(delete=False,prefix=filename, suffix='.pdf')
+            else:
+                print('File name :', filename)
+                temp_file = tempfile.NamedTemporaryFile(delete=False,prefix=filename, suffix='.pdf')
+            temp_file.write(response.content)
+            temp_file.close()
+            temp_files.append(temp_file)
+    return temp_files
 ## Summary functions ##
 ## Load each doc from the vector store
     gr.Markdown("Upload your documents and question them.")
     with gr.Accordion("Open to enter your API key", open=False):
         apikey_input = gr.Textbox(placeholder="Type here your OpenAI API key to use Summarization and Q&A", label="OpenAI API Key",type='password')
+    with gr.Tab("Upload PDF & TXT"):
+        with gr.Accordion("Get files from the web", open=False):
+            with gr.Column():
+                topic_input = gr.Textbox(placeholder="Type your research", label="Research")
+                with gr.Row():
+                    max_files = gr.Slider(1, 30, step=1, value=10, label="Maximum number of files")
+                    btn_search = gr.Button("Search")
+                dd_documents = gr.Dropdown(label='List of documents', info='Click to remove from selection', multiselect=True)
+                dd_documents.style(container=True)
+                with gr.Row():
+                    btn_dl = gr.Button("Add these files to the Database")
         tb_session_id = gr.Textbox(label='session id')
         docs_input = gr.File(file_count="multiple", file_types=[".txt", ".pdf",".zip",".docx"])
         db_output = gr.outputs.File(label="Download zipped database")
             history = gr.Textbox(label='History')
             history.style(show_copy_button=True)
+    btn_search.click(search_docs, inputs=[topic_input, max_files], outputs=dd_documents)
+    btn_dl.click(add_to_db, inputs=[dd_documents,tb_session_id], outputs=[db_output,tb_session_id])
     btn_generate_db.click(embed_files, inputs=[docs_input,tb_session_id], outputs=[db_output,tb_session_id])
     btn_reset_db.click(reset_database,inputs=[tb_session_id],outputs=[db_output])
     btn_summary.click(summarize_docs, inputs=[apikey_input,tb_session_id], outputs=summary_output)