YchKhan committed on
Commit
63f6580
1 Parent(s): 3774c69

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -1
app.py CHANGED
@@ -18,6 +18,9 @@ import tiktoken
18
  import secrets
19
  import openai
20
  import time
 
 
 
21
 
22
  tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo")
23
 
@@ -146,6 +149,44 @@ def add_files_to_zip(session_id):
146
  arcname = os.path.relpath(file_path, session_id)
147
  zipObj.write(file_path, arcname)
148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  ## Summary functions ##
150
 
151
  ## Load each doc from the vector store
@@ -321,7 +362,17 @@ with gr.Blocks() as demo:
321
  gr.Markdown("Upload your documents and question them.")
322
  with gr.Accordion("Open to enter your API key", open=False):
323
  apikey_input = gr.Textbox(placeholder="Type here your OpenAI API key to use Summarization and Q&A", label="OpenAI API Key",type='password')
324
- with gr.Tab("Upload PDF & TXT"):
 
 
 
 
 
 
 
 
 
 
325
  tb_session_id = gr.Textbox(label='session id')
326
  docs_input = gr.File(file_count="multiple", file_types=[".txt", ".pdf",".zip",".docx"])
327
  db_output = gr.outputs.File(label="Download zipped database")
@@ -346,6 +397,9 @@ with gr.Blocks() as demo:
346
  history = gr.Textbox(label='History')
347
  history.style(show_copy_button=True)
348
 
 
 
 
349
  btn_generate_db.click(embed_files, inputs=[docs_input,tb_session_id], outputs=[db_output,tb_session_id])
350
  btn_reset_db.click(reset_database,inputs=[tb_session_id],outputs=[db_output])
351
  btn_summary.click(summarize_docs, inputs=[apikey_input,tb_session_id], outputs=summary_output)
 
18
  import secrets
19
  import openai
20
  import time
21
+ from duckduckgo_search import DDGS
22
+ import requests
23
+ import tempfile
24
 
25
  tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo")
26
 
 
149
  arcname = os.path.relpath(file_path, session_id)
150
  zipObj.write(file_path, arcname)
151
 
152
+
153
+ ## Search files functions ##
154
+
155
def search_docs(topic, max_references):
    """Search DuckDuckGo for PDF documents about ``topic``.

    Args:
        topic: Free-text search query; ``filetype:pdf`` is appended to it.
        max_references: Maximum number of results to return. Coerced to
            ``int`` so that a float delivered by a UI slider is accepted.

    Returns:
        A list of strings, one per search hit, formatted as
        ``"TITLE : <title> -- BODY : <body> -- URL : <href>"``. The list is
        empty when ``max_references`` is zero or negative.
    """
    from itertools import islice

    limit = int(max_references)
    if limit <= 0:
        # Nothing requested: skip opening a search session altogether.
        return []

    query = '{} filetype:pdf'.format(topic)
    with DDGS() as ddgs:
        # NOTE(review): the library documents timelimit values d/w/m/y;
        # 'n' is kept as-is here -- confirm it is intended.
        results = ddgs.text(query, region='wt-wt', safesearch='On', timelimit='n')
        # islice stops pulling results as soon as `limit` hits were taken,
        # so no more of the result stream is consumed than needed.
        return [
            "TITLE : " + r['title'] + " -- BODY : " + r['body'] + " -- URL : " + r['href']
            for r in islice(results, limit)
        ]
165
+
166
+
167
def store_files(references, timeout=30):
    """Download the documents named in ``references`` into temporary PDF files.

    Args:
        references: Iterable of strings whose last space-separated token is
            the document URL (the format produced by ``search_docs``).
            ``None`` is treated as an empty selection.
        timeout: Seconds to wait for each HTTP request before giving up on
            that URL.

    Returns:
        A list of closed ``tempfile.NamedTemporaryFile`` objects, one per
        successfully downloaded document. The files are created with
        ``delete=False`` so they stay on disk; use ``.name`` for the path.
        References without an http(s) URL, failed requests and non-200
        responses are skipped.
    """
    from urllib.parse import urlsplit

    temp_files = []
    for ref in references or []:
        url = ref.split(" ")[-1]
        if not url.startswith(("http://", "https://")):
            # No usable URL in this reference; nothing to download.
            continue

        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as exc:
            # One unreachable host must not abort the rest of the batch.
            print('Download failed :', url, exc)
            continue
        if response.status_code != 200:
            continue

        # Use only the URL path so a query string or fragment does not end
        # up in the file name or hide the ".pdf" extension.
        filename = urlsplit(url).path.rsplit('/', 1)[-1]
        if filename.lower().endswith('.pdf'):
            filename = filename[:-4]
            print('File name.pdf :', filename)
        else:
            print('File name :', filename)
        # Keep the prefix short: tempfile appends a random part and suffix.
        filename = filename[:200]

        # The context manager closes the file even if the write fails;
        # delete=False keeps it on disk for the caller.
        with tempfile.NamedTemporaryFile(delete=False, prefix=filename, suffix='.pdf') as temp_file:
            temp_file.write(response.content)
        temp_files.append(temp_file)

    return temp_files
188
+
189
+
190
  ## Summary functions ##
191
 
192
  ## Load each doc from the vector store
 
362
  gr.Markdown("Upload your documents and question them.")
363
  with gr.Accordion("Open to enter your API key", open=False):
364
  apikey_input = gr.Textbox(placeholder="Type here your OpenAI API key to use Summarization and Q&A", label="OpenAI API Key",type='password')
365
+ with gr.Tab("Upload PDF & TXT"):
366
+ with gr.Accordion("Get files from the web", open=False):
367
+ with gr.Column():
368
+ topic_input = gr.Textbox(placeholder="Type your research", label="Research")
369
+ with gr.Row():
370
+ max_files = gr.Slider(1, 30, step=1, value=10, label="Maximum number of files")
371
+ btn_search = gr.Button("Search")
372
+ dd_documents = gr.Dropdown(label='List of documents', info='Click to remove from selection', multiselect=True)
373
+ dd_documents.style(container=True)
374
+ with gr.Row():
375
+ btn_dl = gr.Button("Add these files to the Database")
376
  tb_session_id = gr.Textbox(label='session id')
377
  docs_input = gr.File(file_count="multiple", file_types=[".txt", ".pdf",".zip",".docx"])
378
  db_output = gr.outputs.File(label="Download zipped database")
 
397
  history = gr.Textbox(label='History')
398
  history.style(show_copy_button=True)
399
 
400
+
401
+ btn_search.click(search_docs, inputs=[topic_input, max_files], outputs=dd_documents)
402
+ btn_dl.click(add_to_db, inputs=[dd_documents,tb_session_id], outputs=[db_output,tb_session_id])
403
  btn_generate_db.click(embed_files, inputs=[docs_input,tb_session_id], outputs=[db_output,tb_session_id])
404
  btn_reset_db.click(reset_database,inputs=[tb_session_id],outputs=[db_output])
405
  btn_summary.click(summarize_docs, inputs=[apikey_input,tb_session_id], outputs=summary_output)