Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -18,6 +18,9 @@ import tiktoken
|
|
18 |
import secrets
|
19 |
import openai
|
20 |
import time
|
|
|
|
|
|
|
21 |
|
22 |
tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo")
|
23 |
|
@@ -146,6 +149,44 @@ def add_files_to_zip(session_id):
|
|
146 |
arcname = os.path.relpath(file_path, session_id)
|
147 |
zipObj.write(file_path, arcname)
|
148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
## Summary functions ##
|
150 |
|
151 |
## Load each doc from the vector store
|
@@ -321,7 +362,17 @@ with gr.Blocks() as demo:
|
|
321 |
gr.Markdown("Upload your documents and question them.")
|
322 |
with gr.Accordion("Open to enter your API key", open=False):
|
323 |
apikey_input = gr.Textbox(placeholder="Type here your OpenAI API key to use Summarization and Q&A", label="OpenAI API Key",type='password')
|
324 |
-
with gr.Tab("Upload PDF & TXT"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
325 |
tb_session_id = gr.Textbox(label='session id')
|
326 |
docs_input = gr.File(file_count="multiple", file_types=[".txt", ".pdf",".zip",".docx"])
|
327 |
db_output = gr.outputs.File(label="Download zipped database")
|
@@ -346,6 +397,9 @@ with gr.Blocks() as demo:
|
|
346 |
history = gr.Textbox(label='History')
|
347 |
history.style(show_copy_button=True)
|
348 |
|
|
|
|
|
|
|
349 |
btn_generate_db.click(embed_files, inputs=[docs_input,tb_session_id], outputs=[db_output,tb_session_id])
|
350 |
btn_reset_db.click(reset_database,inputs=[tb_session_id],outputs=[db_output])
|
351 |
btn_summary.click(summarize_docs, inputs=[apikey_input,tb_session_id], outputs=summary_output)
|
|
|
18 |
import secrets
|
19 |
import openai
|
20 |
import time
|
21 |
+
from duckduckgo_search import DDGS
|
22 |
+
import requests
|
23 |
+
import tempfile
|
24 |
|
25 |
tokenizer = tiktoken.encoding_for_model("gpt-3.5-turbo")
|
26 |
|
|
|
149 |
arcname = os.path.relpath(file_path, session_id)
|
150 |
zipObj.write(file_path, arcname)
|
151 |
|
152 |
+
|
153 |
+
## Search files functions ##
|
154 |
+
|
155 |
+
def search_docs(topic, max_references):
    """Search DuckDuckGo for PDF documents about *topic*.

    Args:
        topic: Free-text search query; ``filetype:pdf`` is appended to it.
        max_references: Maximum number of results to return.

    Returns:
        A list of strings formatted as
        ``"TITLE : <title> -- BODY : <body> -- URL : <href>"``. The list is
        empty when the topic is blank or ``max_references`` is not positive.
    """
    # Skip the network round-trip when there is nothing meaningful to ask for.
    if not topic or not str(topic).strip() or max_references <= 0:
        return []

    doc_list = []
    with DDGS() as ddgs:
        # NOTE(review): timelimit='n' does not look like one of the documented
        # values (d / w / m / y) -- kept as-is, confirm the intent.
        results = ddgs.text(f'{topic} filetype:pdf', region='wt-wt', safesearch='On', timelimit='n')
        for count, r in enumerate(results):
            if count >= max_references:
                break
            # Keep the URL as the last space-separated token: downstream code
            # recovers it by splitting the reference string on whitespace.
            doc_list.append("TITLE : " + r['title'] + " -- BODY : " + r['body'] + " -- URL : " + r['href'])
    return doc_list
|
165 |
+
|
166 |
+
|
167 |
+
def _pdf_prefix_from_url(url):
    """Derive a filesystem-safe temp-file prefix from the last path segment of *url*."""
    import re
    from urllib.parse import unquote, urlsplit

    # Only look at the path component so query strings and fragments never
    # end up in the file name.
    name = unquote(urlsplit(url).path.rsplit('/', 1)[-1])
    if name.lower().endswith('.pdf'):
        name = name[:-4]
    # Anything that is not a word character, dot or dash (path separators
    # included, e.g. from a decoded %2F) is replaced by an underscore.
    name = re.sub(r'[^\w.-]', '_', name)
    # Bound the length so prefix + random part + suffix stays a valid file name.
    return name[:100] or 'document'


def store_files(references, timeout=30):
    """Download the documents listed in *references* into temporary PDF files.

    Args:
        references: Iterable of reference strings whose last
            whitespace-separated token is the document URL. ``None`` or an
            empty iterable yields an empty result.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of closed ``tempfile.NamedTemporaryFile`` objects (created with
        ``delete=False`` and a ``.pdf`` suffix), one per URL that answered
        with HTTP 200. URLs that fail or answer with another status are
        skipped.
    """
    temp_files = []
    if not references:
        return temp_files

    # The URL is the last whitespace-separated token of each reference.
    url_list = []
    for ref in references:
        tokens = ref.split()
        if tokens:
            url_list.append(tokens[-1])

    for url in url_list:
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as exc:
            # One unreachable document must not abort the whole batch.
            print('Download failed :', url, exc)
            continue
        if response.status_code != 200:
            continue

        prefix = _pdf_prefix_from_url(url)
        print('File name :', prefix)
        # delete=False keeps the file on disk after closing so the caller can
        # read it back through its .name attribute.
        with tempfile.NamedTemporaryFile(delete=False, prefix=prefix, suffix='.pdf') as temp_file:
            temp_file.write(response.content)
        temp_files.append(temp_file)

    return temp_files
|
188 |
+
|
189 |
+
|
190 |
## Summary functions ##
|
191 |
|
192 |
## Load each doc from the vector store
|
|
|
362 |
gr.Markdown("Upload your documents and question them.")
|
363 |
with gr.Accordion("Open to enter your API key", open=False):
|
364 |
apikey_input = gr.Textbox(placeholder="Type here your OpenAI API key to use Summarization and Q&A", label="OpenAI API Key",type='password')
|
365 |
+
with gr.Tab("Upload PDF & TXT"):
|
366 |
+
with gr.Accordion("Get files from the web", open=False):
|
367 |
+
with gr.Column():
|
368 |
+
topic_input = gr.Textbox(placeholder="Type your research", label="Research")
|
369 |
+
with gr.Row():
|
370 |
+
max_files = gr.Slider(1, 30, step=1, value=10, label="Maximum number of files")
|
371 |
+
btn_search = gr.Button("Search")
|
372 |
+
dd_documents = gr.Dropdown(label='List of documents', info='Click to remove from selection', multiselect=True)
|
373 |
+
dd_documents.style(container=True)
|
374 |
+
with gr.Row():
|
375 |
+
btn_dl = gr.Button("Add these files to the Database")
|
376 |
tb_session_id = gr.Textbox(label='session id')
|
377 |
docs_input = gr.File(file_count="multiple", file_types=[".txt", ".pdf",".zip",".docx"])
|
378 |
db_output = gr.outputs.File(label="Download zipped database")
|
|
|
397 |
history = gr.Textbox(label='History')
|
398 |
history.style(show_copy_button=True)
|
399 |
|
400 |
+
|
401 |
+
btn_search.click(search_docs, inputs=[topic_input, max_files], outputs=dd_documents)
|
402 |
+
btn_dl.click(add_to_db, inputs=[dd_documents,tb_session_id], outputs=[db_output,tb_session_id])
|
403 |
btn_generate_db.click(embed_files, inputs=[docs_input,tb_session_id], outputs=[db_output,tb_session_id])
|
404 |
btn_reset_db.click(reset_database,inputs=[tb_session_id],outputs=[db_output])
|
405 |
btn_summary.click(summarize_docs, inputs=[apikey_input,tb_session_id], outputs=summary_output)
|