Spaces:
Running
Running
trend
Browse files- arxiv_agent.py +84 -65
arxiv_agent.py
CHANGED
@@ -110,8 +110,21 @@ def dailyDownload(agent_ls):
|
|
110 |
agent.paper_embedding = update_paper_file
|
111 |
print("Today is " + agent.newest_day.strftime("%m/%d/%Y"))
|
112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
-
|
|
|
|
|
|
|
|
|
115 |
|
116 |
|
117 |
class ArxivAgent:
|
@@ -119,6 +132,8 @@ class ArxivAgent:
|
|
119 |
|
120 |
self.dataset_path = "./dataset/paper.json"
|
121 |
self.thought_path = "./dataset/thought.json"
|
|
|
|
|
122 |
|
123 |
self.embedding_path = "./dataset/paper_embedding.pkl"
|
124 |
self.thought_embedding_path = './dataset/thought_embedding.pkl'
|
@@ -127,30 +142,24 @@ class ArxivAgent:
|
|
127 |
self.today = datetime.datetime.now().strftime("%m/%d/%Y")
|
128 |
|
129 |
self.newest_day = ""
|
130 |
-
self.
|
131 |
-
|
132 |
self.download()
|
133 |
try:
|
134 |
thread6.run_threaded(dailyDownload, [self])
|
135 |
-
|
136 |
except:
|
137 |
print("Error: unable to start thread")
|
138 |
-
|
139 |
-
# self.paper_by_date = self.paper
|
140 |
def edit_profile(self, profile, author_name):
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
data = json.load(file)
|
145 |
-
data[author_name]=profile
|
146 |
-
with open(filename, "w") as f:
|
147 |
-
json.dump(data, f)
|
148 |
return "Successfully edit profile!"
|
149 |
|
150 |
def get_profile(self, author_name):
    """Return the research profile for ``author_name``.

    Args:
        author_name: Name of the author to look up.

    Returns:
        Whatever ``self.get_arxiv_data_by_author`` returns for the name, or
        ``None`` when no usable name was supplied.
    """
    # Treat None, "" and whitespace-only input alike: none of them names an
    # author, so skip the lookup instead of failing further down the call.
    if not author_name or not author_name.strip():
        return None
    return self.get_arxiv_data_by_author(author_name)
|
156 |
def select_date(self, method, profile_input):
|
@@ -186,16 +195,40 @@ class ArxivAgent:
|
|
186 |
data_chunk_embedding=chunk_embedding_date
|
187 |
profile = profile_input
|
188 |
|
189 |
-
|
190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
|
192 |
# import pdb
|
193 |
# pdb.set_trace()
|
194 |
-
|
195 |
-
|
196 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
|
198 |
-
|
|
|
|
|
|
|
|
|
|
|
199 |
if key_update not in self.thought:
|
200 |
self.thought[key_update] = []
|
201 |
if key_update not in self.thought_embedding:
|
@@ -205,22 +238,11 @@ class ArxivAgent:
|
|
205 |
self.thought_embedding[key_update].append(get_bert_embedding([trend])[0])
|
206 |
self.thought[key_update].append(idea[0])
|
207 |
self.thought_embedding[key_update].append(get_bert_embedding([idea])[0])
|
208 |
-
# with open(self.dataset_path, "w") as f_:
|
209 |
-
# json.dump(self.paper, f_)
|
210 |
-
|
211 |
-
with open(self.thought_path, "w") as f_:
|
212 |
-
json.dump(self.thought, f_)
|
213 |
-
|
214 |
-
with open(self.thought_embedding_path, "wb") as f:
|
215 |
-
pickle.dump(self.thought_embedding, f)
|
216 |
-
|
217 |
|
218 |
return trend, reference, idea
|
219 |
|
220 |
def response(self, data, profile_input):
|
221 |
-
# dataset = self.paper_by_date
|
222 |
|
223 |
-
# dataset = self.paper
|
224 |
query = [data]
|
225 |
profile = profile_input
|
226 |
|
@@ -315,7 +337,7 @@ class ArxivAgent:
|
|
315 |
|
316 |
|
317 |
|
318 |
-
def
|
319 |
filename = self.feedback_path
|
320 |
|
321 |
if os.path.exists(filename):
|
@@ -330,13 +352,35 @@ class ArxivAgent:
|
|
330 |
m = {}
|
331 |
self.feedback = m.copy()
|
332 |
|
333 |
-
|
334 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
335 |
|
336 |
-
def load_thought(self):
|
337 |
filename = self.thought_path
|
338 |
filename_emb = self.thought_embedding_path
|
339 |
-
|
340 |
if os.path.exists(filename):
|
341 |
with open(filename,"rb") as f:
|
342 |
content = f.read()
|
@@ -348,7 +392,6 @@ class ArxivAgent:
|
|
348 |
with open(filename, mode='w', encoding='utf-8') as ff:
|
349 |
m = {}
|
350 |
|
351 |
-
|
352 |
if os.path.exists(filename_emb):
|
353 |
with open(filename_emb,"rb") as f:
|
354 |
content = f.read()
|
@@ -366,12 +409,7 @@ class ArxivAgent:
|
|
366 |
|
367 |
|
368 |
|
369 |
-
|
370 |
-
# papers = data[time]['abstract']
|
371 |
-
# papers_embedding=get_bert_embedding(papers)
|
372 |
-
# time_chunks_embed[time.strftime("%m/%d/%Y")] = papers_embedding
|
373 |
-
# return
|
374 |
-
# for k in json_data.keys():
|
375 |
def update_feedback_thought(self, query, ansA, ansB, feedbackA, feedbackB):
|
376 |
try:
|
377 |
thread6.run_threaded(feedback_thought, [self, query, ansA, ansB, feedbackA, feedbackB])
|
@@ -413,22 +451,7 @@ class ArxivAgent:
|
|
413 |
|
414 |
|
415 |
|
416 |
-
|
417 |
-
if os.path.exists(filename):
|
418 |
-
with open(filename,"r") as f:
|
419 |
-
content = f.read()
|
420 |
-
if not content:
|
421 |
-
m = {}
|
422 |
-
else:
|
423 |
-
m = json.loads(content)
|
424 |
-
else:
|
425 |
-
with open(filename, mode='w', encoding='utf-8') as ff:
|
426 |
-
m = {}
|
427 |
-
|
428 |
-
|
429 |
-
json_data = m.copy()
|
430 |
-
|
431 |
-
if author_name in json_data: return json_data[author_name]
|
432 |
|
433 |
author_query = author_name.replace(" ", "+")
|
434 |
url = f"http://export.arxiv.org/api/query?search_query=au:{author_query}&start=0&max_results=300" # Adjust max_results if needed
|
@@ -512,15 +535,11 @@ class ArxivAgent:
|
|
512 |
# pdb.set_trace()
|
513 |
personal_info = "; ".join([f"{details['Title & Abstract']}" for details in papers_list])
|
514 |
info = summarize_research_direction(personal_info)
|
515 |
-
|
516 |
-
with open(filename,"w") as f:
|
517 |
-
json.dump(json_data,f)
|
518 |
-
return json_data[author_name]
|
519 |
|
520 |
-
|
521 |
|
522 |
else:
|
523 |
-
# print("Failed to fetch data from arXiv.")
|
524 |
return None
|
525 |
|
526 |
|
|
|
110 |
agent.paper_embedding = update_paper_file
|
111 |
print("Today is " + agent.newest_day.strftime("%m/%d/%Y"))
|
112 |
|
113 |
+
def _dump_atomically(path, mode, dump, payload):
    """Write ``payload`` to ``path`` without exposing a half-written file.

    The data is first written to ``path + ".tmp"`` and then moved over the
    target with ``os.replace``, so an interruption in the middle of a write
    leaves the previous snapshot untouched.

    Args:
        path: Destination file path.
        mode: Mode passed to ``open`` (``"w"`` for JSON, ``"wb"`` for pickle).
        dump: Callable invoked as ``dump(payload, file_object)``.
        payload: Object to serialise.
    """
    tmp_path = path + ".tmp"
    with open(tmp_path, mode) as handle:
        dump(payload, handle)
    os.replace(tmp_path, path)


def dailySave(agent_ls):
    """Periodically persist the agent's in-memory caches to disk.

    Sleeps ``DAY_TIME`` between rounds and then writes ``trend_idea``,
    ``thought``, ``thought_embedding`` and ``profile`` to their respective
    ``*_path`` files. The loop never exits, so this function does not return.

    Args:
        agent_ls: Sequence whose first element is the agent to save.
    """
    agent = agent_ls[0]
    # (path attribute, data attribute, open mode, serialiser)
    stores = (
        ("trend_idea_path", "trend_idea", "w", json.dump),
        ("thought_path", "thought", "w", json.dump),
        ("thought_embedding_path", "thought_embedding", "wb", pickle.dump),
        ("profile_path", "profile", "w", json.dump),
    )
    while True:
        time.sleep(DAY_TIME)
        for path_attr, data_attr, mode, dump in stores:
            # Each store is saved independently: one failing dump must not
            # end the loop and silently stop every later save.
            try:
                _dump_atomically(
                    getattr(agent, path_attr),
                    mode,
                    dump,
                    getattr(agent, data_attr),
                )
            except Exception as exc:
                print("Error: unable to save " + data_attr + ": " + repr(exc))
|
128 |
|
129 |
|
130 |
class ArxivAgent:
|
|
|
132 |
|
133 |
self.dataset_path = "./dataset/paper.json"
|
134 |
self.thought_path = "./dataset/thought.json"
|
135 |
+
self.trend_idea_path = "./dataset/trend_idea.json"
|
136 |
+
self.profile_path = "./dataset/profile.json"
|
137 |
|
138 |
self.embedding_path = "./dataset/paper_embedding.pkl"
|
139 |
self.thought_embedding_path = './dataset/thought_embedding.pkl'
|
|
|
142 |
self.today = datetime.datetime.now().strftime("%m/%d/%Y")
|
143 |
|
144 |
self.newest_day = ""
|
145 |
+
self.load_cache()
|
146 |
+
|
147 |
self.download()
|
148 |
try:
|
149 |
thread6.run_threaded(dailyDownload, [self])
|
150 |
+
thread6.run_threaded(dailySave, [self])
|
151 |
except:
|
152 |
print("Error: unable to start thread")
|
153 |
+
|
|
|
154 |
def edit_profile(self, profile, author_name):
    """Store ``profile`` as the cached entry for ``author_name``.

    Only the in-memory ``self.profile`` mapping is modified here.

    Args:
        profile: Profile value to record.
        author_name: Key under which the profile is stored.

    Returns:
        A fixed confirmation message.
    """
    self.profile.update({author_name: profile})
    confirmation = "Successfully edit profile!"
    return confirmation
|
159 |
|
160 |
def get_profile(self, author_name):
    """Return the research profile for ``author_name``.

    Args:
        author_name: Name of the author to look up.

    Returns:
        Whatever ``self.get_arxiv_data_by_author`` returns for the name, or
        ``None`` when no usable name was supplied.
    """
    # Treat None, "" and whitespace-only input alike: none of them names an
    # author, so skip the lookup instead of failing further down the call.
    if not author_name or not author_name.strip():
        return None
    return self.get_arxiv_data_by_author(author_name)
|
165 |
def select_date(self, method, profile_input):
|
|
|
195 |
data_chunk_embedding=chunk_embedding_date
|
196 |
profile = profile_input
|
197 |
|
198 |
+
key_update = list(self.paper.keys())[-1]
|
199 |
+
isQuery = False
|
200 |
+
if profile in self.trend_idea:
|
201 |
+
if key_update in self.trend_idea[profile]:
|
202 |
+
if method in self.trend_idea[profile][key_update]:
|
203 |
+
trend = self.trend_idea[profile][key_update][method]["trend"]
|
204 |
+
reference = self.trend_idea[profile][key_update][method]["reference"]
|
205 |
+
idea = self.trend_idea[profile][key_update][method]["idea"]
|
206 |
+
isQuery = True
|
207 |
|
208 |
# import pdb
|
209 |
# pdb.set_trace()
|
210 |
+
if not(isQuery):
|
211 |
+
trend, paper_link = summarize_research_field(profile, "Machine Learning", dataset,data_chunk_embedding) # trend
|
212 |
+
reference = papertitleAndLink(paper_link)
|
213 |
+
idea = generate_ideas(trend) # idea
|
214 |
+
if profile in self.trend_idea:
|
215 |
+
if key_update in self.trend_idea[profile]:
|
216 |
+
if not(method in self.trend_idea[profile][key_update]):
|
217 |
+
self.trend_idea[profile][key_update][method] = {}
|
218 |
+
else:
|
219 |
+
self.trend_idea[profile][key_update] = {}
|
220 |
+
self.trend_idea[profile][key_update][method] = {}
|
221 |
+
else:
|
222 |
+
self.trend_idea[profile] = {}
|
223 |
+
self.trend_idea[profile][key_update] = {}
|
224 |
+
self.trend_idea[profile][key_update][method] = {}
|
225 |
|
226 |
+
self.trend_idea[profile][key_update][method]["trend"] = trend
|
227 |
+
self.trend_idea[profile][key_update][method]["reference"] = reference
|
228 |
+
self.trend_idea[profile][key_update][method]["idea"] = idea
|
229 |
+
|
230 |
+
|
231 |
+
|
232 |
if key_update not in self.thought:
|
233 |
self.thought[key_update] = []
|
234 |
if key_update not in self.thought_embedding:
|
|
|
238 |
self.thought_embedding[key_update].append(get_bert_embedding([trend])[0])
|
239 |
self.thought[key_update].append(idea[0])
|
240 |
self.thought_embedding[key_update].append(get_bert_embedding([idea])[0])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
|
242 |
return trend, reference, idea
|
243 |
|
244 |
def response(self, data, profile_input):
|
|
|
245 |
|
|
|
246 |
query = [data]
|
247 |
profile = profile_input
|
248 |
|
|
|
337 |
|
338 |
|
339 |
|
340 |
+
def load_cache(self):
|
341 |
filename = self.feedback_path
|
342 |
|
343 |
if os.path.exists(filename):
|
|
|
352 |
m = {}
|
353 |
self.feedback = m.copy()
|
354 |
|
355 |
+
filename = self.trend_idea_path
|
356 |
|
357 |
+
if os.path.exists(filename):
|
358 |
+
with open(filename,"rb") as f:
|
359 |
+
content = f.read()
|
360 |
+
if not content:
|
361 |
+
m = {}
|
362 |
+
else:
|
363 |
+
m = json.loads(content)
|
364 |
+
else:
|
365 |
+
with open(filename, mode='w', encoding='utf-8') as ff:
|
366 |
+
m = {}
|
367 |
+
self.trend_idea = m.copy()
|
368 |
+
|
369 |
+
filename = self.profile_path
|
370 |
+
if os.path.exists(filename):
|
371 |
+
with open(filename,"rb") as f:
|
372 |
+
content = f.read()
|
373 |
+
if not content:
|
374 |
+
m = {}
|
375 |
+
else:
|
376 |
+
m = json.loads(content)
|
377 |
+
else:
|
378 |
+
with open(filename, mode='w', encoding='utf-8') as ff:
|
379 |
+
m = {}
|
380 |
+
self.profile = m.copy()
|
381 |
|
|
|
382 |
filename = self.thought_path
|
383 |
filename_emb = self.thought_embedding_path
|
|
|
384 |
if os.path.exists(filename):
|
385 |
with open(filename,"rb") as f:
|
386 |
content = f.read()
|
|
|
392 |
with open(filename, mode='w', encoding='utf-8') as ff:
|
393 |
m = {}
|
394 |
|
|
|
395 |
if os.path.exists(filename_emb):
|
396 |
with open(filename_emb,"rb") as f:
|
397 |
content = f.read()
|
|
|
409 |
|
410 |
|
411 |
|
412 |
+
|
|
|
|
|
|
|
|
|
|
|
413 |
def update_feedback_thought(self, query, ansA, ansB, feedbackA, feedbackB):
|
414 |
try:
|
415 |
thread6.run_threaded(feedback_thought, [self, query, ansA, ansB, feedbackA, feedbackB])
|
|
|
451 |
|
452 |
|
453 |
|
454 |
+
if author_name in self.profile: return self.profile[author_name]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
455 |
|
456 |
author_query = author_name.replace(" ", "+")
|
457 |
url = f"http://export.arxiv.org/api/query?search_query=au:{author_query}&start=0&max_results=300" # Adjust max_results if needed
|
|
|
535 |
# pdb.set_trace()
|
536 |
personal_info = "; ".join([f"{details['Title & Abstract']}" for details in papers_list])
|
537 |
info = summarize_research_direction(personal_info)
|
538 |
+
self.profile[author_name] = info
|
|
|
|
|
|
|
539 |
|
540 |
+
return self.profile[author_name]
|
541 |
|
542 |
else:
|
|
|
543 |
return None
|
544 |
|
545 |
|