Spaces:
Sleeping
Sleeping
Update controllers/fetch_microsoft_mails.py
Browse files
controllers/fetch_microsoft_mails.py
CHANGED
@@ -62,40 +62,35 @@ def get_company_type(company_name:str)->str:
|
|
62 |
|
63 |
async def get_messages(code: str,websocket:WebSocket,brand_name: Optional[str] = None):
|
64 |
access_token = code
|
65 |
-
g_query =
|
66 |
if brand_name is not None:
|
67 |
-
g_query = f'(subject:"your order" OR subject:receipts OR subject:receipt OR subject: aankoopbon OR subject:reçu OR subject:invoice OR subject:invoices OR
|
68 |
-
page_token = None
|
69 |
messages = []
|
70 |
-
# max_results = 10
|
71 |
-
# gmail_url = f"https://www.googleapis.com/gmail/v1/users/me/messages?q={jobs_query}&maxResults={max_results}"
|
72 |
-
# gmail_response = requests.get(gmail_url, headers={"Authorization": f"Bearer {access_token}"})
|
73 |
-
# gmail_data = gmail_response.json()
|
74 |
-
# messages.append(gmail_data['messages'])
|
75 |
def fetch_message_wrapper(message_data):
|
76 |
message_id = message_data.get("id")
|
|
|
|
|
|
|
77 |
if message_id:
|
78 |
-
return fetch_message_data(access_token,
|
79 |
|
80 |
return None
|
81 |
|
82 |
while True:
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
print(len(gmail_data))
|
90 |
-
print(gmail_data)
|
91 |
|
92 |
-
if "
|
93 |
with ThreadPoolExecutor(max_workers=15) as executor:
|
94 |
|
95 |
|
96 |
|
97 |
futures=[executor.submit(fetch_message_wrapper, message_data) for message_data in
|
98 |
-
|
99 |
for future in futures:
|
100 |
message = future.result()
|
101 |
if message:
|
@@ -120,15 +115,15 @@ async def process_message(message:Message, websocket:WebSocket, chunk_size:int):
|
|
120 |
await websocket.send_text("NEXT_MESSAGE")
|
121 |
|
122 |
|
123 |
-
def fetch_message_data(access_token: str,
|
124 |
-
message_url = f"https://www.googleapis.com/gmail/v1/users/me/messages/{message_id}"
|
125 |
-
message_response = requests.get(message_url, headers={"Authorization": f"Bearer {access_token}"})
|
126 |
-
message_data = message_response.json()
|
127 |
# print(message_data)
|
128 |
-
subject =
|
129 |
-
company_from_mail =
|
130 |
|
131 |
-
body =
|
132 |
|
133 |
attachments,structed_attachment_data = extract_attachments_from_mail(access_token, message_data)
|
134 |
high_level_company_type = get_company_type(company_from_mail)
|
@@ -235,22 +230,44 @@ def fetch_attachment_data(access_token: str, message_id: str, attachment_id: str
|
|
235 |
def extract_attachments_from_mail(access_token: str, message_data: dict) -> List[Attachment]:
|
236 |
attachments = []
|
237 |
structured_data = []
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
|
|
|
|
|
|
|
|
|
|
253 |
return attachments,structured_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
254 |
|
255 |
|
256 |
def extract_text(html_content: str) -> str:
|
|
|
62 |
|
63 |
async def get_messages(code: str,websocket:WebSocket,brand_name: Optional[str] = None):
|
64 |
access_token = code
|
65 |
+
g_query = 'subject:"your order" OR subject:"receipts" OR subject:"receipt" OR subject:"aankoopbon" OR subject:"reçu" OR subject:"invoice" OR subject:"invoices" hasattachment:yes'
|
66 |
if brand_name is not None:
|
67 |
+
g_query = f'(subject:"your order" OR subject:"receipts" OR subject:"receipt" OR subject: "aankoopbon" OR subject:"reçu" OR subject:"invoice" OR subject:"invoices" OR from:{brand_name}) AND subject:{brand_name} has:attachment'
|
|
|
68 |
messages = []
|
|
|
|
|
|
|
|
|
|
|
69 |
def fetch_message_wrapper(message_data):
|
70 |
message_id = message_data.get("id")
|
71 |
+
message_subject = message_data.get('subject')
|
72 |
+
message_body_preview = message_data.get('bodyPreview')
|
73 |
+
from_mail = message_data['from']['emailAddress']['address']
|
74 |
if message_id:
|
75 |
+
return fetch_message_data(access_token,message_id,message_data,message_subject,message_body_preview,from_mail)
|
76 |
|
77 |
return None
|
78 |
|
79 |
while True:
|
80 |
+
outlook_url = f"https://graph.microsoft.com/v1.0/me/messages?q={g_query}"
|
81 |
+
|
82 |
+
outlook_response = requests.get(gmail_url, headers={"Authorization": f"Bearer {access_token}"})
|
83 |
+
outlook_data = gmail_response.json()
|
84 |
+
print(len(outlook_data))
|
85 |
+
print(outlook_data)
|
|
|
|
|
86 |
|
87 |
+
if "value" in gmail_data:
|
88 |
with ThreadPoolExecutor(max_workers=15) as executor:
|
89 |
|
90 |
|
91 |
|
92 |
futures=[executor.submit(fetch_message_wrapper, message_data) for message_data in
|
93 |
+
outlook_data["value"]]
|
94 |
for future in futures:
|
95 |
message = future.result()
|
96 |
if message:
|
|
|
115 |
await websocket.send_text("NEXT_MESSAGE")
|
116 |
|
117 |
|
118 |
+
def fetch_message_data(access_token: str,message_id:str, message_data,message_subject:str , message_body_preview,message_data,from_mail:str) -> Message:
|
119 |
+
# message_url = f"https://www.googleapis.com/gmail/v1/users/me/messages/{message_id}"
|
120 |
+
# message_response = requests.get(message_url, headers={"Authorization": f"Bearer {access_token}"})
|
121 |
+
# message_data = message_response.json()
|
122 |
# print(message_data)
|
123 |
+
subject = message_subject
|
124 |
+
company_from_mail = from_mail
|
125 |
|
126 |
+
body = message_body_preview
|
127 |
|
128 |
attachments,structed_attachment_data = extract_attachments_from_mail(access_token, message_data)
|
129 |
high_level_company_type = get_company_type(company_from_mail)
|
|
|
230 |
def extract_attachments_from_mail(access_token: str, message_data: dict) -> List[Attachment]:
|
231 |
attachments = []
|
232 |
structured_data = []
|
233 |
+
message_id = message_data.get('id')
|
234 |
+
headers = {
|
235 |
+
'Authorization': f'Bearer {access_token}',
|
236 |
+
'Accept': 'application/json',
|
237 |
+
}
|
238 |
+
attachments_response = requests.get(f'https://graph.microsoft.com/v1.0/me/messages/{message_id}/attachments', headers=headers)
|
239 |
+
attachments = attachments_response.json().get('value', [])
|
240 |
+
|
241 |
+
for attachment in attachments:
|
242 |
+
attachment_id = attachment['id']
|
243 |
+
filename = attachment['name']
|
244 |
+
if filename.endswith(".zip") or filename.endswith(".txt") or filename.endswith(".png") or filename.endswith(".jpg") or filename.endswith(".jpeg") or filename.endswith(".gif"):
|
245 |
+
continue
|
246 |
+
attachment_response = requests.get(f'https://graph.microsoft.com/v1.0/me/messages/{message_id}/attachments/{attachment_id}/$value', headers=headers)
|
247 |
+
data = attachment_response.content
|
248 |
+
raw_text = ut.extract_text_from_attachment(filename , data)
|
249 |
+
struct_data = ut.strcuture_document_data(raw_text)
|
250 |
+
if struct_data:
|
251 |
+
structured_data.append(struct_data)
|
252 |
+
attachments.append(Attachment(attachment_len = len(data),filename=filename, data=data))
|
253 |
return attachments,structured_data
|
254 |
+
#Gmail data fetching below
|
255 |
+
# if "payload" in message_data and "parts" in message_data["payload"]:
|
256 |
+
# for part in message_data["payload"]["parts"]:
|
257 |
+
# if "body" in part and "attachmentId" in part["body"]:
|
258 |
+
# attachment_id = part["body"]["attachmentId"]
|
259 |
+
# attachment_data = fetch_attachment_data(access_token, message_data["id"], attachment_id)
|
260 |
+
# filename = part.get("filename", "untitled.txt")
|
261 |
+
# if filename.endswith(".zip") or filename.endswith(".txt") or filename.endswith(".png") or filename.endswith(".jpg") or filename.endswith(".jpeg") or filename.endswith(".gif"):
|
262 |
+
# continue
|
263 |
+
# data = attachment_data.get("data", "")
|
264 |
+
# raw_text=ut.extract_text_from_attachment(filename , data)
|
265 |
+
# struct_data = ut.strcuture_document_data(raw_text)
|
266 |
+
# if struct_data:
|
267 |
+
# structured_data.append(struct_data)
|
268 |
+
|
269 |
+
# attachments.append(Attachment(attachment_len = len(attachment_data.get("data", "")),filename=filename, data=attachment_data.get("data", "")))
|
270 |
+
# return attachments,structured_data
|
271 |
|
272 |
|
273 |
def extract_text(html_content: str) -> str:
|