Spaces:
Running
Running
Update get_gmail_data.py
Browse files- get_gmail_data.py +27 -4
get_gmail_data.py
CHANGED
@@ -3,6 +3,9 @@ import base64
|
|
3 |
from bs4 import BeautifulSoup
|
4 |
import re
|
5 |
import jwt
|
|
|
|
|
|
|
6 |
class GmailDataExtractor:
|
7 |
|
8 |
def __init__(self,jwt:str , user_input: str = None) -> None:
|
@@ -133,6 +136,7 @@ class GmailDataExtractor:
|
|
133 |
"""
|
134 |
print("process_messages")
|
135 |
message_id = message.get("id")
|
|
|
136 |
if not message_id:
|
137 |
return None, None, [], False
|
138 |
subject=''
|
@@ -178,9 +182,27 @@ class GmailDataExtractor:
|
|
178 |
|
179 |
if data:
|
180 |
# Save only the first 10 characters of the attachment data
|
181 |
-
return subject,text ,{"filename":filename , "data":data} , company_from_gmail
|
182 |
|
183 |
-
return subject, text,None , company_from_gmail
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
|
185 |
def extract_domain_from_email(self,email_string):
|
186 |
# Extracting the email address using regex
|
@@ -241,14 +263,15 @@ class GmailDataExtractor:
|
|
241 |
messages = self.__fetch_messages()
|
242 |
results = []
|
243 |
for message in messages:
|
244 |
-
subject, body, attachment_data , company_name = self.__process_message(message)
|
245 |
|
246 |
""" Handling None values """
|
247 |
body = body if body is not None else None
|
248 |
attachment_data = attachment_data if attachment_data is not None else {}
|
249 |
company_associated = company_name if company_name is not None else None
|
|
|
250 |
|
251 |
-
results.append({"body": body, "attachment_data": [attachment_data] ,'company_associated':company_associated})
|
252 |
|
253 |
return {"results": results}
|
254 |
|
|
|
3 |
from bs4 import BeautifulSoup
|
4 |
import re
|
5 |
import jwt
|
6 |
+
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
|
7 |
+
from cryptography.hazmat.backends import default_backend
|
8 |
+
import os
|
9 |
class GmailDataExtractor:
|
10 |
|
11 |
def __init__(self,jwt:str , user_input: str = None) -> None:
|
|
|
136 |
"""
|
137 |
print("process_messages")
|
138 |
message_id = message.get("id")
|
139 |
+
encrypted_message_id = self.encrypt_message_id(message_id)
|
140 |
if not message_id:
|
141 |
return None, None, [], False
|
142 |
subject=''
|
|
|
182 |
|
183 |
if data:
|
184 |
# Save only the first 10 characters of the attachment data
|
185 |
+
return subject,text ,{"filename":filename , "data":data} , company_from_gmail , encrypted_message_id
|
186 |
|
187 |
+
return subject, text,None , company_from_gmail , encrypted_message_id
|
188 |
+
|
189 |
+
def encrypt_message_id(self,message_id:str):
    """Encrypt a message id with AES in CBC mode.

    The key is read from the ``AES_KEY`` environment variable. A fresh
    random 16-byte IV is generated for every call and prepended to the
    ciphertext, so the returned value is ``iv + ciphertext`` and the
    same id encrypts to different bytes each time.

    Args:
        message_id: The message id to encrypt.

    Returns:
        ``bytes`` holding the 16-byte IV followed by the ciphertext, or
        ``None`` when ``message_id`` is empty or ``None``.

    Raises:
        ValueError: If ``AES_KEY`` is not set, or (raised by the cipher
            library) if the key is not a valid AES key length.
    """
    # Nothing to encrypt: return early so callers can apply their own
    # missing-id handling instead of failing on ``.encode()``.
    if not message_id:
        return None

    key = os.getenv('AES_KEY')
    if not key:
        raise ValueError("AES_KEY environment variable is not set")
    # os.getenv returns str, while the AES algorithm requires bytes.
    # NOTE(review): assumes the variable holds the raw key text (16, 24
    # or 32 bytes once UTF-8 encoded) -- confirm it is not hex/base64.
    key_bytes = key.encode()

    block_size = 16  # AES block size in bytes

    # CBC mode requires an initialization vector of exactly one block.
    iv = os.urandom(block_size)

    # Initialize AES cipher with the key and CBC mode
    cipher = Cipher(algorithms.AES(key_bytes), modes.CBC(iv), backend=default_backend())

    # Create an encryptor object
    encryptor = cipher.encryptor()

    # Left-pad the message_id with NUL bytes to a multiple of the block
    # size. The minimum of 32 bytes keeps the original layout for short
    # ids; longer ids are rounded up to the next full block.
    raw_id = message_id.encode()
    padded_length = max(32, -(-len(raw_id) // block_size) * block_size)
    message_id_padded = raw_id.rjust(padded_length, b'\0')

    # Encrypt the padded message_id
    ciphertext = encryptor.update(message_id_padded) + encryptor.finalize()

    # The IV is not secret; ship it with the ciphertext for decryption.
    return iv + ciphertext
|
205 |
+
|
206 |
|
207 |
def extract_domain_from_email(self,email_string):
|
208 |
# Extracting the email address using regex
|
|
|
263 |
messages = self.__fetch_messages()
|
264 |
results = []
|
265 |
for message in messages:
|
266 |
+
subject, body, attachment_data , company_name , encrypt_mssg_id = self.__process_message(message)
|
267 |
|
268 |
""" Handling None values """
|
269 |
body = body if body is not None else None
|
270 |
attachment_data = attachment_data if attachment_data is not None else {}
|
271 |
company_associated = company_name if company_name is not None else None
|
272 |
+
en_msg_id = encrypt_mssg_id if encrypt_mssg_id is not None else None
|
273 |
|
274 |
+
results.append({"body": body, "attachment_data": [attachment_data] ,'company_associated':company_associated , "message_id":en_msg_id})
|
275 |
|
276 |
return {"results": results}
|
277 |
|