Spaces:
Running
Running
Update get_gmail_data.py
Browse files- get_gmail_data.py +10 -7
get_gmail_data.py
CHANGED
@@ -170,8 +170,8 @@ class GmailDataExtractor:
|
|
170 |
|
171 |
if mime_type == 'text/plain' or mime_type == 'text/html':
|
172 |
body_data = part['body'].get('data', '')
|
173 |
-
body = base64.urlsafe_b64decode(body_data)
|
174 |
-
text= self.
|
175 |
|
176 |
if 'body' in part and 'attachmentId' in part['body']:
|
177 |
attachment_id = part['body']['attachmentId']
|
@@ -186,15 +186,18 @@ class GmailDataExtractor:
|
|
186 |
return subject, body,None , company_from_gmail
|
187 |
|
188 |
def extract_domain_from_email(email):
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
|
|
|
|
|
|
193 |
else:
|
194 |
return None
|
195 |
|
196 |
|
197 |
-
def
|
198 |
"""
|
199 |
Extracts text and links from HTML content.
|
200 |
|
|
|
170 |
|
171 |
if mime_type == 'text/plain' or mime_type == 'text/html':
|
172 |
body_data = part['body'].get('data', '')
|
173 |
+
body = base64.urlsafe_b64decode(body_data)
|
174 |
+
text= self.extract_text(body)
|
175 |
|
176 |
if 'body' in part and 'attachmentId' in part['body']:
|
177 |
attachment_id = part['body']['attachmentId']
|
|
|
186 |
return subject, body,None , company_from_gmail
|
187 |
|
188 |
def extract_domain_from_email(email):
    """Return the first domain label of the first email address found in *email*.

    The input may be a bare address or a longer string that contains one,
    such as a ``From`` header value (``"Jane <jane@acme.com>"``). Only the
    label immediately after the ``@`` is returned, so ``"jane@acme.co.uk"``
    yields ``"acme"`` and ``"bob@mail.example.com"`` yields ``"mail"``.

    Args:
        email: Text expected to contain an email address.

    Returns:
        The first dot-separated label of the address's domain, or ``None``
        when *email* is not a string, contains no address, or the label
        is empty (e.g. ``"x@.com"``).
    """
    # Non-string input (e.g. a missing header passed as None) cannot be
    # searched; treat it the same as "no address found".
    if not isinstance(email, str):
        return None

    # Locate the first thing that looks like an email address.
    match = re.search(r'[\w\.-]+@[\w\.-]+', email)
    if match is None:
        return None

    # Keep the text after the last '@', then only its first label.
    domain = match.group().split('@')[-1].split('.')[0]
    return domain or None
|
198 |
|
199 |
|
200 |
+
def extract_text(html_content: str) -> tuple:
|
201 |
"""
|
202 |
Extracts text and links from HTML content.
|
203 |
|