Omkar008 committed on
Commit
ce3d970
1 Parent(s): 30fac65

Update get_gmail_data.py

Browse files
Files changed (1) hide show
  1. get_gmail_data.py +10 -7
get_gmail_data.py CHANGED
@@ -170,8 +170,8 @@ class GmailDataExtractor:
170
 
171
  if mime_type == 'text/plain' or mime_type == 'text/html':
172
  body_data = part['body'].get('data', '')
173
- body = base64.urlsafe_b64decode(body_data).decode('utf-8')
174
- text= self.extract_text_and_links(body)
175
 
176
  if 'body' in part and 'attachmentId' in part['body']:
177
  attachment_id = part['body']['attachmentId']
@@ -186,15 +186,18 @@ class GmailDataExtractor:
186
  return subject, body,None , company_from_gmail
187
 
188
  def extract_domain_from_email(email):
189
- regex = r"@(.+)$"
190
- match = re.search(regex,email)
191
- if match :
192
- return match.group(1)
 
 
 
193
  else:
194
  return None
195
 
196
 
197
- def extract_text_and_links(html_content: str) -> tuple:
198
  """
199
  Extracts text and links from HTML content.
200
 
 
170
 
171
  if mime_type == 'text/plain' or mime_type == 'text/html':
172
  body_data = part['body'].get('data', '')
173
+ body = base64.urlsafe_b64decode(body_data)
174
+ text= self.extract_text(body)
175
 
176
  if 'body' in part and 'attachmentId' in part['body']:
177
  attachment_id = part['body']['attachmentId']
 
186
  return subject, body,None , company_from_gmail
187
 
188
  def extract_domain_from_email(email):
189
+ # Extracting the email address using regex
190
+ email_address = re.search(r'[\w\.-]+@[\w\.-]+', email_string).group()
191
+
192
+ # Extracting the domain name from the email address
193
+ domain = email_address.split('@')[-1].split('.')[0]
194
+ if email_address and domain :
195
+ return domain
196
  else:
197
  return None
198
 
199
 
200
+ def extract_text(html_content: str) -> tuple:
201
  """
202
  Extracts text and links from HTML content.
203