Omkar008 committed on
Commit
0527f8f
1 Parent(s): ccf236c

Update get_gmail_data.py

Browse files
Files changed (1) hide show
  1. get_gmail_data.py +25 -5
get_gmail_data.py CHANGED
@@ -148,11 +148,22 @@ class GmailDataExtractor:
148
 
149
  if 'payload' in message_data and 'parts' in message_data['payload']:
150
  parts = message_data['payload']['parts']
 
 
 
 
 
 
 
 
 
 
151
  for part in parts:
152
  if 'mimeType' not in part:
153
  continue
154
 
155
  mime_type = part['mimeType']
 
156
  if mime_type == 'text/plain' or mime_type == 'text/html':
157
  body_data = part['body'].get('data', '')
158
  body = base64.urlsafe_b64decode(body_data).decode('utf-8')
@@ -166,9 +177,18 @@ class GmailDataExtractor:
166
 
167
  if data:
168
  # Save only the first 10 characters of the attachment data
169
- return subject,body ,{"filename":filename , "data":data}
 
 
170
 
171
- return subject, body,None
 
 
 
 
 
 
 
172
 
173
  def extract_text_and_links(html_content: str) -> tuple:
174
  """
@@ -214,14 +234,14 @@ class GmailDataExtractor:
214
  messages = self.__fetch_messages()
215
  results = []
216
  for message in messages:
217
- subject, body, attachment_data = self.__process_message(message)
218
 
219
  """ Handling None values """
220
- subject = subject if subject is not None else ""
221
  body = body if body is not None else None
222
  attachment_data = attachment_data if attachment_data is not None else {}
 
223
 
224
- results.append({"body": body, "attachment_data": [attachment_data]})
225
 
226
  return {"results": results}
227
 
 
148
 
149
  if 'payload' in message_data and 'parts' in message_data['payload']:
150
  parts = message_data['payload']['parts']
151
+ payload = message_data['payload']
152
+ brand_from_gmail = ''
153
+ company_from_gmail = ''
154
+ if payload['headers']['name'] == 'from':
155
+ brand_from_gmail = payload['headers']['value']
156
+ company_from_gmail = extract_domain_from_email(brand_from_gmail)
157
+ else:
158
+ company_from_gmail = None
159
+
160
+
161
  for part in parts:
162
  if 'mimeType' not in part:
163
  continue
164
 
165
  mime_type = part['mimeType']
166
+
167
  if mime_type == 'text/plain' or mime_type == 'text/html':
168
  body_data = part['body'].get('data', '')
169
  body = base64.urlsafe_b64decode(body_data).decode('utf-8')
 
177
 
178
  if data:
179
  # Save only the first 10 characters of the attachment data
180
+ return subject,body ,{"filename":filename , "data":data} , company_from_gmail
181
+
182
+ return subject, body,None , company_from_gmail
183
 
184
def extract_domain_from_email(email):
    """Return the domain part of an e-mail address, or None if there is none.

    Accepts either a bare address ("user@example.com") or a header-style
    value with a display name ("Name <user@example.com>"), such as the
    value of a From header. Surrounding whitespace and the angle brackets
    are not included in the result.

    Args:
        email: The address or header value to inspect. Non-string or empty
            input yields None instead of raising.

    Returns:
        The text after the last "@" of the address, or None when the input
        contains no "@" or nothing follows it.
    """
    if not isinstance(email, str):
        return None

    candidate = email.strip()

    # A header value usually wraps the real address in angle brackets;
    # use only that part so the closing ">" never leaks into the domain.
    bracketed = re.search(r"<([^<>]*)>", candidate)
    if bracketed:
        candidate = bracketed.group(1).strip()

    # Split on the last "@": the local part may itself contain a quoted "@".
    _local, separator, domain = candidate.rpartition("@")
    domain = domain.strip()
    if not separator or not domain:
        return None
    return domain
191
+
192
 
193
  def extract_text_and_links(html_content: str) -> tuple:
194
  """
 
234
  messages = self.__fetch_messages()
235
  results = []
236
  for message in messages:
237
+ subject, body, attachment_data , company_name = self.__process_message(message)
238
 
239
  """ Handling None values """
 
240
  body = body if body is not None else None
241
  attachment_data = attachment_data if attachment_data is not None else {}
242
+ company_associated = company_name if company_name is not None else None
243
 
244
+ results.append({"body": body, "attachment_data": [attachment_data] ,'company_associated':company_associated})
245
 
246
  return {"results": results}
247