Omkar008 committed on
Commit
3d6350d
1 Parent(s): 1e272ca

Update controllers/ws_controller.py

Browse files
Files changed (1) hide show
  1. controllers/ws_controller.py +10 -3
controllers/ws_controller.py CHANGED
@@ -71,9 +71,9 @@ def fetch_message_data(access_token: str, message_id: str) -> Message:
71
 
72
  body = extract_body_from_mail(message_data)
73
 
74
- attachments = extract_attachments_from_mail(access_token, message_data)
75
  high_level_company_type = get_company_type(company_from_mail)
76
- structed_attachment_data = extract_json_from_attachments(access_token , message_data)
77
 
78
 
79
  body_len = 0
@@ -175,14 +175,21 @@ def fetch_attachment_data(access_token: str, message_id: str, attachment_id: str
175
 
176
  def extract_attachments_from_mail(access_token: str, message_data: dict) -> List[Attachment]:
177
  attachments = []
 
178
  if "payload" in message_data and "parts" in message_data["payload"]:
179
  for part in message_data["payload"]["parts"]:
180
  if "body" in part and "attachmentId" in part["body"]:
181
  attachment_id = part["body"]["attachmentId"]
182
  attachment_data = fetch_attachment_data(access_token, message_data["id"], attachment_id)
183
  filename = part.get("filename", "untitled.txt")
 
 
 
 
 
 
184
  attachments.append(Attachment(attachment_len = len(attachment_data.get("data", "")),filename=filename, data=attachment_data.get("data", "")))
185
- return attachments
186
 
187
 
188
  def extract_text(html_content: str) -> str:
 
71
 
72
  body = extract_body_from_mail(message_data)
73
 
74
+ attachments,structed_attachment_data = extract_attachments_from_mail(access_token, message_data)
75
  high_level_company_type = get_company_type(company_from_mail)
76
+ # structed_attachment_data = extract_json_from_attachments(access_token , message_data)
77
 
78
 
79
  body_len = 0
 
175
 
176
  def extract_attachments_from_mail(access_token: str, message_data: dict) -> List[Attachment]:
177
  attachments = []
178
+ structured_data = []
179
  if "payload" in message_data and "parts" in message_data["payload"]:
180
  for part in message_data["payload"]["parts"]:
181
  if "body" in part and "attachmentId" in part["body"]:
182
  attachment_id = part["body"]["attachmentId"]
183
  attachment_data = fetch_attachment_data(access_token, message_data["id"], attachment_id)
184
  filename = part.get("filename", "untitled.txt")
185
+ data = attachment_data.get("data", "")
186
+ raw_text=ut.extract_text_from_attachment(filename , data)
187
+ struct_data = ut.strcuture_document_data(raw_text)
188
+ if struct_data:
189
+ structured_data.append(struct_data)
190
+
191
  attachments.append(Attachment(attachment_len = len(attachment_data.get("data", "")),filename=filename, data=attachment_data.get("data", "")))
192
+ return attachments,structured_data
193
 
194
 
195
  def extract_text(html_content: str) -> str: