Omkar008 committed on
Commit
8882ecc
1 Parent(s): eef32e5

Update controllers/ws_controller.py

Browse files
Files changed (1) hide show
  1. controllers/ws_controller.py +35 -4
controllers/ws_controller.py CHANGED
@@ -99,6 +99,25 @@ async def get_messages(code: str,websocket:WebSocket,start,brand_name: Optional[
99
  # print(messages)
100
  return messages
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  async def process_message(message:Message, websocket:WebSocket, chunk_size:int):
103
  logging.info("process_message")
104
  print(message)
@@ -218,10 +237,22 @@ def extract_attachments_from_mail(access_token: str, message_data: dict) -> List
218
  if filename.endswith(".zip") or filename.endswith(".txt") or filename.endswith(".png") or filename.endswith(".jpg") or filename.endswith(".jpeg") or filename.endswith(".gif"):
219
  continue
220
  data = attachment_data.get("data", "")
221
- raw_text=ut.extract_text_from_attachment(filename , data)
222
- struct_data = ut.strcuture_document_data(raw_text)
223
- if struct_data:
224
- structured_data.append(struct_data)
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
  attachments.append(Attachment(attachment_len = len(attachment_data.get("data", "")),filename=filename, data=attachment_data.get("data", "")))
227
  return attachments,structured_data
 
99
  # print(messages)
100
  return messages
101
 
102
def is_file_encrypted(filename: str, data: str) -> bool:
    """Report whether an attachment should be treated as encrypted.

    Args:
        filename: Attachment file name; only its (case-insensitive)
            extension is inspected.
        data: URL-safe base64 text of the attachment body. Missing ``=``
            padding is tolerated.

    Returns:
        ``True`` when the file is an encrypted PDF, or when the payload
        cannot be decoded or inspected at all (the conservative choice:
        an unreadable file is treated as encrypted). ``False`` for every
        other file, including all non-PDF types, for which no check
        exists yet.
    """
    try:
        if isinstance(data, str):
            # base64url payloads are frequently sent without trailing "=";
            # urlsafe_b64decode rejects those, which previously caused
            # readable files to be reported as encrypted.
            data = data + "=" * (-len(data) % 4)
        file_content = io.BytesIO(base64.urlsafe_b64decode(data))
        if filename.lower().endswith('.pdf'):
            return is_pdf_encrypted(file_content)
        # Add checks for other file types as needed
    except Exception as e:
        print(f"Error checking encryption for {filename}: {e}")
        return True  # Assume encrypted if there's an error
    return False
112
+
113
def is_pdf_encrypted(file_content) -> bool:
    """Report whether a PDF stream is encrypted.

    Args:
        file_content: Binary file-like object positioned at the start of
            the PDF data; it is handed directly to ``PyPDF2.PdfReader``.

    Returns:
        The reader's ``is_encrypted`` flag, or ``True`` when the stream
        cannot be parsed at all (an unreadable PDF is treated as
        encrypted).
    """
    try:
        pdf_reader = PyPDF2.PdfReader(file_content)
        return pdf_reader.is_encrypted
    except Exception:
        # Deliberately broad: any parse failure means "treat as encrypted".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return True
119
+
120
+
121
  async def process_message(message:Message, websocket:WebSocket, chunk_size:int):
122
  logging.info("process_message")
123
  print(message)
 
237
  if filename.endswith(".zip") or filename.endswith(".txt") or filename.endswith(".png") or filename.endswith(".jpg") or filename.endswith(".jpeg") or filename.endswith(".gif"):
238
  continue
239
  data = attachment_data.get("data", "")
240
+ if is_file_encrypted(filename, data):
241
+ print(f"Skipping encrypted file: {filename}")
242
+ continue # Skip this file if it's encrypted
243
+
244
+ try:
245
+ raw_text = ut.extract_text_from_attachment(filename, data)
246
+ struct_data = ut.strcuture_document_data(raw_text)
247
+ if struct_data:
248
+ structured_data.append(struct_data)
249
+ except Exception as e:
250
+ print(f"Error processing attachment {filename}: {str(e)}")
251
+ continue # Skip this attachment if there's an error
252
+ # raw_text=ut.extract_text_from_attachment(filename , data)
253
+ # struct_data = ut.strcuture_document_data(raw_text)
254
+ # if struct_data:
255
+ # structured_data.append(struct_data)
256
 
257
  attachments.append(Attachment(attachment_len = len(attachment_data.get("data", "")),filename=filename, data=attachment_data.get("data", "")))
258
  return attachments,structured_data