Spaces:
Sleeping
Sleeping
Update controllers/ws_controller.py
Browse files- controllers/ws_controller.py +35 -4
controllers/ws_controller.py
CHANGED
@@ -99,6 +99,25 @@ async def get_messages(code: str,websocket:WebSocket,start,brand_name: Optional[
|
|
99 |
# print(messages)
|
100 |
return messages
|
101 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
async def process_message(message:Message, websocket:WebSocket, chunk_size:int):
|
103 |
logging.info("process_message")
|
104 |
print(message)
|
@@ -218,10 +237,22 @@ def extract_attachments_from_mail(access_token: str, message_data: dict) -> List
|
|
218 |
if filename.endswith(".zip") or filename.endswith(".txt") or filename.endswith(".png") or filename.endswith(".jpg") or filename.endswith(".jpeg") or filename.endswith(".gif"):
|
219 |
continue
|
220 |
data = attachment_data.get("data", "")
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
|
226 |
attachments.append(Attachment(attachment_len = len(attachment_data.get("data", "")),filename=filename, data=attachment_data.get("data", "")))
|
227 |
return attachments,structured_data
|
|
|
99 |
# print(messages)
|
100 |
return messages
|
101 |
|
102 |
+
def is_file_encrypted(filename: str, data: str) -> bool:
    """Decide whether an attachment should be treated as encrypted.

    The base64url-encoded ``data`` is decoded into an in-memory stream.
    Only filenames ending in ``.pdf`` (case-insensitive) are actually
    inspected, by delegating to ``is_pdf_encrypted``; every other file
    type is reported as not encrypted.

    Args:
        filename: Attachment name; its extension selects the check.
        data: Attachment payload as a base64url string.

    Returns:
        ``True`` if the PDF check says so, or if anything raised while
        decoding or checking; ``False`` otherwise.
    """
    encrypted = False
    try:
        decoded_bytes = base64.urlsafe_b64decode(data)
        stream = io.BytesIO(decoded_bytes)
        looks_like_pdf = filename.lower().endswith('.pdf')
        if looks_like_pdf:
            encrypted = is_pdf_encrypted(stream)
        # Add checks for other file types as needed
    except Exception as err:
        # Fail closed: an attachment that cannot be examined is treated
        # as encrypted so the caller skips it.
        print(f"Error checking encryption for {filename}: {str(err)}")
        encrypted = True
    return encrypted
|
112 |
+
|
113 |
+
def is_pdf_encrypted(file_content) -> bool:
    """Report whether a PDF is encrypted, failing closed on errors.

    Args:
        file_content: The PDF source handed straight to
            ``PyPDF2.PdfReader`` (the caller in this file passes an
            ``io.BytesIO`` of the decoded attachment).

    Returns:
        The reader's ``is_encrypted`` flag, or ``True`` when the PDF
        could not be opened or parsed at all.
    """
    try:
        pdf_reader = PyPDF2.PdfReader(file_content)
        return pdf_reader.is_encrypted
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate; any ordinary
        # parsing failure is treated as "encrypted" so the file is skipped.
        return True  # Assume encrypted if there's an error
|
119 |
+
|
120 |
+
|
121 |
async def process_message(message:Message, websocket:WebSocket, chunk_size:int):
|
122 |
logging.info("process_message")
|
123 |
print(message)
|
|
|
237 |
if filename.endswith(".zip") or filename.endswith(".txt") or filename.endswith(".png") or filename.endswith(".jpg") or filename.endswith(".jpeg") or filename.endswith(".gif"):
|
238 |
continue
|
239 |
data = attachment_data.get("data", "")
|
240 |
+
if is_file_encrypted(filename, data):
|
241 |
+
print(f"Skipping encrypted file: {filename}")
|
242 |
+
continue # Skip this file if it's encrypted
|
243 |
+
|
244 |
+
try:
|
245 |
+
raw_text = ut.extract_text_from_attachment(filename, data)
|
246 |
+
struct_data = ut.strcuture_document_data(raw_text)
|
247 |
+
if struct_data:
|
248 |
+
structured_data.append(struct_data)
|
249 |
+
except Exception as e:
|
250 |
+
print(f"Error processing attachment {filename}: {str(e)}")
|
251 |
+
continue # Skip this attachment if there's an error
|
252 |
+
# raw_text=ut.extract_text_from_attachment(filename , data)
|
253 |
+
# struct_data = ut.strcuture_document_data(raw_text)
|
254 |
+
# if struct_data:
|
255 |
+
# structured_data.append(struct_data)
|
256 |
|
257 |
attachments.append(Attachment(attachment_len = len(attachment_data.get("data", "")),filename=filename, data=attachment_data.get("data", "")))
|
258 |
return attachments,structured_data
|