Omkar008 committed on
Commit
8a73b97
1 Parent(s): 31b785c

Update controllers/ws_controller.py

Browse files
Files changed (1) hide show
  1. controllers/ws_controller.py +22 -19
controllers/ws_controller.py CHANGED
@@ -118,7 +118,7 @@ async def get_messages(code: str,websocket:WebSocket,start,brand_name: Optional[
118
  async def process_message(message:Message, websocket:WebSocket, chunk_size:int):
119
  logging.info("process_message")
120
  # print(message)
121
- if message:
122
  message_json = message.to_json()
123
  # logging.info(f"{message_json}")
124
  await send_message_in_chunks(websocket, message_json, 50000)
@@ -280,25 +280,28 @@ def extract_attachments_from_mail(access_token: str, message_data: dict) -> List
280
  print(f"Error processing attachment {filename}: {str(e)}")
281
  continue
282
  struct_data = ut.strcuture_document_data(raw_text)
283
- st_str = """
284
- {
285
- "brand": "INSERT BRAND NAME FROM THE RECEIPT OCR TEXT. IF NOT PRESENT RETURN null",
286
- "total_cost": "INSERT TOTAL COST FROM THE RECEIPT OCR TEXT. TOTAL AMOUNT IS MAXIMUM VALUE IN THE OCR TEXT. IF NOT PRESENT RETURN null",
287
- "location": "INSERT LOCATION FROM THE RECEIPT OCR TEXT. IF NOT PRESENT RETURN null",
288
- "purchase_category": "INSERT PURCHASE CATEGORY FROM THE RECEIPT OCR TEXT. IF NOT PRESENT RETURN null",
289
- "brand_category": "INSERT BRAND CATEGORY FROM THE RECEIPT OCR TEXT. CHOOSE CLOSEST BRAND CATEGORY BASED ON THE OCR FROM THIS ARRAY [\"Fashion and Apparel\",\"Jewelry and Watches\",\"Beauty and Personal Care\",\"Automobiles\",\"Real Estate\",\"Travel and Leisure\",\"Culinary Services\",\"Home and Lifestyle\",\"Technology and Electronics\",\"Sports and Leisure\",\"Art and Collectibles\",\"Health and Wellness\",\"Stationery and Writing Instruments\",\"Children and Baby\",\"Pet Accessories\",\"Financial Services\",\"Airline Services\",\"Accommodation Services\",\"Beverages Services\",\"Services\"] ELSE IF NOT PRESENT RETURN null",
290
- "Date": "INSERT RECEIPT DATE FROM THE RECEIPT OCR TEXT. IF NOT PRESENT RETURN null. FORMAT: dd-mm-yyyy",
291
- "currency": "INSERT CURRENCY FROM THE RECEIPT OCR TEXT. LOOK FOR CURRENCY SYMBOLS (e.g., $, \u20ac, \u00a3, \u00a5) OR CURRENCY CODES (e.g., USD, EUR, GBP, JPY).ALWAYS RETURN CURRENCY CODE.IF NOT FOUND RETURN null.",
292
- "filename": "GENERATE A FILENAME BASED ON THE RECEIPT OCR TEXT. USE THE FORMAT: 'PURCHASE_TYPE_BRAND_DATE' (e.g., 'clothing_gucci_20230715'). USE UNDERSCORES FOR SPACES.IF YOU CANNOT FIND THE COMPONENTS RETURN THIS FIELD AS NULL.",
293
- "payment_method": "INSERT PAYMENT METHOD FROM THE RECEIPT OCR TEXT. LOOK FOR KEYWORDS LIKE 'CASH', 'CARD', 'CREDIT', 'DEBIT', 'VISA', 'MASTERCARD', 'AMEX', 'PAYPAL', ETC. IF NOT FOUND RETURN null."
294
  }
295
- """
296
- if struct_data is None or struct_data == st_str :
297
- struct_data = None
298
- else:
299
- structured_data.append(struct_data)
300
-
301
- # structured_data.append(struct_data)
 
 
 
 
302
 
303
 
304
 
 
118
  async def process_message(message:Message, websocket:WebSocket, chunk_size:int):
119
  logging.info("process_message")
120
  # print(message)
121
+ if message and message.structured_data:
122
  message_json = message.to_json()
123
  # logging.info(f"{message_json}")
124
  await send_message_in_chunks(websocket, message_json, 50000)
 
280
  print(f"Error processing attachment {filename}: {str(e)}")
281
  continue
282
  struct_data = ut.strcuture_document_data(raw_text)
283
+ st_str ={
284
+ "brand": "INSERT BRAND NAME",
285
+ "total_cost": "INSERT TOTAL COST",
286
+ "location": "INSERT LOCATION FROM",
287
+ "purchase_category": "INSERT PURCHASE CATEGORY",
288
+ "brand_category": "INSERT BRAND CATEGORY",
289
+ "Date": "INSERT RECEIPT DATE",
290
+ "currency": "INSERT CURRENCY",
291
+ "filename": "GENERATE A FILENAME",
292
+ "payment_method": "INSERT PAYMENT METHOD"
 
293
  }
294
+ if struct_data:
295
+ for key,value in st_str.items():
296
+ if struct_data[key]:
297
+ if value in struct_data[key]:
298
+
299
+ struct_data[key]=None
300
+ all_null = all(value is None for value in struct_data.values())
301
+ if all_null:
302
+ struct_data=None
303
+
304
+ structured_data.append(struct_data)
305
 
306
 
307