Spaces:

Hushh
/

hushh-valet-chat

Sleeping

App Files Community

Omkar008 committed on Jun 21

Commit

e01e08c

•

1 Parent(s): 21791d0

Update controllers/ws_controller.py

Browse files

Files changed (1) hide show

controllers/ws_controller.py +37 -37

controllers/ws_controller.py CHANGED Viewed

@@ -10,6 +10,7 @@ from bs4 import BeautifulSoup
 from models.models import Message, Attachment
 from fastapi import WebSocket
 from services import utils as ut
 # from models import supabase_models as sp
 import asyncio
 def get_company_type(company_name:str)->str:
@@ -17,7 +18,8 @@ def get_company_type(company_name:str)->str:
     print(company_types_dict["louis vuitton"])
     return company_types_dict.get(company_name.lower(), 'Others')
-async def get_messages(code: str,websocket:WebSocket,brand_name: Optional[str] = None):
     access_token = code
     g_query = f'(subject:"your order" OR subject:receipts OR subject:receipt OR subject:aankoopbon OR subject:reçu OR subject:invoice OR subject:invoices OR category:purchases) has:attachment'
     if brand_name is not None:
@@ -36,37 +38,50 @@ async def get_messages(code: str,websocket:WebSocket,brand_name: Optional[str] =
         return None
-        # if message_id :
-        #     message_json = fetch_message_data(access_token,message_id)
-        #     # await process_message(message_json,websocket,20000)
-        # if message_id:
-        #     return await asyncio.to_thread(fetch_message_data, access_token, message_id)
-        # return None
-    # with ProcessPoolExecutor(max_workers=4) as executor:
     while True:
-        gmail_url = f"https://www.googleapis.com/gmail/v1/users/me/messages?q={g_query}"
         if page_token:
             gmail_url += f"&pageToken={page_token}"
         gmail_response = requests.get(gmail_url, headers={"Authorization": f"Bearer {access_token}"})
         gmail_data = gmail_response.json()
-        print(len(gmail_data['messages']))
         print(gmail_data)
         if "messages" in gmail_data:
-            with ThreadPoolExecutor(max_workers=15) as executor:
                 futures=[executor.submit(fetch_message_wrapper, message_data,websocket) for message_data in
                                gmail_data["messages"]]
                 for future in futures:
                     message = future.result()
                     print(message)
                     if message:
                         # Process and send the message immediately
-                        await process_message(message, websocket, 20000)
                     # if message:
                     #     messages.append(message)
             print("Messages to be sent")
@@ -90,7 +105,8 @@ async def process_message(message:Message, websocket:WebSocket, chunk_size:int):
     if message:
         message_json = message.to_json()
         logging.info(f"{message_json}")
-        await send_message_in_chunks(websocket, message_json, chunk_size)
         await websocket.send_text("NEXT_MESSAGE")
@@ -103,25 +119,19 @@ def fetch_message_data(access_token: str, message_id: str) -> Message:
     company_from_mail = extract_domain_name(message_data['payload']['headers'], subject)
     body = extract_body_from_mail(message_data)
     attachments,structed_attachment_data = extract_attachments_from_mail(access_token, message_data)
     high_level_company_type = get_company_type(company_from_mail)
-    # structed_attachment_data = extract_json_from_attachments(access_token , message_data)
     body_len = 0
     if body is not None :
         body_len = len(body)
-    # print("subject: ")
-    # print(subject)
-    # print("company name: ")
-    # print(company_from_mail)
-    # print("Printing the body of the mail: ")
-    # print(body)
-    # print("Printing attachment Data: ")
-    # print(attachments)
-    # print("Completed this mail.")
     return Message(message_id=message_id, body_len=body_len,body=body, attachments=attachments, company=company_from_mail,high_level_company_type=high_level_company_type,structured_data = structed_attachment_data)
@@ -159,16 +169,6 @@ def extract_domain_from_email(email_string: str) -> Optional[str]:
         return None
-# def extract_body_from_mail(message_data: dict) -> str:
-#     body = None
-#     if "payload" in message_data and "parts" in message_data["payload"]:
-#         for part in message_data["payload"]["parts"]:
-#             if 'mimeType' in part and (part['mimeType'] == 'text/plain' or part['mimeType'] == 'text/html'):
-#                 body_data = part['body'].get('data', '')
-#                 body_base64 = base64.urlsafe_b64decode(body_data)
-#                 body = extract_text(body_base64)
-#     return body
 def extract_body_from_mail(message_data: dict) -> str:
     body = None
@@ -237,10 +237,10 @@ def extract_text(html_content: str) -> str:
     return text
-async def websocket_main(code: str,  websocket: WebSocket,brand_name: Optional[str] = None):
     access_token = code
     # messages = get_messages(access_token,websocket,brand_name)
-    await get_messages(access_token,websocket,brand_name)
     # print("websocket_main")
     # print(messages)
     # # logging.info(f"brand_name:{brand_name}")

 from models.models import Message, Attachment
 from fastapi import WebSocket
 from services import utils as ut
+import time
 # from models import supabase_models as sp
 import asyncio
 def get_company_type(company_name:str)->str:
     print(company_types_dict["louis vuitton"])
     return company_types_dict.get(company_name.lower(), 'Others')
+async def get_messages(code: str,websocket:WebSocket,start,brand_name: Optional[str] = None):
+    await websocket.send_text("Test text!!")
     access_token = code
     g_query = f'(subject:"your order" OR subject:receipts OR subject:receipt OR subject:aankoopbon OR subject:reçu OR subject:invoice OR subject:invoices OR category:purchases) has:attachment'
     if brand_name is not None:
         return None
+    end = time.time()
+    print("time 0")
+    print(end - start)
+    start1 = time.time()
     while True:
+        start2= time.time()
+        gmail_url = f"https://www.googleapis.com/gmail/v1/users/me/messages?q={g_query}&maxResults={30}"
         if page_token:
             gmail_url += f"&pageToken={page_token}"
+        # print(gmail_url)
         gmail_response = requests.get(gmail_url, headers={"Authorization": f"Bearer {access_token}"})
+        # print(gmail_response.text)
+        end2 = time.time()
+        print("End 2 ")
+        print(end2-start2)
+        print("response length")
+        print(gmail_response.content.__len__())
         gmail_data = gmail_response.json()
         print(gmail_data)
+        print(len(gmail_data['messages']))
         if "messages" in gmail_data:
+            with ThreadPoolExecutor(max_workers=50) as executor:
                 futures=[executor.submit(fetch_message_wrapper, message_data,websocket) for message_data in
                                gmail_data["messages"]]
+                print(len(futures))
+                print(futures)
                 for future in futures:
                     message = future.result()
                     print(message)
                     if message:
                         # Process and send the message immediately
+                        end1 = time.time()
+                        print("time 1")
+                        print(end1-start1)
+                        await process_message(message, websocket, 100000)
                     # if message:
                     #     messages.append(message)
             print("Messages to be sent")
     if message:
         message_json = message.to_json()
         logging.info(f"{message_json}")
+        await send_message_in_chunks(websocket, message_json, 50000)
+        # await websocket.send_text(str(message_json))
         await websocket.send_text("NEXT_MESSAGE")
     company_from_mail = extract_domain_name(message_data['payload']['headers'], subject)
     body = extract_body_from_mail(message_data)
+    start3= time.time()
     attachments,structed_attachment_data = extract_attachments_from_mail(access_token, message_data)
+    end3=time.time()
+    print("time 5")
+    print(end3 - start3)
     high_level_company_type = get_company_type(company_from_mail)
     body_len = 0
     if body is not None :
         body_len = len(body)
     return Message(message_id=message_id, body_len=body_len,body=body, attachments=attachments, company=company_from_mail,high_level_company_type=high_level_company_type,structured_data = structed_attachment_data)
         return None
 def extract_body_from_mail(message_data: dict) -> str:
     body = None
     return text
+async def websocket_main(code: str,  websocket: WebSocket,start,brand_name: Optional[str] = None):
     access_token = code
     # messages = get_messages(access_token,websocket,brand_name)
+    await get_messages(access_token,websocket,start,brand_name)
     # print("websocket_main")
     # print(messages)
     # # logging.info(f"brand_name:{brand_name}")