Spaces:

Omkar008
/

receipt_radar_test

Sleeping

App Files Files Community

Omkar008 committed on Feb 7

Commit

4c0eeb2

•

1 Parent(s): 0527f8f

Update main.py

Browse files

Files changed (1) hide show

main.py +112 -112

main.py CHANGED Viewed

@@ -62,124 +62,124 @@ async def test_google(code:str):
     access_token = response.json().get("access_token")
     print("printing access token , yo yo test")
     print(access_token)
-    # if not access_token:
-    #     raise HTTPException(status_code=400, detail="Authorization code not provided")
-    print("Entered this function, for testing purposes")
-    brand_name = "louis vuitton"
-    user_info = requests.get("https://www.googleapis.com/oauth2/v1/userinfo", headers={"Authorization": f"Bearer {access_token}"})
-    page_token = None
-    messages = []
-    # user_query = f"subject:((receipt {brand_name}) OR (receipts {brand_name})  OR (reçu {brand_name}) OR (reçus {brand_name}) OR (Quittung {brand_name}) OR (Quittungen {brand_name}) OR (aankoopbon {brand_name}) OR (aankoopbonnen {brand_name}) OR (recibo {brand_name}) OR (recibos {brand_name}) OR (ricevuta {brand_name}) OR (ricevute {brand_name}) OR (ontvangstbewijs {brand_name}) OR (ontvangstbewijzen {brand_name})) has:attachment"
-    # user_query = f"{brand_name} label:^smartlabel_receipt"
-    user_query = f"(label:^smartlabel_receipt OR (subject:your AND subject:order) OR subject:receipts OR subject:receipt OR subject:invoice OR subject:invoice)) AND subject:amazon"
-    # user_query = """("invoice" OR (("tracking" OR "track") AND ("delivery" OR "package"))) OR (subject:order OR subject:receipt OR subject:receipts OR subject:invoice OR subject:invoice)"""
-    while True:
-        # Construct Gmail API request with pageToken
-        gmail_url = f"https://www.googleapis.com/gmail/v1/users/me/messages?q={user_query}"
-        if page_token:
-            gmail_url += f"&pageToken={page_token}"
-        gmail_response = requests.get(gmail_url, headers={"Authorization": f"Bearer {access_token}"})
-        gmail_data = gmail_response.json()
-        # Check if there are messages in the response
-        if "messages" in gmail_data:
-            messages.extend(gmail_data["messages"])
-        # Check if there are more pages
-        if "nextPageToken" in gmail_data:
-            page_token = gmail_data["nextPageToken"]
-        else:
-            break  # No more pages, exit the loop
-    unique_thread_ids = set()
-    filtered_data_list = []
-    for entry in messages:
-        thread_id = entry['threadId']
-        if thread_id not in unique_thread_ids:
-            unique_thread_ids.add(thread_id)
-            filtered_data_list.append(entry)
-    attachments = []
-    attachment_no = 0
-    data_new = {}
-    for i,message in enumerate(messages) :
-        # print(i)
-        # print(message)
-        if message:
-            message_id = message.get("id")
-            print(message_id)
-            if message_id:
-                message_url = f"https://www.googleapis.com/gmail/v1/users/me/messages/{message_id}"
-                message_response = requests.get(message_url, headers={"Authorization": f"Bearer {access_token}"})
-                message_data = message_response.json()
-                print("printing message_data response json")
-                print(message_data)
-                print("Finished printing message_data response json")
-                subject = ''
-                body = ''
-                print("printing body")
-                print(message_data['snippet'])
-                if 'payload' in message_data and 'headers' in message_data['payload']:
-                    headers = message_data['payload']['headers']
-                    for header in headers:
-                        if header['name'] == 'Subject':
-                            subject = header['value']
-                    if 'parts' in message_data['payload']:
-                        parts = message_data['payload']['parts']
-                        print("printing parts")
-                        print(parts)
-                        for part in parts:
-                            if part['mimeType'] == 'text/plain' or part['mimeType'] == 'text/html':
-                                body_data = part['body']['data']
-                                print("printing body data")
-                                print(body_data)
-                                body = base64.urlsafe_b64decode(body_data)
-                print("Subject:", subject)
-                if body:
-                    text,links=extract_text_and_links(body)
-                    if text:
-                        print("Printing extracted Text: ")
-                        print(text)
-                    else:
-                        print("No text found or there was some error parsing.")
-                if links:
-                    print("\nLinks:")
-                    for link_text, link_url in links:
-                        print(f"{link_text}: {link_url}")
-                else:
-                    print("No links found or there was some error in parsing or maybe don't use for loop.")
-                # Check for parts in the message payload
-                if "payload" in message_data and "parts" in message_data["payload"]:
-                    for part in message_data["payload"]["parts"]:
-                        if "body" in part and "attachmentId" in part["body"]:
-                            attachment_id = part["body"]["attachmentId"]
-                            attachment_url = f"https://www.googleapis.com/gmail/v1/users/me/messages/{message_id}/attachments/{attachment_id}"
-                            attachment_response = requests.get(attachment_url, headers={"Authorization": f"Bearer {access_token}"})
-                            attachment_data = attachment_response.json()
-                            data = attachment_data.get("data")
-                            filename = part.get("filename", "untitled.txt")
-                            if data:
-                                data_new[filename]=data[:10]
-                                # attachment_content = base64.urlsafe_b64decode(data)
-                                # extracted_text = await extract_text_from_attachment(filename, attachment_content)
-                                attachment_no+=1
-    return {"attachment_count":attachment_no,"attachment_content":data_new}
 def extract_text_and_links(html_content):

     access_token = response.json().get("access_token")
     print("printing access token , yo yo test")
     print(access_token)
+    return {"access_token":access_token}
+    # # if not access_token:
+    # #     raise HTTPException(status_code=400, detail="Authorization code not provided")
+    # print("Entered this function, for testing purposes")
+    # brand_name = "louis vuitton"
+    # user_info = requests.get("https://www.googleapis.com/oauth2/v1/userinfo", headers={"Authorization": f"Bearer {access_token}"})
+    # page_token = None
+    # messages = []
+    # # user_query = f"subject:((receipt {brand_name}) OR (receipts {brand_name})  OR (reçu {brand_name}) OR (reçus {brand_name}) OR (Quittung {brand_name}) OR (Quittungen {brand_name}) OR (aankoopbon {brand_name}) OR (aankoopbonnen {brand_name}) OR (recibo {brand_name}) OR (recibos {brand_name}) OR (ricevuta {brand_name}) OR (ricevute {brand_name}) OR (ontvangstbewijs {brand_name}) OR (ontvangstbewijzen {brand_name})) has:attachment"
+    # # user_query = f"{brand_name} label:^smartlabel_receipt"
+    # user_query = f"(label:^smartlabel_receipt OR (subject:your AND subject:order) OR subject:receipts OR subject:receipt OR subject:invoice OR subject:invoice)) AND subject:amazon"
+    # # user_query = """("invoice" OR (("tracking" OR "track") AND ("delivery" OR "package"))) OR (subject:order OR subject:receipt OR subject:receipts OR subject:invoice OR subject:invoice)"""
+    # while True:
+    #     # Construct Gmail API request with pageToken
+    #     gmail_url = f"https://www.googleapis.com/gmail/v1/users/me/messages?q={user_query}"
+    #     if page_token:
+    #         gmail_url += f"&pageToken={page_token}"
+    #     gmail_response = requests.get(gmail_url, headers={"Authorization": f"Bearer {access_token}"})
+    #     gmail_data = gmail_response.json()
+    #     # Check if there are messages in the response
+    #     if "messages" in gmail_data:
+    #         messages.extend(gmail_data["messages"])
+    #     # Check if there are more pages
+    #     if "nextPageToken" in gmail_data:
+    #         page_token = gmail_data["nextPageToken"]
+    #     else:
+    #         break  # No more pages, exit the loop
+    # unique_thread_ids = set()
+    # filtered_data_list = []
+    # for entry in messages:
+    #     thread_id = entry['threadId']
+    #     if thread_id not in unique_thread_ids:
+    #         unique_thread_ids.add(thread_id)
+    #         filtered_data_list.append(entry)
+    # attachments = []
+    # attachment_no = 0
+    # data_new = {}
+    # for i,message in enumerate(messages) :
+    #     # print(i)
+    #     # print(message)
+    #     if message:
+    #         message_id = message.get("id")
+    #         print(message_id)
+    #         if message_id:
+    #             message_url = f"https://www.googleapis.com/gmail/v1/users/me/messages/{message_id}"
+    #             message_response = requests.get(message_url, headers={"Authorization": f"Bearer {access_token}"})
+    #             message_data = message_response.json()
+    #             print("printing message_data response json")
+    #             print(message_data)
+    #             print("Finished printing message_data response json")
+    #             subject = ''
+    #             body = ''
+    #             print("printing body")
+    #             print(message_data['snippet'])
+    #             if 'payload' in message_data and 'headers' in message_data['payload']:
+    #                 headers = message_data['payload']['headers']
+    #                 for header in headers:
+    #                     if header['name'] == 'Subject':
+    #                         subject = header['value']
+    #                 if 'parts' in message_data['payload']:
+    #                     parts = message_data['payload']['parts']
+    #                     print("printing parts")
+    #                     print(parts)
+    #                     for part in parts:
+    #                         if part['mimeType'] == 'text/plain' or part['mimeType'] == 'text/html':
+    #                             body_data = part['body']['data']
+    #                             print("printing body data")
+    #                             print(body_data)
+    #                             body = base64.urlsafe_b64decode(body_data)
+    #             print("Subject:", subject)
+    #             if body:
+    #                 text,links=extract_text_and_links(body)
+    #                 if text:
+    #                     print("Printing extracted Text: ")
+    #                     print(text)
+    #                 else:
+    #                     print("No text found or there was some error parsing.")
+    #             if links:
+    #                 print("\nLinks:")
+    #                 for link_text, link_url in links:
+    #                     print(f"{link_text}: {link_url}")
+    #             else:
+    #                 print("No links found or there was some error in parsing or maybe don't use for loop.")
+    #             # Check for parts in the message payload
+    #             if "payload" in message_data and "parts" in message_data["payload"]:
+    #                 for part in message_data["payload"]["parts"]:
+    #                     if "body" in part and "attachmentId" in part["body"]:
+    #                         attachment_id = part["body"]["attachmentId"]
+    #                         attachment_url = f"https://www.googleapis.com/gmail/v1/users/me/messages/{message_id}/attachments/{attachment_id}"
+    #                         attachment_response = requests.get(attachment_url, headers={"Authorization": f"Bearer {access_token}"})
+    #                         attachment_data = attachment_response.json()
+    #                         data = attachment_data.get("data")
+    #                         filename = part.get("filename", "untitled.txt")
+    #                         if data:
+    #                             data_new[filename]=data[:10]
+    #                             # attachment_content = base64.urlsafe_b64decode(data)
+    #                             # extracted_text = await extract_text_from_attachment(filename, attachment_content)
+    #                             attachment_no+=1
+    # return {"attachment_count":attachment_no,"attachment_content":data_new}
 def extract_text_and_links(html_content):