Omkar008 committed on
Commit
4c0eeb2
1 Parent(s): 0527f8f

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +112 -112
main.py CHANGED
@@ -62,124 +62,124 @@ async def test_google(code:str):
62
  access_token = response.json().get("access_token")
63
  print("printing access token , yo yo test")
64
  print(access_token)
65
-
66
- # if not access_token:
67
- # raise HTTPException(status_code=400, detail="Authorization code not provided")
68
- print("Entered this function, for testing purposes")
69
- brand_name = "louis vuitton"
70
-
71
- user_info = requests.get("https://www.googleapis.com/oauth2/v1/userinfo", headers={"Authorization": f"Bearer {access_token}"})
72
- page_token = None
73
- messages = []
74
- # user_query = f"subject:((receipt {brand_name}) OR (receipts {brand_name}) OR (reçu {brand_name}) OR (reçus {brand_name}) OR (Quittung {brand_name}) OR (Quittungen {brand_name}) OR (aankoopbon {brand_name}) OR (aankoopbonnen {brand_name}) OR (recibo {brand_name}) OR (recibos {brand_name}) OR (ricevuta {brand_name}) OR (ricevute {brand_name}) OR (ontvangstbewijs {brand_name}) OR (ontvangstbewijzen {brand_name})) has:attachment"
75
- # user_query = f"{brand_name} label:^smartlabel_receipt"
76
- user_query = f"(label:^smartlabel_receipt OR (subject:your AND subject:order) OR subject:receipts OR subject:receipt OR subject:invoice OR subject:invoice)) AND subject:amazon"
77
- # user_query = """("invoice" OR (("tracking" OR "track") AND ("delivery" OR "package"))) OR (subject:order OR subject:receipt OR subject:receipts OR subject:invoice OR subject:invoice)"""
78
- while True:
79
- # Construct Gmail API request with pageToken
80
 
81
- gmail_url = f"https://www.googleapis.com/gmail/v1/users/me/messages?q={user_query}"
82
- if page_token:
83
- gmail_url += f"&pageToken={page_token}"
84
 
85
- gmail_response = requests.get(gmail_url, headers={"Authorization": f"Bearer {access_token}"})
86
- gmail_data = gmail_response.json()
87
 
88
- # Check if there are messages in the response
89
- if "messages" in gmail_data:
90
- messages.extend(gmail_data["messages"])
91
-
92
- # Check if there are more pages
93
- if "nextPageToken" in gmail_data:
94
- page_token = gmail_data["nextPageToken"]
95
- else:
96
- break # No more pages, exit the loop
97
-
98
- unique_thread_ids = set()
99
-
100
- filtered_data_list = []
101
-
102
- for entry in messages:
103
- thread_id = entry['threadId']
104
- if thread_id not in unique_thread_ids:
105
- unique_thread_ids.add(thread_id)
106
- filtered_data_list.append(entry)
107
-
108
- attachments = []
109
- attachment_no = 0
110
- data_new = {}
111
- for i,message in enumerate(messages) :
112
- # print(i)
113
- # print(message)
114
-
115
- if message:
116
- message_id = message.get("id")
117
- print(message_id)
118
- if message_id:
119
- message_url = f"https://www.googleapis.com/gmail/v1/users/me/messages/{message_id}"
120
- message_response = requests.get(message_url, headers={"Authorization": f"Bearer {access_token}"})
121
- message_data = message_response.json()
122
- print("printing message_data response json")
123
- print(message_data)
124
- print("Finished printing message_data response json")
125
-
126
- subject = ''
127
- body = ''
128
- print("printing body")
129
- print(message_data['snippet'])
130
- if 'payload' in message_data and 'headers' in message_data['payload']:
131
- headers = message_data['payload']['headers']
132
- for header in headers:
133
- if header['name'] == 'Subject':
134
- subject = header['value']
135
 
136
- if 'parts' in message_data['payload']:
137
- parts = message_data['payload']['parts']
138
- print("printing parts")
139
- print(parts)
140
-
141
- for part in parts:
142
- if part['mimeType'] == 'text/plain' or part['mimeType'] == 'text/html':
143
- body_data = part['body']['data']
144
- print("printing body data")
145
- print(body_data)
146
- body = base64.urlsafe_b64decode(body_data)
147
- print("Subject:", subject)
148
- if body:
149
- text,links=extract_text_and_links(body)
150
- if text:
151
- print("Printing extracted Text: ")
152
- print(text)
153
- else:
154
- print("No text found or there was some error parsing.")
155
-
156
- if links:
157
- print("\nLinks:")
158
- for link_text, link_url in links:
159
- print(f"{link_text}: {link_url}")
160
- else:
161
- print("No links found or there was some error in parsing or maybe don't use for loop.")
162
 
163
 
164
- # Check for parts in the message payload
165
- if "payload" in message_data and "parts" in message_data["payload"]:
166
- for part in message_data["payload"]["parts"]:
167
- if "body" in part and "attachmentId" in part["body"]:
168
- attachment_id = part["body"]["attachmentId"]
169
- attachment_url = f"https://www.googleapis.com/gmail/v1/users/me/messages/{message_id}/attachments/{attachment_id}"
170
- attachment_response = requests.get(attachment_url, headers={"Authorization": f"Bearer {access_token}"})
171
- attachment_data = attachment_response.json()
172
- data = attachment_data.get("data")
173
- filename = part.get("filename", "untitled.txt")
174
-
175
- if data:
176
- data_new[filename]=data[:10]
177
- # attachment_content = base64.urlsafe_b64decode(data)
178
- # extracted_text = await extract_text_from_attachment(filename, attachment_content)
179
-
180
- attachment_no+=1
181
-
182
- return {"attachment_count":attachment_no,"attachment_content":data_new}
183
 
184
 
185
  def extract_text_and_links(html_content):
 
62
  access_token = response.json().get("access_token")
63
  print("printing access token , yo yo test")
64
  print(access_token)
65
+ return {"access_token":access_token}
66
+ # # if not access_token:
67
+ # # raise HTTPException(status_code=400, detail="Authorization code not provided")
68
+ # print("Entered this function, for testing purposes")
69
+ # brand_name = "louis vuitton"
70
+
71
+ # user_info = requests.get("https://www.googleapis.com/oauth2/v1/userinfo", headers={"Authorization": f"Bearer {access_token}"})
72
+ # page_token = None
73
+ # messages = []
74
+ # # user_query = f"subject:((receipt {brand_name}) OR (receipts {brand_name}) OR (reçu {brand_name}) OR (reçus {brand_name}) OR (Quittung {brand_name}) OR (Quittungen {brand_name}) OR (aankoopbon {brand_name}) OR (aankoopbonnen {brand_name}) OR (recibo {brand_name}) OR (recibos {brand_name}) OR (ricevuta {brand_name}) OR (ricevute {brand_name}) OR (ontvangstbewijs {brand_name}) OR (ontvangstbewijzen {brand_name})) has:attachment"
75
+ # # user_query = f"{brand_name} label:^smartlabel_receipt"
76
+ # user_query = f"(label:^smartlabel_receipt OR (subject:your AND subject:order) OR subject:receipts OR subject:receipt OR subject:invoice OR subject:invoice)) AND subject:amazon"
77
+ # # user_query = """("invoice" OR (("tracking" OR "track") AND ("delivery" OR "package"))) OR (subject:order OR subject:receipt OR subject:receipts OR subject:invoice OR subject:invoice)"""
78
+ # while True:
79
+ # # Construct Gmail API request with pageToken
80
 
81
+ # gmail_url = f"https://www.googleapis.com/gmail/v1/users/me/messages?q={user_query}"
82
+ # if page_token:
83
+ # gmail_url += f"&pageToken={page_token}"
84
 
85
+ # gmail_response = requests.get(gmail_url, headers={"Authorization": f"Bearer {access_token}"})
86
+ # gmail_data = gmail_response.json()
87
 
88
+ # # Check if there are messages in the response
89
+ # if "messages" in gmail_data:
90
+ # messages.extend(gmail_data["messages"])
91
+
92
+ # # Check if there are more pages
93
+ # if "nextPageToken" in gmail_data:
94
+ # page_token = gmail_data["nextPageToken"]
95
+ # else:
96
+ # break # No more pages, exit the loop
97
+
98
+ # unique_thread_ids = set()
99
+
100
+ # filtered_data_list = []
101
+
102
+ # for entry in messages:
103
+ # thread_id = entry['threadId']
104
+ # if thread_id not in unique_thread_ids:
105
+ # unique_thread_ids.add(thread_id)
106
+ # filtered_data_list.append(entry)
107
+
108
+ # attachments = []
109
+ # attachment_no = 0
110
+ # data_new = {}
111
+ # for i,message in enumerate(messages) :
112
+ # # print(i)
113
+ # # print(message)
114
+
115
+ # if message:
116
+ # message_id = message.get("id")
117
+ # print(message_id)
118
+ # if message_id:
119
+ # message_url = f"https://www.googleapis.com/gmail/v1/users/me/messages/{message_id}"
120
+ # message_response = requests.get(message_url, headers={"Authorization": f"Bearer {access_token}"})
121
+ # message_data = message_response.json()
122
+ # print("printing message_data response json")
123
+ # print(message_data)
124
+ # print("Finished printing message_data response json")
125
+
126
+ # subject = ''
127
+ # body = ''
128
+ # print("printing body")
129
+ # print(message_data['snippet'])
130
+ # if 'payload' in message_data and 'headers' in message_data['payload']:
131
+ # headers = message_data['payload']['headers']
132
+ # for header in headers:
133
+ # if header['name'] == 'Subject':
134
+ # subject = header['value']
135
 
136
+ # if 'parts' in message_data['payload']:
137
+ # parts = message_data['payload']['parts']
138
+ # print("printing parts")
139
+ # print(parts)
140
+
141
+ # for part in parts:
142
+ # if part['mimeType'] == 'text/plain' or part['mimeType'] == 'text/html':
143
+ # body_data = part['body']['data']
144
+ # print("printing body data")
145
+ # print(body_data)
146
+ # body = base64.urlsafe_b64decode(body_data)
147
+ # print("Subject:", subject)
148
+ # if body:
149
+ # text,links=extract_text_and_links(body)
150
+ # if text:
151
+ # print("Printing extracted Text: ")
152
+ # print(text)
153
+ # else:
154
+ # print("No text found or there was some error parsing.")
155
+
156
+ # if links:
157
+ # print("\nLinks:")
158
+ # for link_text, link_url in links:
159
+ # print(f"{link_text}: {link_url}")
160
+ # else:
161
+ # print("No links found or there was some error in parsing or maybe don't use for loop.")
162
 
163
 
164
+ # # Check for parts in the message payload
165
+ # if "payload" in message_data and "parts" in message_data["payload"]:
166
+ # for part in message_data["payload"]["parts"]:
167
+ # if "body" in part and "attachmentId" in part["body"]:
168
+ # attachment_id = part["body"]["attachmentId"]
169
+ # attachment_url = f"https://www.googleapis.com/gmail/v1/users/me/messages/{message_id}/attachments/{attachment_id}"
170
+ # attachment_response = requests.get(attachment_url, headers={"Authorization": f"Bearer {access_token}"})
171
+ # attachment_data = attachment_response.json()
172
+ # data = attachment_data.get("data")
173
+ # filename = part.get("filename", "untitled.txt")
174
+
175
+ # if data:
176
+ # data_new[filename]=data[:10]
177
+ # # attachment_content = base64.urlsafe_b64decode(data)
178
+ # # extracted_text = await extract_text_from_attachment(filename, attachment_content)
179
+
180
+ # attachment_no+=1
181
+
182
+ # return {"attachment_count":attachment_no,"attachment_content":data_new}
183
 
184
 
185
  def extract_text_and_links(html_content):