Omkar008 committed on
Commit
447ff33
1 Parent(s): 90bfa42

Update test.py

Browse files
Files changed (1) hide show
  1. test.py +29 -101
test.py CHANGED
@@ -27,7 +27,6 @@ oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
27
  GOOGLE_CLIENT_ID = "485753721652-5uta3e18va2g6cnkldib2d68q39t4vod.apps.googleusercontent.com"
28
  GOOGLE_CLIENT_SECRET = "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"
29
  GOOGLE_REDIRECT_URI = "https://omkar008-receipt-radar-test.hf.space/auth/google"
30
- GOOGLE_REDIRECT_URI_hr = "https://receiptradar-0bb387d81174.herokuapp.com/auth/google"
31
  # Configure the logger
32
  logging.basicConfig(level=logging.DEBUG)
33
  logger = logging.getLogger(__name__)
@@ -54,18 +53,6 @@ async def auth_google(request: Request):
54
  print(code)
55
  if not code:
56
  raise HTTPException(status_code=400, detail="Authorization code not provided")
57
-
58
- # token_url = "https://accounts.google.com/o/oauth2/token"
59
- # print(code)
60
- # data = {
61
- # "code": code,
62
- # "client_id": GOOGLE_CLIENT_ID,
63
- # "client_secret": GOOGLE_CLIENT_SECRET,
64
- # "redirect_uri": GOOGLE_REDIRECT_URI,
65
- # "grant_type": "authorization_code",
66
- # }
67
- # response = requests.post(token_url, data=data)
68
- # access_token = response.json().get("access_token")
69
  access_token_new = code
70
 
71
  user_info = requests.get("https://www.googleapis.com/oauth2/v1/userinfo", headers={"Authorization": f"Bearer {access_token_new}"})
@@ -123,17 +110,6 @@ async def auth_google(request: Request):
123
  attachment_content = base64.urlsafe_b64decode(data.encode("UTF-8"))
124
  attachment_no+=1
125
 
126
- # if data:
127
- # # Decode base64-encoded attachment data
128
- # attachment_content = base64.urlsafe_b64decode(data.encode("UTF-8"))
129
-
130
- # # Save the attachment to a file
131
- # save_path = f"/Users/omkarmalpure/Documents/Gmail_API/attachments/{filename}"
132
- # with open(save_path, "wb") as file:
133
- # file.write(attachment_content)
134
-
135
- # attachments.append(save_path)
136
- print(data_new)
137
  return {"attachment_count":attachment_no,"attachment_content":data_new}
138
 
139
 
@@ -141,66 +117,49 @@ async def send_chunked_data(websocket: WebSocket, filename: str, data: str):
141
  chunk_size = 1024 # Set an appropriate chunk size
142
  for i in range(0, len(data), chunk_size):
143
  await websocket.send_json({"filename": filename, "data_chunk": data[i:i + chunk_size]})
144
- await asyncio.sleep(0.1)
145
  await websocket.send_text("FinishedThisAttachment")
146
 
147
 
148
- # async def extract_text_from_pdf(pdf_data):
149
- # with io.BytesIO(base64.b64decode(pdf_data)) as pdf_file:
150
- # pdf_reader = PyPDF2.PdfFileReader(pdf_file)
151
- # text = ""
152
- # for page_num in range(pdf_reader.numPages):
153
- # page = pdf_reader.getPage(page_num)
154
- # text += page.extract_text()
155
- # return text
156
 
157
- # async def extract_text_from_docx(docx_data):
158
- # doc = Document(io.BytesIO(base64.b64decode(docx_data)))
159
- # text = ""
160
- # for para in doc.paragraphs:
161
- # text += para.text + "\n"
162
- # return text
163
 
164
- # async def extract_text_from_attachment(filename, data):
165
- # if filename.endswith('.pdf'):
166
- # return await extract_text_from_pdf(data)
167
- # elif filename.endswith('.docx'):
168
- # return await extract_text_from_docx(data)
169
- # else:
170
- # # Add handling for other document types if needed
171
- # return "Unsupported document type"
172
 
173
 
174
 
175
  @app.websocket("/ws")
176
  async def test_websocket(websocket: WebSocket):
177
- #This code is basically the authorization code and this authorization code helps us to get the access token with the required scopes that we have set .
178
- #We require the gmail.readonly scopes that requires verification of our application and all.
179
- # raw_body = await request.body()
180
- # return {"data":Yo Yo"}
181
- # data = await request.json()
182
- # code = data.get("access_token")
183
  await websocket.accept()
184
- # await print(sucess)
185
- # await print("Hi hi working")
186
  logger.info("Hi hi succefull in connecting !!")
187
- # await websocket.send_json({"message":"Yes Websockets successfull"})
188
- # await print("working after line 163")
189
- logger.info("Now receiving json!!")
190
 
191
  data = await websocket.receive_text()
192
  logger.info("Received JSON data: %s", data)
193
- # print(data)
194
- # code = data.get("message")
195
- # print(code)
196
- # code = raw_body.decode()
197
- # sent=event_generator(data)
198
- for i in range(1, 11):
199
- logging.info(f"printing value {i}")
200
 
201
  def get_messages(code:str):
202
- print()
203
- # print(code)
204
  logging.info("entered into the get_messages")
205
  access_token = code
206
  print("printing access_token")
@@ -242,12 +201,7 @@ async def test_websocket(websocket: WebSocket):
242
  if thread_id not in unique_thread_ids:
243
  unique_thread_ids.add(thread_id)
244
  filtered_data_list.append(entry)
245
-
246
-
247
- print(messages)
248
 
249
- print(filtered_data_list)
250
- logging.info(f"{filtered_data_list}")
251
 
252
  return filtered_data_list
253
 
@@ -255,14 +209,9 @@ async def test_websocket(websocket: WebSocket):
255
 
256
  logging.info("entered into the event_generator")
257
 
258
- print(code)
259
  access_token = code
260
  messages=get_messages(access_token)
261
  print(len(messages))
262
- # await websocket.send_json({"message 1":0})
263
- # time.sleep(1)
264
- # await websocket.send_text("message 2")
265
- # time.sleep(1)
266
 
267
  attachments = []
268
  prev_data=""
@@ -299,36 +248,15 @@ async def test_websocket(websocket: WebSocket):
299
  attachment_data = attachment_response.json()
300
  data = attachment_data.get("data",{})
301
  filename = part.get("filename", "untitled.txt")
302
-
303
- # print("Print the data json response for that gmail")
304
- print(filename)
305
-
306
-
307
- # print(attachment_data)
308
- # json_str = json.dumps(attachment_data, indent=2)
309
-
310
- # with subprocess.Popen(["less"], stdin=subprocess.PIPE) as less_process:
311
- # less_process.communicate(input=json_str.encode("utf-8"))
312
  if data:
313
  data_new[filename]=str(data[:10])
314
  attachment_content = base64.urlsafe_b64decode(data.encode("UTF-8"))
315
- # await websocket.send_json({"filename": filename})
316
- # await websocket.send_bytes(attachment_content)
317
- # extracted_text = await extract_text_from_attachment(filename, data)
318
  await send_chunked_data(websocket, filename, data)
319
 
320
  attachment_no+=1
321
- # time.sleep(2)
322
- # await websocket.send_json({filename:data})
323
-
324
- # yield f"data: {str(data_new)}\n\n"
325
-
326
- # data_new={}
327
-
328
-
329
-
330
-
331
-
332
 
333
  await websocket.send_text("CompletedFetchingMessages")
334
 
 
27
  GOOGLE_CLIENT_ID = "485753721652-5uta3e18va2g6cnkldib2d68q39t4vod.apps.googleusercontent.com"
28
  GOOGLE_CLIENT_SECRET = "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"
29
  GOOGLE_REDIRECT_URI = "https://omkar008-receipt-radar-test.hf.space/auth/google"
 
30
  # Configure the logger
31
  logging.basicConfig(level=logging.DEBUG)
32
  logger = logging.getLogger(__name__)
 
53
  print(code)
54
  if not code:
55
  raise HTTPException(status_code=400, detail="Authorization code not provided")
 
 
 
 
 
 
 
 
 
 
 
 
56
  access_token_new = code
57
 
58
  user_info = requests.get("https://www.googleapis.com/oauth2/v1/userinfo", headers={"Authorization": f"Bearer {access_token_new}"})
 
110
  attachment_content = base64.urlsafe_b64decode(data.encode("UTF-8"))
111
  attachment_no+=1
112
 
 
 
 
 
 
 
 
 
 
 
 
113
  return {"attachment_count":attachment_no,"attachment_content":data_new}
114
 
115
 
 
117
  chunk_size = 1024 # Set an appropriate chunk size
118
  for i in range(0, len(data), chunk_size):
119
  await websocket.send_json({"filename": filename, "data_chunk": data[i:i + chunk_size]})
120
+ await asyncio.sleep(0.4)
121
  await websocket.send_text("FinishedThisAttachment")
122
 
123
 
124
+ async def extract_text_from_pdf(pdf_data):
125
+ with io.BytesIO(base64.b64decode(pdf_data)) as pdf_file:
126
+ pdf_reader = PyPDF2.PdfFileReader(pdf_file)
127
+ text = ""
128
+ for page_num in range(pdf_reader.numPages):
129
+ page = pdf_reader.getPage(page_num)
130
+ text += page.extract_text()
131
+ return text
132
 
133
+ async def extract_text_from_docx(docx_data):
134
+ doc = Document(io.BytesIO(base64.b64decode(docx_data)))
135
+ text = ""
136
+ for para in doc.paragraphs:
137
+ text += para.text + "\n"
138
+ return text
139
 
140
+ async def extract_text_from_attachment(filename, data):
141
+ if filename.endswith('.pdf'):
142
+ return await extract_text_from_pdf(data)
143
+ elif filename.endswith('.docx'):
144
+ return await extract_text_from_docx(data)
145
+ else:
146
+ # Add handling for other document types if needed
147
+ return "Unsupported document type"
148
 
149
 
150
 
151
  @app.websocket("/ws")
152
  async def test_websocket(websocket: WebSocket):
 
 
 
 
 
 
153
  await websocket.accept()
 
 
154
  logger.info("Hi hi succefull in connecting !!")
 
 
 
155
 
156
  data = await websocket.receive_text()
157
  logger.info("Received JSON data: %s", data)
158
+
159
+
 
 
 
 
 
160
 
161
  def get_messages(code:str):
162
+
 
163
  logging.info("entered into the get_messages")
164
  access_token = code
165
  print("printing access_token")
 
201
  if thread_id not in unique_thread_ids:
202
  unique_thread_ids.add(thread_id)
203
  filtered_data_list.append(entry)
 
 
 
204
 
 
 
205
 
206
  return filtered_data_list
207
 
 
209
 
210
  logging.info("entered into the event_generator")
211
 
 
212
  access_token = code
213
  messages=get_messages(access_token)
214
  print(len(messages))
 
 
 
 
215
 
216
  attachments = []
217
  prev_data=""
 
248
  attachment_data = attachment_response.json()
249
  data = attachment_data.get("data",{})
250
  filename = part.get("filename", "untitled.txt")
 
 
 
 
 
 
 
 
 
 
251
  if data:
252
  data_new[filename]=str(data[:10])
253
  attachment_content = base64.urlsafe_b64decode(data.encode("UTF-8"))
254
+ logging.info(filename)
255
+ extracted_text = await extract_text_from_attachment(filename, data
256
+ logging.info(extracted_text)
257
  await send_chunked_data(websocket, filename, data)
258
 
259
  attachment_no+=1
 
 
 
 
 
 
 
 
 
 
 
260
 
261
  await websocket.send_text("CompletedFetchingMessages")
262