Spaces:
Running
Running
Update test.py
Browse files
test.py
CHANGED
@@ -27,7 +27,6 @@ oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
|
|
27 |
GOOGLE_CLIENT_ID = "485753721652-5uta3e18va2g6cnkldib2d68q39t4vod.apps.googleusercontent.com"
|
28 |
GOOGLE_CLIENT_SECRET = "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"
|
29 |
GOOGLE_REDIRECT_URI = "https://omkar008-receipt-radar-test.hf.space/auth/google"
|
30 |
-
GOOGLE_REDIRECT_URI_hr = "https://receiptradar-0bb387d81174.herokuapp.com/auth/google"
|
31 |
# Configure the logger
|
32 |
logging.basicConfig(level=logging.DEBUG)
|
33 |
logger = logging.getLogger(__name__)
|
@@ -54,18 +53,6 @@ async def auth_google(request: Request):
|
|
54 |
print(code)
|
55 |
if not code:
|
56 |
raise HTTPException(status_code=400, detail="Authorization code not provided")
|
57 |
-
|
58 |
-
# token_url = "https://accounts.google.com/o/oauth2/token"
|
59 |
-
# print(code)
|
60 |
-
# data = {
|
61 |
-
# "code": code,
|
62 |
-
# "client_id": GOOGLE_CLIENT_ID,
|
63 |
-
# "client_secret": GOOGLE_CLIENT_SECRET,
|
64 |
-
# "redirect_uri": GOOGLE_REDIRECT_URI,
|
65 |
-
# "grant_type": "authorization_code",
|
66 |
-
# }
|
67 |
-
# response = requests.post(token_url, data=data)
|
68 |
-
# access_token = response.json().get("access_token")
|
69 |
access_token_new = code
|
70 |
|
71 |
user_info = requests.get("https://www.googleapis.com/oauth2/v1/userinfo", headers={"Authorization": f"Bearer {access_token_new}"})
|
@@ -123,17 +110,6 @@ async def auth_google(request: Request):
|
|
123 |
attachment_content = base64.urlsafe_b64decode(data.encode("UTF-8"))
|
124 |
attachment_no+=1
|
125 |
|
126 |
-
# if data:
|
127 |
-
# # Decode base64-encoded attachment data
|
128 |
-
# attachment_content = base64.urlsafe_b64decode(data.encode("UTF-8"))
|
129 |
-
|
130 |
-
# # Save the attachment to a file
|
131 |
-
# save_path = f"/Users/omkarmalpure/Documents/Gmail_API/attachments/{filename}"
|
132 |
-
# with open(save_path, "wb") as file:
|
133 |
-
# file.write(attachment_content)
|
134 |
-
|
135 |
-
# attachments.append(save_path)
|
136 |
-
print(data_new)
|
137 |
return {"attachment_count":attachment_no,"attachment_content":data_new}
|
138 |
|
139 |
|
@@ -141,66 +117,49 @@ async def send_chunked_data(websocket: WebSocket, filename: str, data: str):
|
|
141 |
chunk_size = 1024 # Set an appropriate chunk size
|
142 |
for i in range(0, len(data), chunk_size):
|
143 |
await websocket.send_json({"filename": filename, "data_chunk": data[i:i + chunk_size]})
|
144 |
-
await asyncio.sleep(0.
|
145 |
await websocket.send_text("FinishedThisAttachment")
|
146 |
|
147 |
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
#
|
171 |
-
|
172 |
|
173 |
|
174 |
|
175 |
@app.websocket("/ws")
|
176 |
async def test_websocket(websocket: WebSocket):
|
177 |
-
#This code is basically the authorization code and this authorization code helps us to get the access token with the required scopes that we have set .
|
178 |
-
#We require the gmail.readonly scopes that requires verification of our application and all.
|
179 |
-
# raw_body = await request.body()
|
180 |
-
# return {"data":Yo Yo"}
|
181 |
-
# data = await request.json()
|
182 |
-
# code = data.get("access_token")
|
183 |
await websocket.accept()
|
184 |
-
# await print(sucess)
|
185 |
-
# await print("Hi hi working")
|
186 |
logger.info("Hi hi succefull in connecting !!")
|
187 |
-
# await websocket.send_json({"message":"Yes Websockets successfull"})
|
188 |
-
# await print("working after line 163")
|
189 |
-
logger.info("Now receiving json!!")
|
190 |
|
191 |
data = await websocket.receive_text()
|
192 |
logger.info("Received JSON data: %s", data)
|
193 |
-
|
194 |
-
|
195 |
-
# print(code)
|
196 |
-
# code = raw_body.decode()
|
197 |
-
# sent=event_generator(data)
|
198 |
-
for i in range(1, 11):
|
199 |
-
logging.info(f"printing value {i}")
|
200 |
|
201 |
def get_messages(code:str):
|
202 |
-
|
203 |
-
# print(code)
|
204 |
logging.info("entered into the get_messages")
|
205 |
access_token = code
|
206 |
print("printing access_token")
|
@@ -242,12 +201,7 @@ async def test_websocket(websocket: WebSocket):
|
|
242 |
if thread_id not in unique_thread_ids:
|
243 |
unique_thread_ids.add(thread_id)
|
244 |
filtered_data_list.append(entry)
|
245 |
-
|
246 |
-
|
247 |
-
print(messages)
|
248 |
|
249 |
-
print(filtered_data_list)
|
250 |
-
logging.info(f"{filtered_data_list}")
|
251 |
|
252 |
return filtered_data_list
|
253 |
|
@@ -255,14 +209,9 @@ async def test_websocket(websocket: WebSocket):
|
|
255 |
|
256 |
logging.info("entered into the event_generator")
|
257 |
|
258 |
-
print(code)
|
259 |
access_token = code
|
260 |
messages=get_messages(access_token)
|
261 |
print(len(messages))
|
262 |
-
# await websocket.send_json({"message 1":0})
|
263 |
-
# time.sleep(1)
|
264 |
-
# await websocket.send_text("message 2")
|
265 |
-
# time.sleep(1)
|
266 |
|
267 |
attachments = []
|
268 |
prev_data=""
|
@@ -299,36 +248,15 @@ async def test_websocket(websocket: WebSocket):
|
|
299 |
attachment_data = attachment_response.json()
|
300 |
data = attachment_data.get("data",{})
|
301 |
filename = part.get("filename", "untitled.txt")
|
302 |
-
|
303 |
-
# print("Print the data json response for that gmail")
|
304 |
-
print(filename)
|
305 |
-
|
306 |
-
|
307 |
-
# print(attachment_data)
|
308 |
-
# json_str = json.dumps(attachment_data, indent=2)
|
309 |
-
|
310 |
-
# with subprocess.Popen(["less"], stdin=subprocess.PIPE) as less_process:
|
311 |
-
# less_process.communicate(input=json_str.encode("utf-8"))
|
312 |
if data:
|
313 |
data_new[filename]=str(data[:10])
|
314 |
attachment_content = base64.urlsafe_b64decode(data.encode("UTF-8"))
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
await send_chunked_data(websocket, filename, data)
|
319 |
|
320 |
attachment_no+=1
|
321 |
-
# time.sleep(2)
|
322 |
-
# await websocket.send_json({filename:data})
|
323 |
-
|
324 |
-
# yield f"data: {str(data_new)}\n\n"
|
325 |
-
|
326 |
-
# data_new={}
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
|
333 |
await websocket.send_text("CompletedFetchingMessages")
|
334 |
|
|
|
27 |
GOOGLE_CLIENT_ID = "485753721652-5uta3e18va2g6cnkldib2d68q39t4vod.apps.googleusercontent.com"
|
28 |
GOOGLE_CLIENT_SECRET = "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"
|
29 |
GOOGLE_REDIRECT_URI = "https://omkar008-receipt-radar-test.hf.space/auth/google"
|
|
|
30 |
# Configure the logger
|
31 |
logging.basicConfig(level=logging.DEBUG)
|
32 |
logger = logging.getLogger(__name__)
|
|
|
53 |
print(code)
|
54 |
if not code:
|
55 |
raise HTTPException(status_code=400, detail="Authorization code not provided")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
access_token_new = code
|
57 |
|
58 |
user_info = requests.get("https://www.googleapis.com/oauth2/v1/userinfo", headers={"Authorization": f"Bearer {access_token_new}"})
|
|
|
110 |
attachment_content = base64.urlsafe_b64decode(data.encode("UTF-8"))
|
111 |
attachment_no+=1
|
112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
return {"attachment_count":attachment_no,"attachment_content":data_new}
|
114 |
|
115 |
|
|
|
117 |
chunk_size = 1024 # Set an appropriate chunk size
|
118 |
for i in range(0, len(data), chunk_size):
|
119 |
await websocket.send_json({"filename": filename, "data_chunk": data[i:i + chunk_size]})
|
120 |
+
await asyncio.sleep(0.4)
|
121 |
await websocket.send_text("FinishedThisAttachment")
|
122 |
|
123 |
|
124 |
+
async def extract_text_from_pdf(pdf_data):
    """Decode a base64-encoded PDF and return the text of all its pages.

    Args:
        pdf_data: Base64 text (``str`` or ``bytes``) holding the PDF file.
            Both the URL-safe and the standard alphabet are accepted.

    Returns:
        The extracted text of every page, concatenated in page order.

    Raises:
        binascii.Error: If ``pdf_data`` is not valid base64.
    """
    # The caller decodes this same payload with ``urlsafe_b64decode``.
    # ``b64decode`` would silently drop '-' and '_' and corrupt the PDF bytes;
    # the URL-safe decoder still accepts standard '+' and '/' input.
    raw_pdf = base64.urlsafe_b64decode(pdf_data)
    with io.BytesIO(raw_pdf) as pdf_file:
        # ``PdfReader`` / ``.pages`` are the current names for the removed
        # ``PdfFileReader`` / ``numPages`` / ``getPage`` API; they ship in the
        # same PyPDF2 releases that provide ``extract_text``.
        reader = PyPDF2.PdfReader(pdf_file)
        # Extract while the stream is still open; guard against a page that
        # yields no text so the join never sees a non-string.
        return "".join(page.extract_text() or "" for page in reader.pages)
|
132 |
|
133 |
+
async def extract_text_from_docx(docx_data):
    """Decode a base64-encoded .docx file and return its paragraph text.

    Args:
        docx_data: Base64 text (``str`` or ``bytes``) holding the document.
            Both the URL-safe and the standard alphabet are accepted.

    Returns:
        Every paragraph's text in document order, each followed by a newline.

    Raises:
        binascii.Error: If ``docx_data`` is not valid base64.
    """
    # The caller decodes this same payload with ``urlsafe_b64decode``.
    # ``b64decode`` would silently drop '-' and '_' and corrupt the archive;
    # the URL-safe decoder still accepts standard '+' and '/' input.
    raw_docx = base64.urlsafe_b64decode(docx_data)
    # NOTE(review): ``Document`` is presumably python-docx's constructor,
    # imported at file level -- confirm against the import block.
    doc = Document(io.BytesIO(raw_docx))
    return "".join(f"{para.text}\n" for para in doc.paragraphs)
|
139 |
|
140 |
+
async def extract_text_from_attachment(filename, data):
    """Extract text from an attachment, choosing the parser by file extension.

    Args:
        filename: Attachment file name; only its extension is inspected.
        data: Base64-encoded attachment content, passed through unchanged to
            the format-specific extractor.

    Returns:
        The extracted text for ``.pdf`` and ``.docx`` attachments, otherwise
        the literal string ``"Unsupported document type"``.
    """
    # Compare case-insensitively so names like "RECEIPT.PDF" are recognised,
    # and treat a missing filename as having no extension instead of crashing.
    lowered = (filename or "").lower()
    if lowered.endswith(".pdf"):
        return await extract_text_from_pdf(data)
    if lowered.endswith(".docx"):
        return await extract_text_from_docx(data)
    # Add handling for other document types here if needed.
    return "Unsupported document type"
|
148 |
|
149 |
|
150 |
|
151 |
@app.websocket("/ws")
|
152 |
async def test_websocket(websocket: WebSocket):
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
await websocket.accept()
|
|
|
|
|
154 |
logger.info("Hi hi succefull in connecting !!")
|
|
|
|
|
|
|
155 |
|
156 |
data = await websocket.receive_text()
|
157 |
logger.info("Received JSON data: %s", data)
|
158 |
+
|
159 |
+
|
|
|
|
|
|
|
|
|
|
|
160 |
|
161 |
def get_messages(code:str):
|
162 |
+
|
|
|
163 |
logging.info("entered into the get_messages")
|
164 |
access_token = code
|
165 |
print("printing access_token")
|
|
|
201 |
if thread_id not in unique_thread_ids:
|
202 |
unique_thread_ids.add(thread_id)
|
203 |
filtered_data_list.append(entry)
|
|
|
|
|
|
|
204 |
|
|
|
|
|
205 |
|
206 |
return filtered_data_list
|
207 |
|
|
|
209 |
|
210 |
logging.info("entered into the event_generator")
|
211 |
|
|
|
212 |
access_token = code
|
213 |
messages=get_messages(access_token)
|
214 |
print(len(messages))
|
|
|
|
|
|
|
|
|
215 |
|
216 |
attachments = []
|
217 |
prev_data=""
|
|
|
248 |
attachment_data = attachment_response.json()
|
249 |
data = attachment_data.get("data",{})
|
250 |
filename = part.get("filename", "untitled.txt")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
251 |
if data:
|
252 |
data_new[filename]=str(data[:10])
|
253 |
attachment_content = base64.urlsafe_b64decode(data.encode("UTF-8"))
|
254 |
+
logging.info(filename)
|
255 |
+
extracted_text = await extract_text_from_attachment(filename, data
|
256 |
+
logging.info(extracted_text)
|
257 |
await send_chunked_data(websocket, filename, data)
|
258 |
|
259 |
attachment_no+=1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
260 |
|
261 |
await websocket.send_text("CompletedFetchingMessages")
|
262 |
|