"""FastAPI service that fetches Gmail receipt/invoice attachments.

Endpoints:

* ``GET /``             -- build the Google OAuth consent URLs.
* ``POST /auth/google`` -- given a Google access token, return every matching
  attachment (base64url text) in a single JSON response.
* ``WS /ws``            -- given a Google access token as the first text frame,
  stream every matching attachment back in chunks.
* ``GET /token``        -- decode an HS256 bearer token signed with the client
  secret.
"""
import asyncio
import base64
import logging
import os
import time
import webbrowser
from typing import Any, Dict, List, Optional, Tuple

import requests
from fastapi import (
    Depends,
    FastAPI,
    HTTPException,
    Request,
    WebSocket,
    WebSocketDisconnect,
)
from fastapi.concurrency import run_in_threadpool
from fastapi.security import OAuth2PasswordBearer
from jose import JWTError, jwt

app = FastAPI()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")

# Replace these with your own values from the Google Developer Console.
# Each value can be overridden through an environment variable of the same
# name; the literals are kept only as backward-compatible fallbacks.
# SECURITY NOTE(review): a client secret is committed to source here. It should
# be rotated and supplied exclusively through the environment.
GOOGLE_CLIENT_ID = os.getenv(
    "GOOGLE_CLIENT_ID",
    "485753721652-5uta3e18va2g6cnkldib2d68q39t4vod.apps.googleusercontent.com",
)
GOOGLE_CLIENT_SECRET = os.getenv(
    "GOOGLE_CLIENT_SECRET",
    "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",
)
GOOGLE_REDIRECT_URI = os.getenv(
    "GOOGLE_REDIRECT_URI",
    "https://omkar008-receipt-radar-test.hf.space/auth/google",
)
GOOGLE_REDIRECT_URI_hr = os.getenv(
    "GOOGLE_REDIRECT_URI_HR",
    "https://receiptradar-0bb387d81174.herokuapp.com/auth/google",
)

# OpenID profile scopes plus read-only Gmail access (already percent-encoded).
OAUTH_SCOPES = (
    "openid%20profile%20email%20"
    "https://www.googleapis.com/auth/gmail.readonly"
)

GMAIL_MESSAGES_URL = "https://www.googleapis.com/gmail/v1/users/me/messages"

# Gmail search expressions used by the two fetch endpoints.
INVOICE_QUERY = (
    "subject:receipt OR subject:receipts OR subject:Invoice "
    "OR subject:invoice has:attachment "
)
RECEIPT_QUERY = "subject:receipt OR subject:receipts has:attachment"

# Without a timeout, `requests` waits forever on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 30

# Configure the logger
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


def _build_oauth_url(redirect_uri: str, state: Optional[str] = None) -> str:
    """Return the Google OAuth consent URL for ``redirect_uri``.

    ``state`` is appended as the ``state`` query parameter when given.
    """
    url = (
        "https://accounts.google.com/o/oauth2/auth"
        "?response_type=code"
        f"&client_id={GOOGLE_CLIENT_ID}"
        f"&redirect_uri={redirect_uri}"
        f"&scope={OAUTH_SCOPES}"
        "&access_type=offline"
    )
    if state:
        url += f"&state={state}"
    return url


def _get_json(
    url: str,
    access_token: str,
    params: Optional[Dict[str, str]] = None,
) -> Dict[str, Any]:
    """GET ``url`` with a bearer token and return the decoded JSON body.

    Non-2xx responses are logged but not raised, so callers see the JSON error
    body (which simply lacks the keys they look for). The token itself is
    never written to the log.
    """
    response = requests.get(
        url,
        headers={"Authorization": f"Bearer {access_token}"},
        params=params,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    if not response.ok:
        logger.warning(
            "Request to %s failed with status %s", url, response.status_code
        )
    return response.json()


def _list_message_refs(access_token: str, query: str) -> List[Dict[str, Any]]:
    """Return every message reference matching ``query``, following paging."""
    refs: List[Dict[str, Any]] = []
    page_token = None
    while True:
        # `params` lets requests URL-encode the query and the page token.
        params = {"q": query}
        if page_token:
            params["pageToken"] = page_token
        page = _get_json(GMAIL_MESSAGES_URL, access_token, params)
        refs.extend(page.get("messages", []))
        page_token = page.get("nextPageToken")
        if not page_token:
            break  # No more pages.
    return refs


def _dedupe_by_thread(refs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Keep only the first message reference seen for each ``threadId``."""
    seen_threads = set()
    unique_refs = []
    for ref in refs:
        thread_id = ref["threadId"]
        if thread_id not in seen_threads:
            seen_threads.add(thread_id)
            unique_refs.append(ref)
    return unique_refs


def _fetch_message_attachments(
    access_token: str, message_id: str
) -> List[Tuple[str, str]]:
    """Return ``(filename, base64url_data)`` for each attachment of a message.

    Only the top-level ``payload.parts`` entries are inspected; parts nested
    inside further multipart containers are not traversed. Attachments whose
    response carries no ``data`` are skipped.
    """
    message_url = f"{GMAIL_MESSAGES_URL}/{message_id}"
    message = _get_json(message_url, access_token)
    attachments = []
    for part in message.get("payload", {}).get("parts", []):
        attachment_id = part.get("body", {}).get("attachmentId")
        if not attachment_id:
            continue
        attachment = _get_json(
            f"{message_url}/attachments/{attachment_id}", access_token
        )
        data = attachment.get("data")
        if data:
            attachments.append((part.get("filename", "untitled.txt"), data))
    return attachments


def _collect_attachments(access_token: str) -> Tuple[int, Dict[str, str]]:
    """Fetch all attachments matching ``INVOICE_QUERY``.

    Returns ``(count, contents)``. ``contents`` is keyed by filename, so
    attachments sharing a filename overwrite one another while ``count`` still
    counts each of them.
    """
    count = 0
    contents: Dict[str, str] = {}
    for ref in _list_message_refs(access_token, INVOICE_QUERY):
        message_id = ref.get("id") if ref else None
        if not message_id:
            continue
        logger.debug("Fetching attachments of message %s", message_id)
        for filename, data in _fetch_message_attachments(
            access_token, message_id
        ):
            contents[filename] = data
            count += 1
    return count, contents


@app.get("/")
async def login_google():
    """Return the OAuth consent URL for the Heroku redirect target.

    Also asks the local ``webbrowser`` module to open the consent URL for the
    primary redirect target.
    """
    # Prompts the user to log in to their Gmail account and grant read-only
    # access to it.
    oauth_url = _build_oauth_url(GOOGLE_REDIRECT_URI)
    oauth_url_hr = _build_oauth_url(GOOGLE_REDIRECT_URI_hr, state="receipts")
    # NOTE(review): this opens a browser on the machine running the server,
    # not on the client's machine -- confirm it is still wanted.
    webbrowser.open(oauth_url)
    return {"url_hr": oauth_url_hr}


@app.post("/auth/google")
async def auth_google(request: Request):
    """Return every invoice/receipt attachment for the supplied access token.

    The JSON body must contain ``access_token``. The response has the shape
    ``{"attachment_count": int, "attachment_content": {filename: data}}``
    where ``data`` is the base64url text returned by Gmail.

    Raises:
        HTTPException: 400 when the body is not valid JSON or carries no
            ``access_token``.
    """
    try:
        payload = await request.json()
    except ValueError:
        raise HTTPException(
            status_code=400, detail="Request body must be valid JSON"
        ) from None
    access_token = (
        payload.get("access_token") if isinstance(payload, dict) else None
    )
    if not access_token:
        raise HTTPException(
            status_code=400, detail="Authorization code not provided"
        )

    # `requests` is blocking; run it in a worker thread so the event loop
    # stays free to serve other clients.
    attachment_no, data_new = await run_in_threadpool(
        _collect_attachments, access_token
    )
    return {"attachment_count": attachment_no, "attachment_content": data_new}


async def send_chunked_data(
    websocket: WebSocket,
    filename: str,
    data: str,
    *,
    chunk_size: int = 1024,
    delay: float = 0.1,
):
    """Stream ``data`` over ``websocket`` in fixed-size JSON chunks.

    Each chunk is sent as ``{"filename": ..., "data_chunk": ...}`` followed by
    a pause of ``delay`` seconds; the text frame ``"FinishedThisAttachment"``
    marks the end. Empty ``data`` sends only the end marker.

    Raises:
        ValueError: if ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    for start in range(0, len(data), chunk_size):
        await websocket.send_json(
            {"filename": filename, "data_chunk": data[start:start + chunk_size]}
        )
        await asyncio.sleep(delay)
    await websocket.send_text("FinishedThisAttachment")


# TODO: text extraction for PDF/DOCX attachments was sketched here earlier
# (PyPDF2 / python-docx) but never enabled.


@app.websocket("/ws")
async def test_websocket(websocket: WebSocket):
    """Stream receipt attachments to a WebSocket client.

    Protocol: the client's first text frame is used as the Google access
    token (it needs the ``gmail.readonly`` scope). For every attachment the
    server sends the chunk frames produced by ``send_chunked_data``; after the
    last attachment it sends ``"CompletedFetchingMessages"`` and closes the
    socket. Only the first message of each Gmail thread is considered.
    """
    await websocket.accept()
    logger.info("Hi hi succefull in connecting !!")
    try:
        logger.info("Now receiving json!!")
        # The frame is a credential, so it is deliberately not logged.
        access_token = await websocket.receive_text()

        refs = await run_in_threadpool(
            _list_message_refs, access_token, RECEIPT_QUERY
        )
        messages = _dedupe_by_thread(refs)
        logger.info("Found %d message threads to scan", len(messages))

        for message in messages:
            message_id = message.get("id") if message else None
            if not message_id:
                continue
            # Blocking Gmail calls run in a worker thread so the event loop
            # can keep servicing this and other connections.
            attachments = await run_in_threadpool(
                _fetch_message_attachments, access_token, message_id
            )
            for filename, data in attachments:
                logger.debug("Streaming attachment %s", filename)
                await send_chunked_data(websocket, filename, data)

        await websocket.send_text("CompletedFetchingMessages")
    except WebSocketDisconnect:
        # The peer is gone; closing the socket again would raise.
        logger.info("Client disconnected before streaming finished")
        return

    logger.info("Closing connection")
    await websocket.close()


@app.get("/token")
async def get_token(token: str = Depends(oauth2_scheme)):
    """Decode the bearer token as an HS256 JWT signed with the client secret.

    Raises:
        HTTPException: 401 when the token cannot be decoded or verified.
    """
    try:
        return jwt.decode(token, GOOGLE_CLIENT_SECRET, algorithms=["HS256"])
    except JWTError as exc:
        raise HTTPException(
            status_code=401,
            detail="Invalid or expired token",
            headers={"WWW-Authenticate": "Bearer"},
        ) from exc