Omkar008's picture
Update test.py
52b5672 verified
raw
history blame
14 kB
import asyncio
import base64
import logging
import os
import time
import webbrowser

import requests
from fastapi import Depends, FastAPI, HTTPException, Request, WebSocket
from fastapi.security import OAuth2PasswordBearer
from jose import JWTError, jwt
app = FastAPI()
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")

# Google OAuth client configuration (values come from the Google Developer
# Console). The client ID and redirect URIs are not secret: they appear
# verbatim in the consent URLs built by this module.
GOOGLE_CLIENT_ID = "485753721652-5uta3e18va2g6cnkldib2d68q39t4vod.apps.googleusercontent.com"
GOOGLE_REDIRECT_URI = "https://omkar008-receipt-radar-test.hf.space/auth/google"
GOOGLE_REDIRECT_URI_hr = "https://receiptradar-0bb387d81174.herokuapp.com/auth/google"

# The client secret must never live in source control, so it is read from the
# environment. Indexing (rather than .get) makes a missing secret fail loudly
# at import time instead of silently running with an empty signing key.
# NOTE(review): any secret that was previously committed to this file should
# be treated as leaked and rotated in the Google Developer Console.
GOOGLE_CLIENT_SECRET = os.environ["GOOGLE_CLIENT_SECRET"]

# Configure the logger
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
@app.get("/")
async def login_google():
    """Build the Google OAuth consent URLs and return the ``_hr`` variant.

    Both URLs request the ``openid``, ``profile``, ``email`` and
    ``gmail.readonly`` scopes with ``access_type=offline``. They differ only
    in the redirect URI (``GOOGLE_REDIRECT_URI`` vs ``GOOGLE_REDIRECT_URI_hr``)
    and in the ``state=receipts`` parameter carried by the ``_hr`` URL.

    Returns:
        A dict with the single key ``"url_hr"`` holding the consent URL that
        redirects to ``GOOGLE_REDIRECT_URI_hr``.
    """
    # Consent URL whose redirect target is GOOGLE_REDIRECT_URI_hr; this is the
    # one handed back to the caller.
    oauth_url_hr = f"https://accounts.google.com/o/oauth2/auth?response_type=code&client_id={GOOGLE_CLIENT_ID}&redirect_uri={GOOGLE_REDIRECT_URI_hr}&scope=openid%20profile%20email%20https://www.googleapis.com/auth/gmail.readonly&access_type=offline&state=receipts"

    # Consent URL whose redirect target is GOOGLE_REDIRECT_URI. It prompts the
    # user to log in to a Gmail account and grant read-only mailbox access.
    oauth_url = f"https://accounts.google.com/o/oauth2/auth?response_type=code&client_id={GOOGLE_CLIENT_ID}&redirect_uri={GOOGLE_REDIRECT_URI}&scope=openid%20profile%20email%20https://www.googleapis.com/auth/gmail.readonly&access_type=offline"

    # NOTE(review): webbrowser.open launches a browser on the machine running
    # this server process, not on the HTTP client's machine - confirm this is
    # only meant for local development.
    webbrowser.open(oauth_url)

    return {"url_hr": oauth_url_hr}
@app.post("/auth/google")
async def auth_google(request: Request):
    """Collect receipt/invoice attachments from the caller's Gmail mailbox.

    The JSON request body must contain a Google OAuth access token under the
    ``access_token`` key. The handler pages through every Gmail message that
    matches the receipt/invoice search query, then downloads each attachment
    found in the top-level parts of those messages.

    Args:
        request: Incoming request whose JSON body holds ``access_token``.

    Returns:
        A dict with two keys:
        ``attachment_count`` - number of attachments downloaded, and
        ``attachment_content`` - mapping of attachment filename to the
        attachment's ``data`` field exactly as returned by the Gmail API.
        Attachments sharing a filename overwrite each other in the mapping
        while still being counted.

    Raises:
        HTTPException: 400 when no access token is supplied; the upstream
            status code when the Gmail message listing request fails.
    """
    body = await request.json()
    access_token = body.get("access_token")
    if not access_token:
        raise HTTPException(status_code=400, detail="Authorization code not provided")

    # The token is a credential: it is deliberately never printed or logged.
    auth_headers = {"Authorization": f"Bearer {access_token}"}
    messages_url = "https://www.googleapis.com/gmail/v1/users/me/messages"
    jobs_query = "subject:receipt OR subject:receipts OR subject:Invoice OR subject:invoice has:attachment "

    # NOTE(review): requests is synchronous, so every call below blocks the
    # event loop for its duration - consider a thread pool or an async client.
    messages = []
    page_token = None
    while True:
        # Let requests encode the query instead of splicing it into the URL.
        params = {"q": jobs_query}
        if page_token:
            params["pageToken"] = page_token
        gmail_response = requests.get(messages_url, headers=auth_headers, params=params)
        if gmail_response.status_code != 200:
            # Surface upstream failures (e.g. an invalid or expired token)
            # rather than reporting them as "zero attachments found".
            raise HTTPException(
                status_code=gmail_response.status_code,
                detail="Gmail API request failed",
            )
        gmail_data = gmail_response.json()
        messages.extend(gmail_data.get("messages", []))
        page_token = gmail_data.get("nextPageToken")
        if not page_token:
            break  # No more pages, exit the loop

    attachment_no = 0
    data_new = {}
    for message in messages:
        message_id = message.get("id") if message else None
        if not message_id:
            continue
        logger.debug("Fetching message %s", message_id)

        message_url = f"{messages_url}/{message_id}"
        message_data = requests.get(message_url, headers=auth_headers).json()

        # Only the top-level parts of the payload are inspected.
        for part in message_data.get("payload", {}).get("parts", []):
            attachment_id = part.get("body", {}).get("attachmentId")
            if not attachment_id:
                continue

            attachment_url = f"{message_url}/attachments/{attachment_id}"
            attachment_data = requests.get(attachment_url, headers=auth_headers).json()
            encoded = attachment_data.get("data")
            if encoded:
                filename = part.get("filename", "untitled.txt")
                data_new[filename] = encoded
                attachment_no += 1

    return {"attachment_count": attachment_no, "attachment_content": data_new}
async def send_chunked_data(
    websocket: WebSocket,
    filename: str,
    data: str,
    chunk_size: int = 1024,
    delay: float = 0.1,
):
    """Send one attachment's data over the WebSocket in fixed-size chunks.

    Each chunk is sent as a JSON frame ``{"filename": ..., "data_chunk": ...}``
    followed by a short pause. After the last chunk (or immediately, when
    ``data`` is empty) the text frame ``"FinishedThisAttachment"`` marks the
    end of the attachment.

    Args:
        websocket: Connection to send the frames on.
        filename: Name reported alongside every chunk.
        data: Attachment payload to slice into chunks.
        chunk_size: Maximum number of characters per chunk; must be positive.
        delay: Seconds to sleep after each chunk.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
    """
    if chunk_size <= 0:
        # A negative step would make range() empty and silently drop the data.
        raise ValueError("chunk_size must be a positive integer")

    for start in range(0, len(data), chunk_size):
        chunk = data[start:start + chunk_size]
        await websocket.send_json({"filename": filename, "data_chunk": chunk})
        await asyncio.sleep(delay)

    await websocket.send_text("FinishedThisAttachment")
# async def extract_text_from_pdf(pdf_data):
# with io.BytesIO(base64.b64decode(pdf_data)) as pdf_file:
# pdf_reader = PyPDF2.PdfFileReader(pdf_file)
# text = ""
# for page_num in range(pdf_reader.numPages):
# page = pdf_reader.getPage(page_num)
# text += page.extract_text()
# return text
# async def extract_text_from_docx(docx_data):
# doc = Document(io.BytesIO(base64.b64decode(docx_data)))
# text = ""
# for para in doc.paragraphs:
# text += para.text + "\n"
# return text
# async def extract_text_from_attachment(filename, data):
# if filename.endswith('.pdf'):
# return await extract_text_from_pdf(data)
# elif filename.endswith('.docx'):
# return await extract_text_from_docx(data)
# else:
# # Add handling for other document types if needed
# return "Unsupported document type"
@app.websocket("/ws")
async def test_websocket(websocket: WebSocket):
    """Stream receipt attachments from the caller's Gmail over a WebSocket.

    Protocol, as implemented below:

    1. The connection is accepted and the first text frame received from the
       client is used as a Google OAuth access token (it needs a scope that
       allows reading Gmail, e.g. ``gmail.readonly``).
    2. Messages matching the receipt query are listed and reduced to one
       message per thread.
    3. Every attachment in the top-level parts of those messages is streamed
       with ``send_chunked_data``.
    4. The text frame ``"CompletedFetchingMessages"`` is sent and the
       connection is closed.

    Args:
        websocket: The client connection.
    """
    await websocket.accept()
    logger.info("Hi hi succefull in connecting !!")
    logger.info("Now receiving json!!")
    data = await websocket.receive_text()
    # The frame is a credential, so only its size is logged, never its value.
    logger.info("Received access token (%d characters)", len(data))

    def get_messages(code: str):
        """Return the first listed matching Gmail message of every thread.

        Args:
            code: OAuth access token used as the bearer credential.

        Returns:
            List of message entries as returned by the Gmail list endpoint,
            keeping only the first entry seen for each ``threadId``.
        """
        logger.info("entered into the get_messages")
        auth_headers = {"Authorization": f"Bearer {code}"}
        jobs_query = "subject:receipt OR subject:receipts has:attachment"

        messages = []
        page_token = None
        while True:
            # Let requests encode the query instead of splicing it into the URL.
            params = {"q": jobs_query}
            if page_token:
                params["pageToken"] = page_token
            gmail_response = requests.get(
                "https://www.googleapis.com/gmail/v1/users/me/messages",
                headers=auth_headers,
                params=params,
            )
            logger.info("%s", gmail_response)
            gmail_data = gmail_response.json()
            messages.extend(gmail_data.get("messages", []))
            page_token = gmail_data.get("nextPageToken")
            if not page_token:
                break  # No more pages, exit the loop

        # De-duplicate by thread, preserving the listing order.
        unique_thread_ids = set()
        filtered_data_list = []
        for entry in messages:
            thread_id = entry["threadId"]
            if thread_id not in unique_thread_ids:
                unique_thread_ids.add(thread_id)
                filtered_data_list.append(entry)
        logger.info("%s", filtered_data_list)
        return filtered_data_list

    async def event_generator(code: str):
        """Download every attachment and stream it to the connected client.

        Args:
            code: OAuth access token used as the bearer credential.
        """
        logger.info("entered into the event_generator")
        auth_headers = {"Authorization": f"Bearer {code}"}

        # NOTE(review): get_messages and the requests calls below are
        # synchronous and block the event loop while they run.
        messages = get_messages(code)
        logger.debug("%d messages to process", len(messages))

        for i, message in enumerate(messages):
            logger.info("%s", i)
            logger.info("%s", message)
            message_id = message.get("id") if message else None
            if not message_id:
                continue

            message_url = f"https://www.googleapis.com/gmail/v1/users/me/messages/{message_id}"
            message_data = requests.get(message_url, headers=auth_headers).json()

            # Only the top-level parts of the payload are inspected.
            for part in message_data.get("payload", {}).get("parts", []):
                attachment_id = part.get("body", {}).get("attachmentId")
                if not attachment_id:
                    continue

                attachment_url = f"{message_url}/attachments/{attachment_id}"
                attachment_data = requests.get(attachment_url, headers=auth_headers).json()
                encoded = attachment_data.get("data")
                if encoded:
                    filename = part.get("filename", "untitled.txt")
                    logger.debug("Streaming attachment %s", filename)
                    await send_chunked_data(websocket, filename, encoded)

        # Tell the client that no further attachments will follow.
        await websocket.send_text("CompletedFetchingMessages")

    await event_generator(data)
    logger.info("Closing connection")
    await websocket.close()
@app.get("/token")
async def get_token(token: str = Depends(oauth2_scheme)):
    """Decode the caller's bearer token and return its claims.

    The token is verified as an HS256 JWT using ``GOOGLE_CLIENT_SECRET`` as
    the signing key.

    Args:
        token: Bearer token extracted from the request by ``oauth2_scheme``.

    Returns:
        The decoded JWT claims.

    Raises:
        HTTPException: 401 when the token cannot be decoded or verified.
    """
    try:
        return jwt.decode(token, GOOGLE_CLIENT_SECRET, algorithms=["HS256"])
    except JWTError as err:
        # A bad token is a client error, not a server fault: answer 401
        # instead of letting the exception surface as a 500.
        raise HTTPException(
            status_code=401,
            detail="Invalid or expired token",
            headers={"WWW-Authenticate": "Bearer"},
        ) from err