Spaces:

Omkar008
/

receipt_radar_test

Running

App Files Community

Omkar008 committed on Jan 29

Commit

2283c5d

•

1 Parent(s): 13c5500

Update test.py

Browse files

Files changed (1) hide show

test.py +29 -2

test.py CHANGED Viewed

@@ -11,6 +11,7 @@ from docx import Document
 from PIL import Image
 import pytesseract
 import io
 app = FastAPI()
 oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
@@ -142,9 +143,22 @@ async def test_google(code:str):
                                 print(body_data)
                                 body = base64.urlsafe_b64decode(body_data)
                 print("Subject:", subject)
-                print("Body:", body)
                 # Check for parts in the message payload
                 if "payload" in message_data and "parts" in message_data["payload"]:
                     for part in message_data["payload"]["parts"]:
@@ -165,7 +179,20 @@ async def test_google(code:str):
     return {"attachment_count":attachment_no,"attachment_content":data_new}
 @app.post("/auth/google")
 async def auth_google(request: Request):

 from PIL import Image
 import pytesseract
 import io
+from bs4 import BeautifulSoup
 app = FastAPI()
 oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
                                 print(body_data)
                                 body = base64.urlsafe_b64decode(body_data)
                 print("Subject:", subject)
+                if body:
+                    text,links=extract_text_and_links(body)
+                    if text:
+                        print("Printing extracted Text: ")
+                        print(text)
+                    else:
+                        print("No text found or there was some error parsing.")
+                if links:
+                    print("\nLinks:")
+                    for link_text, link_url in links:
+                        print(f"{link_text}: {link_url}")
+                else:
+                    print("No links found or there was some error in parsing or maybe don't use for loop.")
                 # Check for parts in the message payload
                 if "payload" in message_data and "parts" in message_data["payload"]:
                     for part in message_data["payload"]["parts"]:
     return {"attachment_count":attachment_no,"attachment_content":data_new}
+def extract_text_and_links(html_content):
+    """Parse an HTML document and return its text and hyperlinks.
+
+    This is a plain function rather than a coroutine: nothing in the body
+    awaits, and the caller unpacks the result directly
+    (``text,links=extract_text_and_links(body)``), which would raise a
+    TypeError on an un-awaited coroutine object.
+
+    Args:
+        html_content: HTML markup handed to BeautifulSoup's ``html.parser``
+            backend.
+
+    Returns:
+        A ``(text, links)`` tuple: ``text`` is the result of
+        ``soup.get_text()``; ``links`` is a list of ``(anchor_text, href)``
+        pairs, one per ``<a>`` tag that carries an ``href`` attribute.
+    """
+    soup = BeautifulSoup(html_content, 'html.parser')
+    # Extract text
+    text = soup.get_text()
+    # Extract links; href=True skips anchors that have no target.
+    links = [(link.text, link['href']) for link in soup.find_all('a', href=True)]
+    return text, links
 @app.post("/auth/google")
 async def auth_google(request: Request):