Omkar008 committed on
Commit
2283c5d
1 Parent(s): 13c5500

Update test.py

Browse files
Files changed (1) hide show
  1. test.py +29 -2
test.py CHANGED
@@ -11,6 +11,7 @@ from docx import Document
11
  from PIL import Image
12
  import pytesseract
13
  import io
 
14
 
15
  app = FastAPI()
16
  oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
@@ -142,9 +143,22 @@ async def test_google(code:str):
142
  print(body_data)
143
  body = base64.urlsafe_b64decode(body_data)
144
  print("Subject:", subject)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
- print("Body:", body)
147
-
148
  # Check for parts in the message payload
149
  if "payload" in message_data and "parts" in message_data["payload"]:
150
  for part in message_data["payload"]["parts"]:
@@ -165,7 +179,20 @@ async def test_google(code:str):
165
 
166
  return {"attachment_count":attachment_no,"attachment_content":data_new}
167
 
 
 
 
168
 
 
 
 
 
 
 
 
 
 
 
169
 
170
  @app.post("/auth/google")
171
  async def auth_google(request: Request):
 
11
  from PIL import Image
12
  import pytesseract
13
  import io
14
+ from bs4 import BeautifulSoup
15
 
16
  app = FastAPI()
17
  oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
 
143
  print(body_data)
144
  body = base64.urlsafe_b64decode(body_data)
145
  print("Subject:", subject)
146
+ if body:
147
+ text,links=extract_text_and_links(body)
148
+ if text:
149
+ print("Printing extracted Text: ")
150
+ print(text)
151
+ else:
152
+ print("No text found or there was some error parsing.")
153
+
154
+ if links:
155
+ print("\nLinks:")
156
+ for link_text, link_url in links:
157
+ print(f"{link_text}: {link_url}")
158
+ else:
159
+ print("No links found or there was some error in parsing or maybe don't use for loop.")
160
+
161
 
 
 
162
  # Check for parts in the message payload
163
  if "payload" in message_data and "parts" in message_data["payload"]:
164
  for part in message_data["payload"]["parts"]:
 
179
 
180
  return {"attachment_count":attachment_no,"attachment_content":data_new}
181
 
182
+
183
def extract_text_and_links(html_content):
    """Parse HTML markup and return its text content and hyperlinks.

    This is intentionally a plain function rather than a coroutine: the
    body never awaits anything, and the caller in this file unpacks the
    result directly (``text,links=extract_text_and_links(body)``). With
    ``async def`` that call would yield an un-awaited coroutine object and
    the tuple unpacking would raise ``TypeError``.

    Args:
        html_content: The HTML markup to parse. The caller in this file
            passes the ``bytes`` produced by ``base64.urlsafe_b64decode``.

    Returns:
        A ``(text, links)`` tuple. ``text`` is the result of
        ``soup.get_text()``; ``links`` is a list of
        ``(link_text, href)`` pairs, one per ``<a>`` tag that has an
        ``href`` attribute, in document order.
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    # Extract text
    text = soup.get_text()

    # Extract links; href=True skips anchors that carry no href attribute,
    # so the ['href'] lookup below cannot fail.
    links = [
        (anchor.text, anchor['href'])
        for anchor in soup.find_all('a', href=True)
    ]

    return text, links
195
+
196
 
197
  @app.post("/auth/google")
198
  async def auth_google(request: Request):