Spaces:
Running
Running
Update test.py
Browse files
test.py
CHANGED
@@ -11,6 +11,7 @@ from docx import Document
|
|
11 |
from PIL import Image
|
12 |
import pytesseract
|
13 |
import io
|
|
|
14 |
|
15 |
app = FastAPI()
|
16 |
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
|
@@ -142,9 +143,22 @@ async def test_google(code:str):
|
|
142 |
print(body_data)
|
143 |
body = base64.urlsafe_b64decode(body_data)
|
144 |
print("Subject:", subject)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
|
146 |
-
print("Body:", body)
|
147 |
-
|
148 |
# Check for parts in the message payload
|
149 |
if "payload" in message_data and "parts" in message_data["payload"]:
|
150 |
for part in message_data["payload"]["parts"]:
|
@@ -165,7 +179,20 @@ async def test_google(code:str):
|
|
165 |
|
166 |
return {"attachment_count":attachment_no,"attachment_content":data_new}
|
167 |
|
|
|
|
|
|
|
168 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
|
170 |
@app.post("/auth/google")
|
171 |
async def auth_google(request: Request):
|
|
|
11 |
from PIL import Image
|
12 |
import pytesseract
|
13 |
import io
|
14 |
+
from bs4 import BeautifulSoup
|
15 |
|
16 |
app = FastAPI()
|
17 |
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
|
|
|
143 |
print(body_data)
|
144 |
body = base64.urlsafe_b64decode(body_data)
|
145 |
print("Subject:", subject)
|
146 |
+
if body:
|
147 |
+
text,links=extract_text_and_links(body)
|
148 |
+
if text:
|
149 |
+
print("Printing extracted Text: ")
|
150 |
+
print(text)
|
151 |
+
else:
|
152 |
+
print("No text found or there was some error parsing.")
|
153 |
+
|
154 |
+
if links:
|
155 |
+
print("\nLinks:")
|
156 |
+
for link_text, link_url in links:
|
157 |
+
print(f"{link_text}: {link_url}")
|
158 |
+
else:
|
159 |
+
print("No links found or there was some error in parsing or maybe don't use for loop.")
|
160 |
+
|
161 |
|
|
|
|
|
162 |
# Check for parts in the message payload
|
163 |
if "payload" in message_data and "parts" in message_data["payload"]:
|
164 |
for part in message_data["payload"]["parts"]:
|
|
|
179 |
|
180 |
return {"attachment_count":attachment_no,"attachment_content":data_new}
|
181 |
|
182 |
+
|
183 |
+
async def extract_text_and_links(html_content):
    """Parse HTML markup and return its text together with its hyperlinks.

    Args:
        html_content: HTML markup handed to BeautifulSoup's ``html.parser``.

    Returns:
        A ``(text, links)`` tuple. ``text`` is the result of
        ``soup.get_text()``; ``links`` is a list of ``(anchor_text, href)``
        pairs, one for every ``<a>`` tag that carries an ``href`` attribute.

    Note:
        This is a coroutine function: calling it produces a coroutine that
        has to be awaited before the ``(text, links)`` tuple can be unpacked.
    """
    parsed = BeautifulSoup(html_content, 'html.parser')

    # Only anchors that actually have an href are considered links.
    anchors = parsed.find_all('a', href=True)
    hyperlinks = [(anchor.text, anchor['href']) for anchor in anchors]

    return parsed.get_text(), hyperlinks
|
195 |
+
|
196 |
|
197 |
@app.post("/auth/google")
|
198 |
async def auth_google(request: Request):
|