import gradio as gr
import requests
from bs4 import BeautifulSoup
from huggingface_hub import HfApi, create_repo, upload_file, get_full_repo_name
import re
import os

token_self = os.environ.get("HF_TOKEN")
default_repo_name = "collection"

'''
# Selenium alternative for pages that need JavaScript rendering:
from selenium import webdriver
from bs4 import BeautifulSoup

url = 'https://www.wikipedia.org/'
driver = webdriver.Chrome()
driver.get(url)
html_page = driver.page_source
soup = BeautifulSoup(html_page, 'html.parser')
title = soup.title.string
print(title)
driver.quit()
'''


def search_fn(query, count):
    # Cap the number of requested results at 40.
    if count > 40:
        count = 40
    page = requests.get(f"https://www.google.com/search?q={query}&num={count}")
    soup = BeautifulSoup(page.content, "html.parser")
    with open("myfile.txt", "w") as file:
        # Google result links look like /url?q=<target>&sa=...; pull out the target URL.
        for link in soup.find_all("a", href=re.compile(r"(?<=/url\?q=)(htt.*://.*)")):
            out = re.split(":(?=http)", link["href"].replace("/url?q=", "").split("&sa", 1)[0])
            out = out[0]
            rr = requests.get(out)
            # requests headers are case-insensitive; pages that send
            # X-Frame-Options refuse to be embedded in an iframe.
            x_opt = rr.headers.get("x-frame-options")
            if x_opt is None:
                # The original markup for this entry was lost; a minimal iframe
                # per frameable result is assumed here.
                frame_l = f'<iframe src="{out}"></iframe>\n'
                file.writelines(frame_l)
            print(out)
            print(x_opt)
    with open("myfile.txt", "r") as file1:
        html_out = file1.read()
    out = format_t(html_out)
    return out


def details_fn(query):
    link_list = []
    page = requests.get(query)
    soup = BeautifulSoup(page.content, "html.parser")
    # Fall back to the URL itself if the page has no <title>.
    try:
        title = soup.title.string
    except Exception:
        title = query
    # Fall back to the title if the page has no meta description.
    try:
        description = soup.find("meta", attrs={"name": "description"})
        description = description["content"]
    except Exception:
        description = title
    out = f"""