"""Selenium bots that locate and download a company's competitor page."""

import time
from abc import ABC, abstractmethod

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from steps.utils import get_priority_link

# Shared Chrome configuration: run without a display and with the flags
# commonly needed inside containers.
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

# Fixed waits (seconds) that give pages time to render before they are read.
SEARCH_RESULTS_WAIT_SECONDS = 5
TARGET_PAGE_WAIT_SECONDS = 10


class BOT(ABC):
    """Abstract base class for Selenium bots."""

    @abstractmethod
    def target_html(self, company_name: str) -> tuple:
        """Return a ``(website, html)`` pair for ``company_name``."""


class CompetitorBot(BOT):
    """Fetch competitor information for a company.

    Steps, in order:
        1. Google-search "<company_name> craft.co competitors".
        2. Select the most appropriate search result.
        3. Go to that website and scrape its HTML.
    """

    def target_html(self, company_name: str) -> tuple:
        """Search for the company's competitor page and return its HTML.

        Args:
            company_name: Name of the company to look up.

        Returns:
            A ``(website, html)`` tuple. ``website`` is the first value
            returned by ``get_priority_link`` for the links found on the
            results page; ``html`` is the page source after clicking the
            chosen link.

        Raises:
            Any Selenium exception raised while driving the browser. The
            browser is always shut down before the exception propagates.
        """
        browser = webdriver.Chrome(options=options)
        # try/finally guarantees the Chrome process is closed even when a
        # lookup, click or helper call fails part-way through.
        try:
            # Search on Google using headless Chrome.
            browser.get('http://www.google.com')
            search = browser.find_element(By.NAME, "q")
            search.send_keys(company_name + " craft.co competitors")
            search.send_keys(Keys.RETURN)  # submit the query
            time.sleep(SEARCH_RESULTS_WAIT_SECONDS)

            # Collect every anchor on the results page. ``links`` must stay
            # index-aligned with ``search_results`` because the index from
            # get_priority_link is used to pick the element to click.
            # NOTE(review): anchors without an href yield None entries here;
            # confirm get_priority_link tolerates them.
            search_results = browser.find_elements(By.XPATH, "//a")
            links = [result.get_attribute("href") for result in search_results]

            # Pick the preferred link among the results.
            website, idx = get_priority_link(links)
            print(website, idx)
            print(links[idx])

            # Go to the chosen website and let it load before reading it.
            search_results[idx].click()
            time.sleep(TARGET_PAGE_WAIT_SECONDS)
            html = browser.page_source
        finally:
            browser.quit()

        return website, html