Spaces:
Runtime error
Runtime error
File size: 1,758 Bytes
08e59e6 bbb039b 431ef39 bbb039b 08e59e6 431ef39 08e59e6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
# Shared Chrome options used by every bot in this module.
# The flags are applied in the order listed.
options = webdriver.ChromeOptions()
for _flag in ('--headless', '--no-sandbox', '--disable-dev-shm-usage'):
    options.add_argument(_flag)
from abc import ABC,abstractmethod
import pandas as pd
from steps.utils import get_priority_link
class BOT(ABC):
    '''
    Abstract base class for selenium bots.

    Concrete bots subclass this and implement ``target_html``.
    '''

    @abstractmethod
    def target_html(self, company_name: str) -> tuple:
        '''
        Fetch the target page for ``company_name``.

        Args:
            company_name: Name of the company to look up.

        Returns:
            A ``(website, html)`` pair, the same shape that
            ``CompetitorBot.target_html`` returns.
        '''
class CompetitorBot(BOT):
    '''
    Get competitor info for a company by doing these steps in order:
    1> google search "<company_name> craft.co competitors"
    2> select the most appropriate search result
    3> go to that website and scrape its HTML
    '''

    def target_html(self, company_name: str) -> tuple:
        '''
        Search Google for the company's competitors page and return its HTML.

        Args:
            company_name: Name of the company to search for.

        Returns:
            A ``(website, html)`` pair: ``website`` is the first value
            returned by ``get_priority_link`` and ``html`` is the page
            source of the page reached by clicking the chosen result.

        Raises:
            Any exception raised by selenium or ``get_priority_link``
            propagates to the caller; the browser is closed either way.
        '''
        browser = webdriver.Chrome(options=options)
        try:
            # Run the search on Google through the Chrome driver
            browser.get('http://www.google.com')
            search = browser.find_element(By.NAME, "q")
            search.send_keys(company_name + " craft.co competitors")
            search.send_keys(Keys.RETURN)  # hit return after you enter search text
            time.sleep(5)  # fixed pause before reading the results page

            # Collect every anchor on the results page with its href
            search_results = browser.find_elements(By.XPATH, "//a")
            links = [result.get_attribute("href") for result in search_results]

            # Pick the priority link; idx indexes both links and search_results
            website, idx = get_priority_link(links)
            print(website, idx)
            print(links[idx])

            # Go to the website by clicking the chosen anchor
            search_results[idx].click()
            time.sleep(10)  # fixed pause before capturing the page source
            html = browser.page_source
            return website, html
        finally:
            # Always shut the driver down, even when a step above fails,
            # so no headless Chrome process is left running.
            browser.quit()
|