import time
from abc import ABC, abstractmethod
from typing import Tuple

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

from steps.utils import get_priority_link

# Chrome options for running headless (e.g. inside a container or CI job).
options = webdriver.ChromeOptions()
options.add_argument('--headless')               # no visible browser window
options.add_argument('--no-sandbox')             # needed when running as root in Docker
options.add_argument('--disable-dev-shm-usage')  # avoid /dev/shm exhaustion in containers


class BOT(ABC):
    '''
    Abstract base class for Selenium scraping bots.
    '''
    @abstractmethod
    def target_html(self, company_name: str) -> Tuple[str, str]:
        pass


class CompetitorBot(BOT):
    '''
    Collect competitor info by performing these steps in order:
        1. Google search "<company_name> craft.co competitors"
        2. Select the most appropriate search result
        3. Go to that website and scrape its HTML
    '''

    def target_html(self, company_name: str) -> Tuple[str, str]:
        browser = webdriver.Chrome(options=options)

        # Search on Google via headless Chrome
        browser.get('http://www.google.com')
        search = browser.find_element(By.NAME, "q")
        search.send_keys(company_name + " craft.co competitors")
        search.send_keys(Keys.RETURN)  # submit the search query
        time.sleep(5)  # crude wait for the results page to load

        # Collect all anchor links from the results page
        search_results = browser.find_elements(By.XPATH, "//a")
        links = [result.get_attribute("href") for result in search_results]

        # Pick the highest-priority link among the results
        website, idx = get_priority_link(links)
        print(website, idx)   # debug output: chosen site and its index
        print(links[idx])

        # Navigate to the chosen search result
        search_results[idx].click()
        time.sleep(10)  # crude wait for the target page to render

        html = browser.page_source
        browser.quit()

        return website, html
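

# Illustrative usage sketch (not part of the original file). It assumes a working
# Chrome/chromedriver install and that steps.utils.get_priority_link returns a
# (website, index) pair, as used above. The company name is just a placeholder.
#
# A more robust alternative to the fixed time.sleep() calls would be the explicit
# waits already imported (WebDriverWait / expected_conditions), e.g.:
#     WebDriverWait(browser, 10).until(
#         EC.presence_of_all_elements_located((By.XPATH, "//a"))
#     )
if __name__ == "__main__":
    bot = CompetitorBot()
    website, html = bot.target_html("ExampleCorp")
    print(f"Scraped {len(html)} characters of HTML from {website}")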