ksvmuralidhar committed on
Commit
046cc67
1 Parent(s): 152aad4

Update scraper.py

Browse files
Files changed (1) hide show
  1. scraper.py +4 -1
scraper.py CHANGED
@@ -11,9 +11,12 @@ def scrape_text(url, n_words=15):
11
  try:
12
  driver = None
13
  logging.warning("Initiated Scraping")
 
 
 
14
  opts = FirefoxOptions()
15
  opts.add_argument("--headless")
16
- driver = webdriver.Firefox(options=opts)
17
  driver.set_page_load_timeout(60)
18
  driver.get(url)
19
  elem = driver.find_element(By.TAG_NAME, "body").text
 
11
  try:
12
  driver = None
13
  logging.warning("Initiated Scraping")
14
+ firefox_profile = webdriver.FirefoxProfile()
15
+ user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
16
+ firefox_profile.set_preference("general.useragent.override", user_agent)
17
  opts = FirefoxOptions()
18
  opts.add_argument("--headless")
19
+ driver = webdriver.Firefox(options=opts, firefox_profile=firefox_profile)
20
  driver.set_page_load_timeout(60)
21
  driver.get(url)
22
  elem = driver.find_element(By.TAG_NAME, "body").text