ksvmuralidhar committed on
Commit
aa42935
1 Parent(s): 046cc67

Update scraper.py

Browse files
Files changed (1) hide show
  1. scraper.py +2 -3
scraper.py CHANGED
@@ -11,12 +11,11 @@ def scrape_text(url, n_words=15):
11
  try:
12
  driver = None
13
  logging.warning("Initiated Scraping")
14
- firefox_profile = webdriver.FirefoxProfile()
15
  user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
16
- firefox_profile.set_preference("general.useragent.override", user_agent)
17
  opts = FirefoxOptions()
18
  opts.add_argument("--headless")
19
- driver = webdriver.Firefox(options=opts, firefox_profile=firefox_profile)
 
20
  driver.set_page_load_timeout(60)
21
  driver.get(url)
22
  elem = driver.find_element(By.TAG_NAME, "body").text
 
11
  try:
12
  driver = None
13
  logging.warning("Initiated Scraping")
 
14
  user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
 
15
  opts = FirefoxOptions()
16
  opts.add_argument("--headless")
17
+ opts.add_argument(f"user-agent={user_agent}")
18
+ driver = webdriver.Firefox(options=opts)
19
  driver.set_page_load_timeout(60)
20
  driver.get(url)
21
  elem = driver.find_element(By.TAG_NAME, "body").text