# Residue from the web file viewer this script was copied from
# (status text: "Spaces: Running on Zero").
# File size: 1,197 bytes; revision a746d34.
import requests
from bs4 import BeautifulSoup
import openpyxl
import schedule
import time
class Scraper:
    """Scrape product names and prices from a web page into an .xlsx file.

    The page is fetched with ``requests`` and parsed with BeautifulSoup's
    ``html.parser``. Every ``<div class="product">`` element becomes one
    spreadsheet row holding the text of its ``<h2 class="product-name">``
    and ``<span class="price">`` children.
    """

    def __init__(self, url: str, output_file: str, *, timeout: float = 10.0) -> None:
        """Store the scrape configuration.

        Args:
            url: Address of the page to fetch.
            output_file: Path the workbook is saved to; a fresh workbook is
                written to this path on every run.
            timeout: Seconds to wait for the server. Without a timeout
                ``requests`` may block indefinitely on an unresponsive host.
        """
        self.url = url
        self.output_file = output_file
        self.timeout = timeout

    def scrape(self) -> None:
        """Fetch the page, extract the products and save them as .xlsx.

        Raises:
            requests.RequestException: If the request fails, times out, or
                the server answers with an HTTP error status. The output
                file is not touched in that case.
        """
        soup = self._fetch_soup()
        products = self._extract_products(soup)
        self._save(products)

    def _fetch_soup(self):
        """Download ``self.url`` and return the parsed document."""
        response = requests.get(self.url, timeout=self.timeout)
        # Fail loudly on 4xx/5xx instead of parsing an error page and
        # overwriting the previous output with a header-only sheet.
        response.raise_for_status()
        return BeautifulSoup(response.content, 'html.parser')

    @staticmethod
    def _text_or_none(tag):
        """Return the stripped text of ``tag``, or ``None`` if ``tag`` is missing."""
        return None if tag is None else tag.text.strip()

    def _extract_products(self, soup) -> list:
        """Build one ``{'name': ..., 'price': ...}`` dict per product element.

        A product element lacking its name or price element yields ``None``
        for that field (an empty cell in the sheet) rather than aborting the
        whole scrape with an ``AttributeError``.
        """
        return [
            {
                'name': self._text_or_none(
                    card.find('h2', {'class': 'product-name'})),
                'price': self._text_or_none(
                    card.find('span', {'class': 'price'})),
            }
            for card in soup.find_all('div', {'class': 'product'})
        ]

    def _save(self, products) -> None:
        """Write a header row plus one row per product to ``self.output_file``."""
        wb = openpyxl.Workbook()
        ws = wb.active
        ws.append(['Name', 'Price'])
        for product in products:
            ws.append([product['name'], product['price']])
        wb.save(self.output_file)
def daily_scrape():
    """Run one scrape of https://example.com into ``output.xlsx``.

    This function is registered as a ``schedule`` job. ``schedule`` does not
    catch exceptions raised by jobs, so a single failed run would propagate
    out of ``run_pending()`` and end the scheduling loop for good. Failures
    are therefore logged with their traceback and the function returns
    normally, leaving later runs unaffected.
    """
    import logging  # local import keeps this block self-contained

    try:
        Scraper('https://example.com', 'output.xlsx').scrape()
    except Exception:
        # Top-level job boundary: record the error, keep the scheduler alive.
        logging.getLogger(__name__).exception("Daily scrape failed")
if __name__ == "__main__":
    # Only start the scheduler when executed as a script, so importing this
    # module does not block forever in the polling loop below.
    schedule.every(1).day.at("00:00").do(daily_scrape)  # Run daily at midnight

    # Jobs only fire when run_pending() is called, so poll once per second.
    while True:
        schedule.run_pending()
        time.sleep(1)