# PlatiParserGradio / products.py
# Patraskon
# Initial commit
# 82c2688
from urllib.parse import quote
import time
import re
import operator
from operator import itemgetter
from urllib.parse import quote
import yaml
import requests
import json
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.edge.options import Options
class Products:
    """Collect product listings from plati.market and keep them in ``self.data``.

    Every entry of ``data`` is a dict with the keys ``name``, ``link``,
    ``price``, ``rating`` and ``sold``.
    """

    # Search endpoint queried by parseAPI().
    _API_URL = "https://plati.io/api/search.ashx"
    # Seconds to wait for a single API response before giving up.
    _API_TIMEOUT = 30

    def __init__(self):
        # List of product dicts. Created per instance so that two Products
        # objects never share (and mutate) the same list.
        self.data = []

    def read_yaml_file(self, filename):
        """Load the product list from a YAML file into ``self.data``.

        An empty file results in an empty list rather than ``None`` so the
        other methods keep working. Returns nothing.
        """
        with open(filename, 'r', encoding='utf-8') as file:
            # NOTE(review): FullLoader is more permissive than yaml.safe_load;
            # consider safe_load if the file can come from an untrusted source.
            loaded = yaml.load(file, Loader=yaml.FullLoader)
        self.data = loaded if loaded is not None else []

    def write_yaml_file(self, filename):
        """Write ``self.data`` to a file in YAML format."""
        with open(filename, 'w', encoding='utf-8') as file:
            yaml.dump(self.data, file)

    def sortProducts(self, criteria: str, order: bool = False):
        """Sort the product list in place by the given criteria.

        criteria:
            ``price`` - price of a product,
            ``rating`` - total rating of a seller,
            ``sold`` - the amount of items sold.
        order:
            ``False`` (0) sorts ascending, ``True`` (1) sorts descending.

        Raises ``KeyError`` if an entry has no ``criteria`` key.
        """
        self.data.sort(key=operator.itemgetter(criteria), reverse=order)

    def printData(self):
        """Print every product: name and link on one line, then price, rating and sold."""
        for item in self.data:
            print(f"{item['name']}\t{item['link']}\n"
                  f"{item['price']} {item['rating']} {item['sold']}\n\n")

    def ParsePage(self, query):
        """Scrape the plati.market search pages for ``query`` into ``self.data``.

        Drives a headless Edge browser page by page until no link to the next
        page can be clicked. Use parseAPI() instead if possible.
        """
        self.data = []
        options = Options()
        options.add_argument("headless")  # do not show the browser window
        # NOTE(review): `executable_path` is deprecated in Selenium 4 in favour
        # of a Service object - confirm against the installed Selenium version.
        browser = webdriver.Edge(
            executable_path='msedgedriver.exe', options=options)
        try:
            browser.get(f"https://plati.market/search/{quote(query)}")
            time.sleep(1)
            while True:
                try:
                    # Accept cookies if present, DO NOT REMOVE
                    browser.find_element(By.ID, 'gdpr_accept_button').click()
                except Exception:
                    # Best effort: the cookie banner is usually absent.
                    pass
                try:
                    # All product blocks on the current page.
                    all_blocks = browser.find_elements(
                        By.CSS_SELECTOR, 'li.shadow')
                    # Number of the current page.
                    page_number = int(browser.find_elements(
                        By.CSS_SELECTOR, 'a.active')[1].text)
                except Exception:
                    print("Результаты не найдены")
                    break
                for block in all_blocks:
                    self.data.append(self._parse_block(block))
                try:
                    # Go to the next page; stop when there is no such link.
                    browser.find_element(
                        By.LINK_TEXT, str(page_number + 1)).click()
                except Exception:
                    break
        finally:
            # Always shut the browser down, otherwise every call leaks a
            # browser/driver process.
            browser.quit()

    @staticmethod
    def _parse_block(block):
        """Turn one product block element of the search page into a product dict."""
        title = block.find_element(By.TAG_NAME, 'h1')  # holds name and price
        name_element = title.find_element(By.TAG_NAME, 'a')
        link = name_element.get_attribute('href')
        price_text = title.find_element(By.TAG_NAME, 'span').text
        # The price is the first integer surrounded by spaces; a block without
        # one raises AttributeError here (no match).
        price = int(re.search(r" [0-9]+ ", price_text).group(0))
        info = block.find_elements(By.TAG_NAME, "strong")  # additional information
        rating = float(info[0].text.replace(',', '.'))  # seller's rating
        try:
            sold = int(info[1].text.replace('>', ''))  # total amount sold
        except Exception:
            sold = 0  # the block carries no sales information
        return {'name': name_element.text, 'link': link, 'price': price,
                'rating': rating, 'sold': sold}

    def parseAPI(self, query):
        """Fetch all search results for ``query`` through the plati API into ``self.data``.

        Requests the first page, reads the total page count from it and then
        requests the remaining pages one by one.
        """
        self.data = []
        pagesize = 499  # pagesize should be less than 500
        contents = self._fetch_api_page(query, pagesize, 1)
        total_pages = int(contents['Totalpages'])
        self._append_api_items(contents)
        for page in range(2, total_pages + 1):
            self._append_api_items(self._fetch_api_page(query, pagesize, page))

    def _fetch_api_page(self, query, pagesize, pagenum):
        """Request one page of search results and return the decoded JSON."""
        # Passing the values via `params` lets requests URL-encode the query,
        # so characters such as '&', '#' or '+' no longer corrupt the URL.
        params = {
            'query': query,
            'pagesize': pagesize,
            'pagenum': pagenum,
            'visibleOnly': 'true',
            'response': 'json',
        }
        response = requests.get(
            self._API_URL, params=params, timeout=self._API_TIMEOUT)
        return response.json()

    def _append_api_items(self, contents):
        """Convert the ``items`` of one API response and append them to ``self.data``."""
        # `or []` tolerates a null `items` value.
        for entry in contents['items'] or []:
            self.data.append({
                'name': entry['name'],
                'link': entry['url'],
                'price': int(entry['price_rur']),
                'rating': float(entry['seller_rating']),
                'sold': int(entry['numsold']),
            })