Patraskon committed on
Commit
82c2688
1 Parent(s): ebab014

Initial commit

Browse files
Files changed (2) hide show
  1. app.py +87 -0
  2. products.py +114 -0
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import logging
4
+ import requests
5
+ from urllib.parse import quote
6
+ import yaml
7
+
8
class Products:
    """In-memory list of plati.market products with YAML persistence.

    ``data`` is a list of dictionaries. Entries built by :meth:`parseAPI`
    have the keys ``name``, ``link``, ``price``, ``rating`` and ``sold``.
    """

    # Class-level default kept for backward compatibility; __init__ gives
    # every instance its own list so instances never share state.
    data = []

    API_URL = "https://plati.io/api/search.ashx"
    PAGE_SIZE = 499  # number of results requested per API page
    REQUEST_TIMEOUT = 30  # seconds; prevents a stalled request from hanging forever

    def __init__(self):
        self.data = []

    def read_yaml_file(self, filename):
        """Load the YAML file *filename* into ``self.data``.

        An empty file results in an empty list rather than ``None``.
        """
        with open(filename, 'r') as file:
            # NOTE(review): FullLoader is acceptable for files produced by
            # write_yaml_file; switch to yaml.safe_load if the file can come
            # from an untrusted source.
            loaded = yaml.load(file, Loader=yaml.FullLoader)
        self.data = [] if loaded is None else loaded

    def write_yaml_file(self, filename):
        """Write ``self.data`` to *filename* in YAML format."""
        with open(filename, 'w') as file:
            yaml.dump(self.data, file)

    def sortProducts(self, criteria: str, order: bool = False):
        """Sort ``self.data`` in place by the dictionary key *criteria*.

        criteria:
            ``price``  - price of a product
            ``rating`` - total rating of a seller
            ``sold``   - the amount of items sold
        order:
            ``False`` sorts ascending, ``True`` sorts descending.
        """
        # Imported locally: this module has no file-level ``import operator``,
        # so the previous ``operator.itemgetter`` call raised NameError.
        from operator import itemgetter

        self.data.sort(key=itemgetter(criteria), reverse=order)

    @staticmethod
    def _entry_to_product(entry):
        """Convert one API result entry into the product dictionary format."""
        return {
            'name': entry['name'],
            'link': entry['url'],
            'price': int(entry['price_rur']),
            'rating': float(entry['seller_rating']),
            'sold': int(entry['numsold']),
        }

    @classmethod
    def _search_url(cls, query, pagenum=None):
        """Build the search URL for *query*; ``pagenum`` is omitted when None."""
        page_part = "" if pagenum is None else f"&pagenum={pagenum}"
        return (
            f"{cls.API_URL}?query={quote(query)}&pagesize={cls.PAGE_SIZE}"
            f"{page_part}&visibleOnly=true&response=json"
        )

    def _fetch_page(self, query, pagenum=None):
        """Request one page of search results and return the decoded JSON."""
        response = requests.get(
            self._search_url(query, pagenum), timeout=self.REQUEST_TIMEOUT)
        # Surface HTTP errors directly instead of failing later while
        # decoding or indexing an error body.
        response.raise_for_status()
        return response.json()

    def parseAPI(self, query):
        """Fetch every result page for *query* from the plati.ru API.

        Replaces ``self.data`` with one product dictionary per returned item.
        Raises the ``requests`` exception on network or HTTP failure and
        ``KeyError`` if the response lacks the expected fields.
        """
        self.data = []
        first_page = self._fetch_page(query)
        total_pages = int(first_page['Totalpages'])
        self.data.extend(
            self._entry_to_product(entry) for entry in first_page['items'])
        # The first request already covered page 1; fetch the remaining ones.
        for pagenum in range(2, total_pages + 1):
            page = self._fetch_page(query, pagenum)
            self.data.extend(
                self._entry_to_product(entry) for entry in page['items'])
45
+
46
# Search callback invoked from the Gradio interface
def search(query):
    """Search plati.market for *query* and return the results as a DataFrame.

    The results are fetched through ``Products.parseAPI``, written to
    ``cache.yaml`` and read back from it before being tabulated. The returned
    DataFrame always has the columns ``name``, ``link``, ``price``,
    ``rating`` and ``sold``, in that order, even when nothing was found.
    """
    logging.info("Search started with query: %s", query)
    products = Products()
    products.parseAPI(query)
    products.write_yaml_file("cache.yaml")
    products.read_yaml_file("cache.yaml")
    logging.info("Search results: %s", products.data)

    # yaml.dump sorts mapping keys by default, so after the cache round trip
    # the dictionaries come back with alphabetical key order. Pin the column
    # order explicitly so it matches the table headers, and so an empty
    # result still yields the expected columns.
    columns = ["name", "link", "price", "rating", "sold"]
    return pd.DataFrame(products.data, columns=columns)
58
+
59
# Demo greeting helper
def greet(name):
    """Return the greeting ``Hello <name>!!`` for the string *name*."""
    prefix, suffix = "Hello ", "!!"
    return prefix + name + suffix
62
+
63
# Build the Gradio interface
def create_interface():
    """Assemble the Gradio Blocks UI and return it without launching.

    The page has two sections: a greeting form wired to ``greet`` and a
    search form wired to ``search`` whose results fill a data table.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Plati.market Parser and Greet Function")

        # Greeting section: name textbox -> button -> greeting textbox.
        gr.Markdown("### Greeting Section")
        name_box = gr.Textbox(label="Enter your name")
        greet_trigger = gr.Button("Greet")
        greeting_box = gr.Textbox(label="Greeting")
        greet_trigger.click(fn=greet, inputs=name_box, outputs=greeting_box)

        # Search section: query textbox -> button -> results table.
        gr.Markdown("### Plati.market Search Section")
        gr.Markdown("Input what you like to find in the field below. The results will be displayed in the table.")
        query_box = gr.Textbox(label="Search Query")
        search_trigger = gr.Button("Search")
        result_headers = ["name", "link", "price", "rating", "sold"]
        results_table = gr.Dataframe(
            headers=result_headers,
            interactive=True,
            label="Search Results",
        )
        search_trigger.click(fn=search, inputs=query_box, outputs=results_table)

    return demo
81
+
82
if __name__ == "__main__":
    # Configure logging before the interface starts handling requests
    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(format=log_format, level=logging.INFO)

    # Build the UI and start the Gradio server
    demo = create_interface()
    demo.launch()
products.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from urllib.parse import quote
2
+ import time
3
+ import re
4
+ import operator
5
+ from operator import itemgetter
6
+ from urllib.parse import quote
7
+ import yaml
8
+
9
+ import requests
10
+ import json
11
+ from selenium import webdriver
12
+ from selenium.webdriver.common.by import By
13
+ from selenium.webdriver.edge.options import Options
14
+
15
+
16
class Products:
    """List of plati.market products with YAML persistence and two scrapers.

    ``data`` is a list of dictionaries with the keys ``name``, ``link``,
    ``price``, ``rating`` and ``sold``. It can be filled either by scraping
    the website with Selenium (:meth:`ParsePage`) or through the JSON API
    (:meth:`parseAPI`).
    """

    # Class-level default kept for backward compatibility; __init__ gives
    # every instance its own list so instances never share state.
    data = []

    API_URL = "https://plati.io/api/search.ashx"
    # Original author's note: "pagesize should be less than 500".
    PAGE_SIZE = 499
    REQUEST_TIMEOUT = 30  # seconds; prevents a stalled request from hanging forever
    # Matches the space-delimited integer inside the price text.
    _PRICE_RE = re.compile(r" [0-9]+ ")

    def __init__(self):
        self.data = []

    def read_yaml_file(self, filename):
        """Load the YAML file *filename* into ``self.data``.

        An empty file results in an empty list rather than ``None``.
        """
        with open(filename, 'r') as file:
            # NOTE(review): FullLoader is acceptable for files produced by
            # write_yaml_file; switch to yaml.safe_load if the file can come
            # from an untrusted source.
            loaded = yaml.load(file, Loader=yaml.FullLoader)
        self.data = [] if loaded is None else loaded

    def write_yaml_file(self, filename):
        """Write ``self.data`` to *filename* in YAML format."""
        with open(filename, 'w') as file:
            yaml.dump(self.data, file)

    def sortProducts(self, criteria: str, order: bool = False):
        """Sort ``self.data`` in place by the dictionary key *criteria*.

        criteria:
            ``price``  - price of a product
            ``rating`` - total rating of a seller
            ``sold``   - the amount of items sold
        order:
            ``False`` sorts ascending, ``True`` sorts descending.
        """
        self.data.sort(key=operator.itemgetter(criteria), reverse=order)

    def printData(self):
        """Print every product: name and link on one line, then price, rating and sold."""
        for item in self.data:
            print(f"{item['name']}\t{item['link']}\n"
                  f"{item['price']} {item['rating']} {item['sold']}\n\n")

    @staticmethod
    def _accept_cookies(browser):
        """Click the cookie-consent button if it is present (best effort)."""
        try:
            # Accept cookies if present, DO NOT REMOVE
            browser.find_element(By.ID, 'gdpr_accept_button').click()
        except Exception:
            # The button is normally absent after the first click.
            pass

    def _parse_block(self, block):
        """Extract one product dictionary from a product list element."""
        title = block.find_element(By.TAG_NAME, 'h1')  # holds the price and the name
        name_link = title.find_element(By.TAG_NAME, 'a')  # product name
        link = name_link.get_attribute('href')  # link to the product
        price_text = title.find_element(By.TAG_NAME, 'span').text
        # NOTE(review): raises AttributeError when the price text contains no
        # space-delimited integer, exactly as before this refactoring.
        price = int(self._PRICE_RE.search(price_text).group(0))
        info = block.find_elements(By.TAG_NAME, "strong")  # all additional information
        rating = float(info[0].text.replace(',', '.'))  # seller's rating
        try:
            sold = int(info[1].text.replace('>', ''))  # total amount sold
        except (IndexError, ValueError):
            # The block carries no (parsable) sold counter.
            sold = 0
        return {'name': name_link.text, 'link': link, 'price': price,
                'rating': rating, 'sold': sold}

    def ParsePage(self, query):
        """Scrape the plati.market search pages for *query* with headless Edge.

        Replaces ``self.data`` with one dictionary per product block found,
        following the pagination links until there is no next page.
        Use :meth:`parseAPI` instead if possible.
        """
        self.data = []
        options = Options()
        options.add_argument("headless")  # do not show the browser window

        # NOTE(review): ``executable_path`` is deprecated in Selenium 4 in
        # favour of a Service object - confirm against the installed version.
        browser = webdriver.Edge(
            executable_path='msedgedriver.exe', options=options)
        try:
            browser.get(f"https://plati.market/search/{quote(query)}")
            time.sleep(1)
            # Walk through the result pages
            while True:
                self._accept_cookies(browser)
                try:
                    # List of all product blocks
                    all_blocks = browser.find_elements(By.CSS_SELECTOR, 'li.shadow')
                    # Number of the current page
                    page_number = int(
                        browser.find_elements(By.CSS_SELECTOR, 'a.active')[1].text)
                except Exception:
                    print("Результаты не найдены")
                    break
                for block in all_blocks:
                    self.data.append(self._parse_block(block))
                try:
                    # Go to the next page
                    browser.find_element(By.LINK_TEXT, str(page_number + 1)).click()
                except Exception:
                    # No link to a following page: the last page was reached.
                    break
        finally:
            # Always shut the driver down, otherwise every call leaks a
            # browser process.
            browser.quit()

    @staticmethod
    def _entry_to_product(entry):
        """Convert one API result entry into the product dictionary format."""
        return {
            'name': entry['name'],
            'link': entry['url'],
            'price': int(entry['price_rur']),
            'rating': float(entry['seller_rating']),
            'sold': int(entry['numsold']),
        }

    @classmethod
    def _search_url(cls, query, pagenum=None):
        """Build the search URL for *query*; ``pagenum`` is omitted when None."""
        page_part = "" if pagenum is None else f"&pagenum={pagenum}"
        # The query is percent-encoded so spaces, '&' and non-ASCII text
        # cannot corrupt the query string.
        return (
            f"{cls.API_URL}?query={quote(query)}&pagesize={cls.PAGE_SIZE}"
            f"{page_part}&visibleOnly=true&response=json"
        )

    def _fetch_page(self, query, pagenum=None):
        """Request one page of search results and return the decoded JSON."""
        response = requests.get(
            self._search_url(query, pagenum), timeout=self.REQUEST_TIMEOUT)
        # Surface HTTP errors directly instead of failing later while
        # decoding or indexing an error body.
        response.raise_for_status()
        return response.json()

    def parseAPI(self, query):
        """Fetch every result page for *query* from the plati.ru API.

        Replaces ``self.data`` with one product dictionary per returned item.
        Raises the ``requests`` exception on network or HTTP failure and
        ``KeyError`` if the response lacks the expected fields.
        """
        self.data = []
        first_page = self._fetch_page(query)
        total_pages = int(first_page['Totalpages'])
        self.data.extend(
            self._entry_to_product(entry) for entry in first_page['items'])
        # The first request already covered page 1; fetch the remaining ones.
        for pagenum in range(2, total_pages + 1):
            page = self._fetch_page(query, pagenum)
            self.data.extend(
                self._entry_to_product(entry) for entry in page['items'])