Spaces:

1s
/

PlatiParserGradio

Runtime error

App Files Files Community

Patraskon commited on Mar 6

Commit

82c2688

•

1 Parent(s): ebab014

Initial commit

Browse files

Files changed (2) hide show

app.py +87 -0
products.py +114 -0

app.py ADDED Viewed

	@@ -0,0 +1,87 @@

+import gradio as gr
+import pandas as pd
+import logging
+import requests
+from urllib.parse import quote
+import yaml
+class Products:
+    data = []  # Список, в котором будут словари
+    def read_yaml_file(self, filename):
+        """Read the data from a YAML file and return a list of dictionaries"""
+        with open(filename, 'r') as file:
+            self.data = yaml.load(file, Loader=yaml.FullLoader)
+    def write_yaml_file(self, filename):
+        """Write the data in YAML format to a file"""
+        with open(filename, 'w') as file:
+            yaml.dump(self.data, file)
+    def sortProducts(self, criteria: str, order: bool = False):
+        """Sort list of products by given criteria:
+        \n price - price of a product
+        \n rating - total rating of a seller
+        \n sold - the amount of items sold
+        \n\n order - asc=0, desc=1
+        """
+        self.data.sort(key=operator.itemgetter(criteria), reverse=order)
+    def parseAPI(self, query):
+        """Find all elements on page and store them into the dictionary using plati.ru API"""
+        self.data = []
+        pagesize = 499
+        contents = requests.get(f"https://plati.io/api/search.ashx?query={quote(query)}&pagesize={pagesize}&visibleOnly=true&response=json").json()
+        total_pages = int(contents['Totalpages'])
+        for entry in contents['items']:
+            self.data.append(
+                {'name': entry['name'], 'link': entry['url'], 'price': int(entry['price_rur']), 'rating': float(entry['seller_rating']), 'sold': int(entry['numsold'])})
+        if total_pages > 1:
+            for i in range(2, total_pages + 1):
+                contents = requests.get(f"https://plati.io/api/search.ashx?query={quote(query)}&pagesize={pagesize}&pagenum={i}&visibleOnly=true&response=json").json()
+                for entry in contents['items']:
+                    self.data.append(
+                        {'name': entry['name'], 'link': entry['url'], 'price': int(entry['price_rur']), 'rating': float(entry['seller_rating']), 'sold': int(entry['numsold'])})
+# Функция поиска, которая будет вызываться из интерфейса Gradio
+def search(query):
+    logging.info(f"Search started with query: {query}")
+    products = Products()
+    products.parseAPI(query)
+    products.write_yaml_file("cache.yaml")
+    products.read_yaml_file("cache.yaml")
+    logging.info(f"Search results: {products.data}")
+    # Создаем новый DataFrame из списка словарей products.data
+    df = pd.DataFrame(products.data)
+    return df  # Возвращаем DataFrame
+# Добавляем функцию greet для демонстрации
+def greet(name):
+    return "Hello " + name + "!!"
+# Создание Gradio интерфейса
+def create_interface():
+    with gr.Blocks() as demo:
+        gr.Markdown("# Plati.market Parser and Greet Function")
+        gr.Markdown("### Greeting Section")
+        greet_input = gr.Textbox(label="Enter your name")
+        greet_button = gr.Button("Greet")
+        greet_output = gr.Textbox(label="Greeting")
+        greet_button.click(fn=greet, inputs=greet_input, outputs=greet_output)
+        gr.Markdown("### Plati.market Search Section")
+        gr.Markdown("Input what you like to find in the field below. The results will be displayed in the table.")
+        search_input = gr.Textbox(label="Search Query")
+        search_button = gr.Button("Search")
+        data_table = gr.Dataframe(headers=["name", "link", "price", "rating", "sold"], interactive=True, label="Search Results")
+        search_button.click(fn=search, inputs=search_input, outputs=data_table)
+    return demo
+if __name__ == "__main__":
+    # Настройка логирования
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    demo = create_interface()
+    demo.launch()

products.py ADDED Viewed

	@@ -0,0 +1,114 @@

+from urllib.parse import quote
+import time
+import re
+import operator
+from operator import itemgetter
+from urllib.parse import quote
+import yaml
+import requests
+import json
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.edge.options import Options
+class Products:
+    data = []  # Список, в котором будут словари
+    def read_yaml_file(self, filename):
+        """Read the data from a YAML file and return a list of dictionaries"""
+        with open(filename, 'r') as file:
+            self.data = yaml.load(file, Loader=yaml.FullLoader)
+        return
+    def write_yaml_file(self, filename):
+        """Write the data in YAML format to a file"""
+        with open(filename, 'w') as file:
+            yaml.dump(self.data, file)
+    def sortProducts(self,criteria: str,order :bool=False):
+        """Sort list of products by given criteria:
+        \n price - price of a product
+        \n rating - total rating of a seller
+        \n sold - the amount of items sold
+        \n\n order - asc=0, desc=1
+        """
+        self.data.sort(key=operator.itemgetter(criteria),reverse=order)
+    def printData(self):
+        for item in self.data:
+            print(item['name']+'\t'+item['link'] +
+                  '\n'+str(item['price'])+"  "+str(item['rating'])+"   "+str(item['sold'])+'\n\n')
+    def ParsePage(self, query):
+        """Find all elements on page and store them into the dictionary
+        \n use parseAPI function instead if possible
+        """
+        self.data=[]
+        options = Options()
+        options.add_argument("headless")# Показывать ли окно браузера
+        browser = webdriver.Edge(
+            executable_path='msedgedriver.exe', options=options)
+        q=quote(query)
+        url = f"https://plati.market/search/{q}"
+        browser.get(url)
+        time.sleep(1)
+        #Searching through website
+        while True:
+            try:
+                browser.find_element(By.ID, 'gdpr_accept_button').click()#Accept cookies if present, DO NOT REMOVE
+            except:
+                pass
+            try:
+                # :Список всех блоков с товаром
+                allBlocks = browser.find_elements(By.CSS_SELECTOR, 'li.shadow')
+                pageNumber = int(browser.find_elements(By.CSS_SELECTOR, 'a.active')[
+                                 1].text)  # :Номер текущей страницы
+            except:
+                print("Результаты не найдены")
+                break
+            for block in allBlocks:
+                BlockTitle = block.find_element(
+                    By.TAG_NAME, 'h1')  # """ Часть блока с ценой и названием"""
+                BlockName = BlockTitle.find_element(
+                    By.TAG_NAME, 'a')  # """Название блока"""
+                BlockLink = BlockName.get_attribute(
+                    'href')  # """Ссылка на продукт"""
+                BlockPrice = BlockTitle.find_element(By.TAG_NAME, 'span')
+                RubPrice = re.search(" [0-9]+ ", BlockPrice.text)
+                RubPrice = int(RubPrice.group(0))
+                BlockInfo = block.find_elements(By.TAG_NAME,"strong")#all additional information
+                BlockRating= float(BlockInfo[0].text.replace(',','.'))#sellers rating
+                try:
+                    BlockSold = int(BlockInfo[1].text.replace('>',''))#Total amount sold
+                except:
+                    BlockSold=0#На случай если у блока нет информации
+                    pass
+                self.data.append(
+                    {'name': BlockName.text, 'link': BlockLink, 'price': RubPrice,'rating':BlockRating,'sold':BlockSold})
+            try:
+                browser.find_element(By.LINK_TEXT, str(pageNumber+1)).click()# Переход на следующую страницу
+            except:
+                break
+    def parseAPI(self,query):
+        """Find all elements on page and store them into the dictionary using plati.ru API"""
+        self.data=[]
+        """pagesize should be less than 500"""
+        pagesize=499
+        contents=requests.get(f"https://plati.io/api/search.ashx?query={query}&pagesize={pagesize}&visibleOnly=true&response=json").json()
+        total_pages=int(contents['Totalpages'])
+        for entry in contents['items']:
+            self.data.append(
+                    {'name': entry['name'], 'link': entry['url'], 'price': int(entry['price_rur']),'rating':float(entry['seller_rating']),'sold':int(entry['numsold'])})
+        if (total_pages>1):
+            for i in range(2,total_pages+1):
+                contents=requests.get(f"https://plati.io/api/search.ashx?query={query}&pagesize={pagesize}&pagenum={i}&visibleOnly=true&response=json").json()
+                for entry in contents['items']:
+                    self.data.append(
+                        {'name': entry['name'], 'link': entry['url'], 'price': int(entry['price_rur']),'rating':float(entry['seller_rating']),'sold':int(entry['numsold'])})