diffusers-gallery-bot

Running on CPU Upgrade

App Files Files Community

radames committed on Feb 17, 2023

Commit

42e31b9

•

1 Parent(s): bef266a

classification code

Browse files

Files changed (2) hide show

app.py +72 -52
classifier.py +70 -0

app.py CHANGED Viewed

@@ -31,6 +31,10 @@ S3_DATA_FOLDER = Path("sd-multiplayer-data")
 DB_FOLDER = Path("diffusers-gallery-data")
 s3 = boto3.client(service_name='s3',
                   aws_access_key_id=AWS_ACCESS_KEY_ID,
                   aws_secret_access_key=AWS_SECRET_KEY)
@@ -76,9 +80,9 @@ def fetch_models(page=0):
     }
-def fetch_model_card(model):
     response = requests.get(
-        f'https://huggingface.co/{model["id"]}/raw/main/README.md')
     return response.text
@@ -94,16 +98,31 @@ async def find_image_in_model_card(text):
         return await asyncio.gather(*tasks)
-def run_inference(endpoint, img):
-    headers = {'Authorization': f'Bearer {HF_TOKEN}',
-               "X-Wait-For-Model": "true",
-               "X-Use-Cache": "true"}
-    response = requests.post(endpoint, headers=headers, data=img)
-    return response.json() if response.ok else []
-async def get_all_models():
     initial = fetch_models(0)
     num_pages = ceil(initial['numTotalItems'] / initial['numItemsPerPage'])
@@ -112,54 +131,55 @@ async def get_all_models():
     print(f"Found {num_pages} pages")
     # fetch all models
-    models = []
     for page in tqdm(range(0, num_pages)):
         print(f"Fetching page {page} of {num_pages}")
         page_models = fetch_models(page)
-        models += page_models['models']
-    with open(DB_FOLDER / "models_temp.json", "w") as f:
-        json.dump(models, f)
-    # fetch datacards and images
-    print(f"Found {len(models)} models")
-    final_models = []
-    for model in tqdm(models):
-        print(f"Fetching model {model['id']}")
-        model_card = fetch_model_card(model)
-        images = await find_image_in_model_card(model_card)
-        # style = await run_inference(f"https://api-inference.huggingface.co/models/{model['id']}", images[0])
-        style = []
-        # aesthetic = await run_inference(f"https://api-inference.huggingface.co/models/{model['id']}", images[0])
-        aesthetic = []
-        final_models.append(
-            {**model, "images": images, "style": style, "aesthetic": aesthetic}
-        )
-    return final_models
 async def sync_data():
     print("Fetching models")
-    models = await get_all_models()
     with open(DB_FOLDER / "models.json", "w") as f:
-        json.dump(models, f)
     # with open(DB_FOLDER / "models.json", "r") as f:
-    #     models = json.load(f)
-    # open temp db
-    print("Updating database")
     with database.get_db() as db:
         cursor = db.cursor()
-        for model in models:
-            try:
-                cursor.execute("INSERT INTO models(id, data) VALUES (?, ?)",
-                               [model['id'], json.dumps(model)])
-            except Exception as e:
-                print(model['id'], model)
-        db.commit()
-    print("Updating repository")
-    subprocess.Popen(
-        "git add . && git commit --amend -m 'update' && git push --force", cwd=DB_FOLDER, shell=True)
 app = FastAPI()
@@ -174,7 +194,7 @@ app.add_middleware(
 # @ app.get("/sync")
 # async def sync(background_tasks: BackgroundTasks):
-#     background_tasks.add_task(sync_data)
 #     return "Synced data to huggingface datasets"
@@ -189,16 +209,16 @@ def get_page(page: int = 1):
         cursor.execute("""
             SELECT *, COUNT(*) OVER() AS total
             FROM models
-            WHERE json_extract(data, '$.likes') > 5
-            ORDER BY json_extract(data, '$.likes') DESC, datetime(json_extract(data, '$.lastModified')) DESC
             LIMIT ? OFFSET ?
         """, (MAX_PAGE_SIZE, (page - 1) * MAX_PAGE_SIZE))
         results = cursor.fetchall()
-        total = results[0][3] if results else 0
         total_pages = (total + MAX_PAGE_SIZE - 1) // MAX_PAGE_SIZE
     return {
-        "models": [json.loads(result[1]) for result in results],
         "totalPages": total_pages
     }

 DB_FOLDER = Path("diffusers-gallery-data")
+CLASSIFIER_URL = "https://radames-aesthetic-style-nsfw-classifier.hf.space/run/inference"
+ASSETS_URL = "https://d26smi9133w0oo.cloudfront.net/diffusers-gallery/"
 s3 = boto3.client(service_name='s3',
                   aws_access_key_id=AWS_ACCESS_KEY_ID,
                   aws_secret_access_key=AWS_SECRET_KEY)
     }
+def fetch_model_card(model_id):
     response = requests.get(
+        f'https://huggingface.co/{model_id}/raw/main/README.md')
     return response.text
         return await asyncio.gather(*tasks)
+def run_classifier(images):
+    images = [i for i in images if i is not None]
+    if len(images) > 0:
+        # classifying only the first image
+        images_urls = [ASSETS_URL + images[0]]
+        response = requests.post(CLASSIFIER_URL, json={"data": [
+            {"urls": images_urls},  # json urls: list of images urls
+            False,  # enable/disable gallery image output
+            None,  # single image input
+            None,  # files input
+        ]}).json()
+        # data response is array data:[[{img0}, {img1}, {img2}...], Label, Gallery],
+        class_data = response['data'][0][0]
+        print(class_data)
+        class_data_parsed = {row['label']: round(
+            row['score'], 3) for row in class_data}
+        # update row data with classificator data
+        return class_data_parsed
+    else:
+        return {}
+async def get_all_new_models():
+    """Fetch every page of the model listing and return all models as a list.
+
+    Page 0 is requested first to read 'numTotalItems' and 'numItemsPerPage',
+    from which the number of pages is derived; every page (page 0 included,
+    a second time) is then fetched in turn with fetch_models. Despite the
+    name, no filtering for "new" models happens in this function.
+
+    Returns:
+        The concatenated 'models' entries of all pages.
+    """
     initial = fetch_models(0)
     num_pages = ceil(initial['numTotalItems'] / initial['numItemsPerPage'])
     print(f"Found {num_pages} pages")
     # fetch all models
+    new_models = []
     for page in tqdm(range(0, num_pages)):
         print(f"Fetching page {page} of {num_pages}")
         page_models = fetch_models(page)
+        new_models += page_models['models']
+    return new_models
 async def sync_data():
     print("Fetching models")
+    new_models = await get_all_new_models()
+    print(f"Found {len(new_models)} models")
+    # save list of all models for ids
     with open(DB_FOLDER / "models.json", "w") as f:
+        json.dump(new_models, f)
     # with open(DB_FOLDER / "models.json", "r") as f:
+    #     new_models = json.load(f)
+    new_models_ids = [model['id'] for model in new_models]
+    # get existing models
     with database.get_db() as db:
         cursor = db.cursor()
+        cursor.execute("SELECT id FROM models")
+        existing_models = [row['id'] for row in cursor.fetchall()]
+    models_ids_to_add = list(set(new_models_ids) - set(existing_models))
+    # find all models id to add from new_models
+    models = [model for model in new_models if model['id'] in models_ids_to_add]
+    print(f"Found {len(models)} new models")
+    for model in tqdm(models):
+        model_id = model['id']
+        model_card = fetch_model_card(model_id)
+        images = await find_image_in_model_card(model_card)
+        classifier = run_classifier(images)
+        # update model row with image and classifier data
+        with database.get_db() as db:
+            cursor = db.cursor()
+            cursor.execute("INSERT INTO models(id, data) VALUES (?, ?)",
+                           [model_id, json.dumps({
+                               **model,
+                               "images": images,
+                               "class": classifier
+                           })])
+            db.commit()
+    # print("Updating repository")
+    # subprocess.Popen(
+    #     "git add . && git commit --amend -m 'update' && git push --force", cwd=DB_FOLDER, shell=True)
 app = FastAPI()
 # @ app.get("/sync")
 # async def sync(background_tasks: BackgroundTasks):
+#     await sync_data()
 #     return "Synced data to huggingface datasets"
         cursor.execute("""
             SELECT *, COUNT(*) OVER() AS total
             FROM models
+            WHERE json_extract(data, '$.likes') > 4
+            ORDER BY datetime(json_extract(data, '$.lastModified')) DESC
             LIMIT ? OFFSET ?
         """, (MAX_PAGE_SIZE, (page - 1) * MAX_PAGE_SIZE))
         results = cursor.fetchall()
+        total = results[0]['total'] if results else 0
         total_pages = (total + MAX_PAGE_SIZE - 1) // MAX_PAGE_SIZE
     return {
+        "models": [json.loads(result['data']) for result in results],
         "totalPages": total_pages
     }

classifier.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import os
+import re
+import requests
+import json
+import subprocess
+from io import BytesIO
+import uuid
+from math import ceil
+from tqdm import tqdm
+from pathlib import Path
+from db import Database
+# Folder handed to Database below.
+DB_FOLDER = Path("diffusers-gallery-data")
+database = Database(DB_FOLDER)
+# Endpoint the classification requests are POSTed to.
+CLASSIFIER_URL = "https://radames-aesthetic-style-nsfw-classifier.hf.space/run/inference"
+# Base URL prepended to a stored image path to build the URL sent to the classifier.
+ASSETS_URL = "https://d26smi9133w0oo.cloudfront.net/diffusers-gallery/"
+def main():
+    with database.get_db() as db:
+        cursor = db.cursor()
+        cursor.execute("""
+            SELECT *
+            FROM models
+        """)
+        results = list(cursor.fetchall())
+    for row in tqdm(results):
+        row_id = row['id']
+        # keep json data on row_data
+        row_data = json.loads(row['data'])
+        print("updating row", row_id)
+        images = row_data['images']
+        # filter nones
+        images = [i for i in images if i is not None]
+        if len(images) > 0:
+            # classifying only the first image
+            images_urls = [ASSETS_URL + images[0]]
+            response = requests.post(CLASSIFIER_URL, json={"data": [
+                {"urls": images_urls},  # json urls: list of images urls
+                False,  # enable/disable gallery image output
+                None,  # single image input
+                None,  # files input
+            ]}).json()
+            # data response is array data:[[{img0}, {img1}, {img2}...], Label, Gallery],
+            class_data = response['data'][0][0]
+            class_data_parsed = {row['label']: round(
+                row['score'], 3) for row in class_data}
+            # update row data with classificator data
+            row_data['class'] = class_data_parsed
+        else:
+            row_data['class'] = {}
+        with database.get_db() as db:
+            cursor = db.cursor()
+            cursor.execute("UPDATE models SET data = ? WHERE id = ?",
+                           [json.dumps(row_data), row_id])
+            db.commit()
+if __name__ == "__main__":
+    main()