ChandimaPrabath committed
Commit e1cb714
1 Parent(s): e0663d3

revert to 0.0.2.7 V Beta

Files changed (3):
  1. LoadBalancer.py +64 -52
  2. hf_scrapper.py +21 -21
  3. indexer.py +20 -34
LoadBalancer.py CHANGED
@@ -1,10 +1,11 @@
 import os
 import json
-import asyncio
-import logging
-import re
 from indexer import indexer
+import re
 from tvdb import fetch_and_cache_json
+from threading import Event, Thread
+import time
+import logging
 from utils import convert_to_gb
 from api import InstancesAPI
 
@@ -14,13 +15,13 @@ download_progress = {}
 
 class LoadBalancer:
     def __init__(self, cache_dir, index_file, token, repo, polling_interval=4, max_retries=3, initial_delay=1):
-        self.version = "0.0.2.9 V Beta"
+        self.version = "0.0.2.7 V Beta"
         self.instances = []
         self.instances_health = {}
         self.polling_interval = polling_interval
         self.max_retries = max_retries
         self.initial_delay = initial_delay
-        self.stop_event = asyncio.Event()
+        self.stop_event = Event()
         self.instances_api = InstancesAPI(self.instances)
         self.CACHE_DIR = cache_dir
         self.INDEX_FILE = index_file
@@ -39,13 +40,18 @@ class LoadBalancer:
         indexer()
 
         # Load the file structure JSON
-        asyncio.run(self.load_file_structure())
+        self.load_file_structure()
+
+        # Start polling and file checking in separate threads
+        polling_thread = Thread(target=self.start_polling)
+        polling_thread.daemon = True
+        polling_thread.start()
 
-        # Start polling and file checking in separate tasks
-        asyncio.create_task(self.start_polling())
-        asyncio.create_task(self.check_file_updates())
+        file_checking_thread = Thread(target=self.check_file_updates)
+        file_checking_thread.daemon = True
+        file_checking_thread.start()
 
-    async def load_file_structure(self):
+    def load_file_structure(self):
         if not os.path.exists(self.INDEX_FILE):
             raise FileNotFoundError(f"{self.INDEX_FILE} not found. Please make sure the file exists.")
 
@@ -53,21 +59,23 @@ class LoadBalancer:
             self.file_structure = json.load(f)
             logging.info("File structure loaded successfully.")
 
-    async def check_file_updates(self):
+    def check_file_updates(self):
         while not self.stop_event.is_set():
             if self.index_file_last_modified != os.path.getmtime(self.INDEX_FILE):
                 logging.info(f"{self.INDEX_FILE} has been updated. Re-indexing...")
                 indexer()  # Re-run the indexer
-                await self.load_file_structure()  # Reload the file structure
+                self.load_file_structure()  # Reload the file structure
                 self.index_file_last_modified = os.path.getmtime(self.INDEX_FILE)
 
-                # Restart prefetching task
-                if hasattr(self, 'prefetch_task') and not self.prefetch_task.done():
-                    await self.prefetch_task
+                # Restart prefetching thread
+                if hasattr(self, 'prefetch_thread') and self.prefetch_thread.is_alive():
+                    self.prefetch_thread.join()
 
-                self.prefetch_task = asyncio.create_task(self.start_prefetching())
+                self.prefetch_thread = Thread(target=self.start_prefetching)
+                self.prefetch_thread.daemon = True
+                self.prefetch_thread.start()
 
-            await asyncio.sleep(120)  # Check every 2 minutes
+            time.sleep(120)  # Check every 2 minutes
 
     def register_instance(self, instance_url):
         if instance_url not in self.instances:
@@ -84,8 +92,8 @@ class LoadBalancer:
         else:
             logging.info(f"Instance {instance_url} not found for removal.")
 
-    async def get_reports(self):
-        reports = await self.instances_api.fetch_reports()
+    def get_reports(self):
+        reports = self.instances_api.fetch_reports()
 
         # Initialize temporary JSON data holders
         temp_film_store = {}
@@ -129,27 +137,30 @@ class LoadBalancer:
             logging.info("Film and TV Stores processed successfully.")
             self.update_instances_health(instance=instance_url, cache_size=cache_size)
 
-    async def start_polling(self):
+    def start_polling(self):
         logging.info("Starting polling.")
         while not self.stop_event.is_set():
-            await self.get_reports()
-            await asyncio.sleep(self.polling_interval)
+            self.get_reports()
+            time.sleep(self.polling_interval)
         logging.info("Polling stopped.")
 
-    async def stop_polling(self):
+    def stop_polling(self):
         logging.info("Stopping polling.")
         self.stop_event.set()
 
-    async def start_prefetching(self):
-        """Start the metadata prefetching."""
-        await self.prefetch_metadata()
+    def start_prefetching(self):
+        """Start the metadata prefetching in a separate thread."""
+        self.prefetch_metadata()
+
+    #################################################################
 
     def update_instances_health(self, instance, cache_size):
-        self.instances_health[instance] = {"used": cache_size["cache_size"],
+        self.instances_health[instance] = {"used":cache_size["cache_size"],
                                            "total": "50 GB"}
         logging.info(f"Updated instance {instance} with cache size {cache_size}")
 
-    async def download_film_to_best_instance(self, title):
+
+    def download_film_to_best_instance(self, title):
         """
         Downloads a film to the first instance that has more free space on the self.instance_health list variable.
         The instance_health looks like this:
@@ -176,14 +187,14 @@ class LoadBalancer:
                 best_instance = instance_url
 
         if best_instance:
-            result = await self.instances_api.download_film(best_instance, title)
+            result = self.instances_api.download_film(best_instance, title)
             film_id = result["film_id"]
             status = result["status"]
             progress_url = f'{best_instance}/api/progress/{film_id}'
             response = {
-                "film_id": film_id,
-                "status": status,
-                "progress_url": progress_url
+                "film_id":film_id,
+                "status":status,
+                "progress_url":progress_url
             }
 
             return response
@@ -191,9 +202,9 @@ class LoadBalancer:
             logging.error("No suitable instance found for downloading the film.")
             return {"error": "No suitable instance found for downloading the film."}
 
-    async def download_episode_to_best_instance(self, title, season, episode):
+    def download_episode_to_best_instance(self, title, season, episode):
         """
-        Downloads an episode to the first instance that has more free space on the self.instance_health list variable.
+        Downloads a episode to the first instance that has more free space on the self.instance_health list variable.
         The instance_health looks like this:
         {
             "https://unicone-studio-instance1.hf.space": {
@@ -202,9 +213,9 @@ class LoadBalancer:
             }
         }
         Args:
-            title (str): The title of the TV show.
-            season (str): The season of the TV show.
-            episode (str): The episode of the TV show.
+            title (str): The title of the Tv show.
+            season (str): The season of the Tv show.
+            episode (str): The title of the Tv show.
         """
         best_instance = None
         max_free_space = -1
@@ -220,22 +231,23 @@ class LoadBalancer:
                 best_instance = instance_url
 
         if best_instance:
-            result = await self.instances_api.download_episode(best_instance, title, season, episode)
+            result = self.instances_api.download_episode(best_instance, title, season, episode)
             episode_id = result["episode_id"]
             status = result["status"]
             progress_url = f'{best_instance}/api/progress/{episode_id}'
             response = {
-                "episode_id": episode_id,
-                "status": status,
-                "progress_url": progress_url
+                "episode_id":episode_id,
+                "status":status,
+                "progress_url":progress_url
             }
 
             return response
         else:
-            logging.error("No suitable instance found for downloading the episode.")
-            return {"error": "No suitable instance found for downloading the episode."}
+            logging.error("No suitable instance found for downloading the film.")
+            return {"error": "No suitable instance found for downloading the film."}
 
-    async def find_movie_path(self, title):
+    #################################################################
+    def find_movie_path(self, title):
         """Find the path of the movie in the JSON data based on the title."""
         for directory in self.file_structure:
             if directory['type'] == 'directory' and directory['path'] == 'films':
@@ -246,7 +258,7 @@ class LoadBalancer:
                     return item['path']
         return None
 
-    async def find_tv_path(self, title):
+    def find_tv_path(self, title):
         """Find the path of the TV show in the JSON data based on the title."""
         for directory in self.file_structure:
             if directory['type'] == 'directory' and directory['path'] == 'tv':
@@ -255,7 +267,7 @@ class LoadBalancer:
                     return sub_directory['path']
         return None
 
-    async def get_tv_structure(self, title):
+    def get_tv_structure(self, title):
         """Find the path of the TV show in the JSON data based on the title."""
         for directory in self.file_structure:
             if directory['type'] == 'directory' and directory['path'] == 'tv':
@@ -264,11 +276,11 @@ class LoadBalancer:
                     return sub_directory
         return None
 
-    async def get_film_id(self, title):
+    def get_film_id(self, title):
         """Generate a film ID based on the title."""
         return title.replace(" ", "_").lower()
 
-    async def prefetch_metadata(self):
+    def prefetch_metadata(self):
         """Prefetch metadata for all items in the file structure."""
         for item in self.file_structure:
             if 'contents' in item:
@@ -291,9 +303,9 @@ class LoadBalancer:
                     title = parts[0].strip()
                     year = int(parts[-1])
 
-                    await fetch_and_cache_json(original_title, title, media_type, year)
+                    fetch_and_cache_json(original_title, title, media_type, year)
 
-    async def get_all_tv_shows(self):
+    def get_all_tv_shows(self):
         """Get all TV shows from the indexed cache structure JSON file."""
         tv_shows = {}
         for directory in self.file_structure:
@@ -314,7 +326,7 @@ class LoadBalancer:
                 })
         return tv_shows
 
-    async def get_all_films(self):
+    def get_all_films(self):
         """Get all films from the indexed cache structure JSON file."""
         films = []
         for directory in self.file_structure:
@@ -322,4 +334,4 @@ class LoadBalancer:
             for sub_directory in directory['contents']:
                 if sub_directory['type'] == 'directory':
                     films.append(sub_directory['path'])
-        return films
+        return films
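
The substance of this revert is a swap of asyncio primitives for their threading equivalents: asyncio.Event becomes threading.Event, asyncio.create_task becomes a daemon Thread, and await asyncio.sleep becomes time.sleep. Below is a minimal, self-contained sketch of that polling pattern; the Poller class and the one-second interval are illustrative stand-ins, not code from this repository.

import time
import logging
from threading import Event, Thread

logging.basicConfig(level=logging.INFO)

class Poller:
    def __init__(self, polling_interval=4):
        self.polling_interval = polling_interval
        self.stop_event = Event()  # set() from any thread stops the loop

    def start(self):
        # Daemon threads are killed when the main process exits, so the
        # application never hangs waiting for the poll loop on shutdown.
        thread = Thread(target=self._poll, daemon=True)
        thread.start()
        return thread

    def _poll(self):
        while not self.stop_event.is_set():
            logging.info("polling...")  # stand-in for get_reports()
            time.sleep(self.polling_interval)
        logging.info("Polling stopped.")

poller = Poller(polling_interval=1)
thread = poller.start()
time.sleep(3)
poller.stop_event.set()
thread.join()

One trade-off of sleeping inside the loop, as the reverted code does: stop_event.set() only takes effect after the current sleep finishes, so shutdown can lag by up to one full interval. Using stop_event.wait(self.polling_interval) instead would wake the loop immediately.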
hf_scrapper.py CHANGED
@@ -1,19 +1,17 @@
 import os
+import requests
 import json
-import aiohttp
-import asyncio
-import aiofiles
 import urllib.request
-from aiohttp import ClientSession, ClientTimeout
-from aiohttp.client_exceptions import ClientError
-from tqdm.asyncio import tqdm
+import time
+from requests.exceptions import RequestException
+from tqdm import tqdm
 
 CACHE_DIR = os.getenv("CACHE_DIR")
 CACHE_JSON_PATH = os.path.join(CACHE_DIR, "cached_films.json")
 
 download_progress = {}
 
-async def get_system_proxies():
+def get_system_proxies():
     """
     Retrieves the system's HTTP and HTTPS proxies.
 
@@ -31,7 +29,7 @@ async def get_system_proxies():
         print(f"Error getting system proxies: {e}")
         return {}
 
-async def get_file_structure(repo, token, path="", proxies=None):
+def get_file_structure(repo, token, path="", proxies=None):
     """
     Fetches the file structure of a specified Hugging Face repository.
 
@@ -46,18 +44,16 @@ async def get_file_structure(repo, token, path="", proxies=None):
     """
     api_url = f"https://huggingface.co/api/models/{repo}/tree/main/{path}"
    headers = {'Authorization': f'Bearer {token}'}
-    timeout = ClientTimeout(total=10)
-    async with ClientSession(timeout=timeout) as session:
-        print(f"Fetching file structure from URL: {api_url} with proxies: {proxies}")
-        try:
-            async with session.get(api_url, headers=headers, proxy=proxies.get("http")) as response:
-                response.raise_for_status()
-                return await response.json()
-        except ClientError as e:
-            print(f"Error fetching file structure: {e}")
-            return []
+    print(f"Fetching file structure from URL: {api_url} with proxies: {proxies}")
+    try:
+        response = requests.get(api_url, headers=headers, proxies=proxies)
+        response.raise_for_status()
+        return response.json()
+    except RequestException as e:
+        print(f"Error fetching file structure: {e}")
+        return []
 
-async def write_file_structure_to_json(file_structure, file_path):
+def write_file_structure_to_json(file_structure, file_path):
     """
     Writes the file structure to a JSON file.
 
@@ -66,8 +62,12 @@ async def write_file_structure_to_json(file_structure, file_path):
         file_path (str): The path where the JSON file will be saved.
     """
     try:
-        async with aiofiles.open(file_path, 'w') as json_file:
-            await json_file.write(json.dumps(file_structure, indent=2))
+        with open(file_path, 'w') as json_file:
+            json.dump(file_structure, json_file, indent=2)
         print(f'File structure written to {file_path}')
     except IOError as e:
         print(f"Error writing file structure to JSON: {e}")
+
+if __name__ == "__main__":
+    file_url = "https://huggingface.co/Unicone-Studio/jellyfin_media/resolve/main/films/Funky%20Monkey%202004/Funky%20Monkey%20(2004)%20Web-dl%201080p.mp4"
+    token = os.getenv("TOKEN")
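
The hf_scrapper revert replaces the aiohttp session with a single blocking requests.get. A sketch of that call in isolation follows; note that the reverted code drops the async version's 10-second total timeout, so the timeout=10 below is an editorial assumption, and the repo argument is only a placeholder taken from the URL in the diff.

import os
import requests
from requests.exceptions import RequestException

def fetch_tree(repo, token, path="", proxies=None):
    # Same Hugging Face tree endpoint the diff calls.
    api_url = f"https://huggingface.co/api/models/{repo}/tree/main/{path}"
    headers = {"Authorization": f"Bearer {token}"}
    try:
        # requests takes a {"http": ..., "https": ...} proxies mapping,
        # which is the shape urllib.request.getproxies() returns.
        response = requests.get(api_url, headers=headers, proxies=proxies, timeout=10)
        response.raise_for_status()  # turn 4xx/5xx into an exception
        return response.json()
    except RequestException as e:
        print(f"Error fetching file structure: {e}")
        return []

if __name__ == "__main__":
    print(fetch_tree("Unicone-Studio/jellyfin_media", os.getenv("TOKEN")))

Without an explicit timeout, requests waits indefinitely on a stalled connection, which matters for anything called from a polling loop.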
indexer.py CHANGED
@@ -1,45 +1,31 @@
 import json
-import logging
-import asyncio
 from hf_scrapper import get_system_proxies, get_file_structure, write_file_structure_to_json
 from dotenv import load_dotenv
 import os
 
 load_dotenv()
 
-async def index_repository(token, repo, current_path="", proxies=None):
-    try:
-        file_structure = await get_file_structure(repo, token, current_path, proxies)
-        full_structure = []
-        for item in file_structure:
-            if item['type'] == 'directory':
-                sub_directory_structure = await index_repository(token, repo, item['path'], proxies)
-                full_structure.append({
-                    "type": "directory",
-                    "path": item['path'],
-                    "contents": sub_directory_structure
-                })
-            else:
-                full_structure.append(item)
-        return full_structure
-    except Exception as e:
-        logging.error(f"Error indexing repository: {e}")
-        raise
+def index_repository(token, repo, current_path="", proxies=None):
+    file_structure = get_file_structure(repo, token, current_path, proxies)
+    full_structure = []
+    for item in file_structure:
+        if item['type'] == 'directory':
+            sub_directory_structure = index_repository(token, repo, item['path'], proxies)
+            full_structure.append({
+                "type": "directory",
+                "path": item['path'],
+                "contents": sub_directory_structure
+            })
+        else:
+            full_structure.append(item)
+    return full_structure
 
-async def indexer():
+def indexer():
     token = os.getenv("TOKEN")
     repo = os.getenv("REPO")
     output_path = os.getenv("INDEX_FILE")
-
-    if not token or not repo or not output_path:
-        logging.error("Environment variables TOKEN, REPO, or INDEX_FILE are not set.")
-        return
-
-    proxies = await get_system_proxies()
-
-    try:
-        full_structure = await index_repository(token, repo, "", proxies)
-        await write_file_structure_to_json(full_structure, output_path)
-        logging.info(f"Full file structure for repository '{repo}' has been indexed and saved to {output_path}")
-    except Exception as e:
-        logging.error(f"Error during indexing: {e}")
+    proxies = get_system_proxies()
+    full_structure = index_repository(token, repo, "", proxies)
+    write_file_structure_to_json(full_structure, output_path)
+    print(f"Full file structure for repository '{repo}' has been indexed and saved to {output_path}")
+
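
index_repository builds a nested list of file and directory dicts, recursing into each directory's contents, and indexer() writes the result to INDEX_FILE. Below is a small illustrative consumer of that structure; the walk helper and the index.json fallback path are assumptions, while the node shape ("type", "path", "contents") comes from the diff above.

import json
import os

def walk(nodes, depth=0):
    # Depth-first traversal mirroring index_repository's recursion.
    for node in nodes:
        print("  " * depth + node["path"])
        if node.get("type") == "directory":
            walk(node.get("contents", []), depth + 1)

if __name__ == "__main__":
    # INDEX_FILE is the same env var indexer() writes to.
    with open(os.getenv("INDEX_FILE", "index.json")) as f:
        walk(json.load(f))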