ChandimaPrabath committed on
Commit
2e81e75
1 Parent(s): 15d97c1
Files changed (9)
  1. .gitignore +12 -0
  2. Instance.py +403 -0
  3. README.md +1 -1
  4. api.py +22 -0
  5. app.py +16 -2
  6. hf_scrapper.py +249 -0
  7. indexer.py +32 -0
  8. old.app.py +174 -0
  9. requirements.txt +4 -0
.gitignore ADDED
@@ -0,0 +1,12 @@
+ #.env
+ .env
+ # cache
+ tmp
+ # pycache
+ __pycache__
+ # stream-test.py
+ stream-test.py
+ #test
+ test.py
+ # README.md
+ README.md
Instance.py ADDED
@@ -0,0 +1,403 @@
+ import os
+ import requests
+ import json
+ import urllib.request
+ import time
+ from threading import Thread, Event
+ from requests.exceptions import RequestException
+ from tqdm import tqdm
+ from indexer import indexer
+ import logging
+
+ CACHE_DIR = os.getenv("CACHE_DIR")
+
+ download_progress = {}
+
+ class Instance:
+     def __init__(self, id, url, cache_dir, index_file, token, repo, load_balancer_api, max_retries=20, initial_delay=1):
+         self.version = "0.0.0.1 Alpha"
+         self.id = id
+         self.url = url
+         self.CACHE_DIR = cache_dir
+         self.INDEX_FILE = index_file
+         self.TOKEN = token
+         self.REPO = repo
+         self.FILM_STORE_JSON_PATH = os.path.join(cache_dir, "film_store.json")
+         self.TV_STORE_JSON_PATH = os.path.join(cache_dir, "tv_store.json")
+         self.download_threads = {}
+         self.file_structure = None
+         self.load_balancer_api = load_balancer_api
+         self.max_retries = max_retries
+         self.initial_delay = initial_delay
+         self.last_report_time = time.time()  # Initialize the last report time
+         self.re_register_event = Event()
+
+         # Ensure CACHE_DIR exists
+         if not os.path.exists(self.CACHE_DIR):
+             os.makedirs(self.CACHE_DIR)
+
+         for path in [self.FILM_STORE_JSON_PATH, self.TV_STORE_JSON_PATH]:
+             if not os.path.exists(path):
+                 with open(path, 'w') as json_file:
+                     json.dump({}, json_file)
+
+         # Index the file structure and load it
+         self.run_indexer_and_load()
+
+         # Start prefetching metadata and monitoring registration
+         self.register_to_load_balancer()
+         registration_thread = Thread(target=self.monitor_registration)
+         registration_thread.daemon = True
+         registration_thread.start()
+
+         # Start the thread to re-index every 2 minutes
+         indexer_thread = Thread(target=self.run_indexer_periodically)
+         indexer_thread.daemon = True
+         indexer_thread.start()
+
+     def run_indexer_and_load(self):
+         """Runs the indexer and loads the file structure from INDEX_FILE."""
+         indexer()
+         if not os.path.exists(self.INDEX_FILE):
+             raise FileNotFoundError(f"{self.INDEX_FILE} not found. Please make sure the file exists.")
+
+         with open(self.INDEX_FILE, 'r') as f:
+             self.file_structure = json.load(f)
+         logging.info("File structure reloaded successfully.")
+
+     def run_indexer_periodically(self):
+         """Periodically reruns the indexer and reloads the file structure."""
+         while True:
+             time.sleep(120)  # Wait for 2 minutes
+             logging.info("Re-running indexer and reloading file structure.")
+             self.run_indexer_and_load()
+
+     def compile_report(self):
+         self.last_report_time = time.time()  # Update the last report time
+
+         film_store_path = os.path.join(self.CACHE_DIR, "film_store.json")
+         tv_store_path = os.path.join(self.CACHE_DIR, "tv_store.json")
+         cache_size = self.get_cache_size()
+
+         report = {
+             "instance_id": self.id,
+             "instance_url": self.url,
+             "film_store": self.read_json(film_store_path),
+             "tv_store": self.read_json(tv_store_path),
+             "cache_size": cache_size
+         }
+         return report
+
+     def register_to_load_balancer(self):  # NOTE: shadowed by the retry-aware definition at the end of this class
+         result = self.load_balancer_api.register_instance(self.id, self.url)
+         if result is not None:
+             logging.info(f'Registered instance {self.id} to load balancer.')
+         else:
+             logging.error(f'Failed to register instance {self.id} to load balancer.')
+
+     def monitor_registration(self):
+         while True:
+             if time.time() - self.last_report_time > 60:  # Check if 1 minute has passed
+                 logging.info('1 minute passed since last report. Re-registering...')
+                 self.register_to_load_balancer()
+                 self.last_report_time = time.time()  # Reset the last report time
+             time.sleep(30)  # Check every 30 seconds
+
+     def get_cache_size(self):
+         total_size = 0
+         for dirpath, dirnames, filenames in os.walk(self.CACHE_DIR):
+             for f in filenames:
+                 fp = os.path.join(dirpath, f)
+                 total_size += os.path.getsize(fp)
+         return {"cache_size": f"{total_size / (1024 * 1024 * 1024):.2f} GB"}
+
+     @staticmethod
+     def read_json(file_path):
+         if os.path.exists(file_path):
+             with open(file_path, 'r') as json_file:
+                 return json.load(json_file)
+         return {}
+
+     @staticmethod
+     def get_system_proxies():
+         """
+         Retrieves the system's HTTP and HTTPS proxies.
+
+         Returns:
+             dict: A dictionary containing the proxies.
+         """
+         try:
+             proxies = urllib.request.getproxies()
+             print("System proxies:", proxies)
+             return {
+                 "http": proxies.get("http"),
+                 "https": proxies.get("https")
+             }
+         except Exception as e:
+             print(f"Error getting system proxies: {e}")
+             return {}
+
+     def download_film(self, file_url, token, cache_path, proxies, film_id, title, chunk_size=100 * 1024 * 1024):
+         """
+         Downloads a file from the specified URL and saves it to the cache path.
+         Tracks the download progress.
+
+         Args:
+             file_url (str): The URL of the file to download.
+             token (str): The authorization token for the request.
+             cache_path (str): The path to save the downloaded file.
+             proxies (dict): Proxies for the request.
+             film_id (str): Unique identifier for the film download.
+             title (str): The title of the film.
+             chunk_size (int): Size of each chunk to download.
+         """
+         print(f"Downloading file from URL: {file_url} to {cache_path} with proxies: {proxies}")
+         headers = {'Authorization': f'Bearer {token}'}
+         try:
+             response = requests.get(file_url, headers=headers, proxies=proxies, stream=True)
+             response.raise_for_status()
+
+             total_size = int(response.headers.get('content-length', 0))
+             download_progress[film_id] = {"total": total_size, "downloaded": 0, "status": "Downloading", "start_time": time.time()}
+
+             os.makedirs(os.path.dirname(cache_path), exist_ok=True)
+             with open(cache_path, 'wb') as file, tqdm(total=total_size, unit='B', unit_scale=True, desc=cache_path) as pbar:
+                 for data in response.iter_content(chunk_size=chunk_size):
+                     file.write(data)
+                     pbar.update(len(data))
+                     download_progress[film_id]["downloaded"] += len(data)
+
+             print(f'File cached to {cache_path} successfully.')
+             self.update_film_store_json(title, cache_path)
+             download_progress[film_id]["status"] = "Completed"
+         except RequestException as e:
+             print(f"Error downloading file: {e}")
+             download_progress.setdefault(film_id, {"total": 0, "downloaded": 0, "start_time": time.time()})["status"] = "Failed"
+         except IOError as e:
+             print(f"Error writing file {cache_path}: {e}")
+             download_progress.setdefault(film_id, {"total": 0, "downloaded": 0, "start_time": time.time()})["status"] = "Failed"
+         finally:
+             if film_id in download_progress and download_progress[film_id]["status"] != "Downloading":
+                 download_progress[film_id]["end_time"] = time.time()
+
+     @staticmethod
+     def get_download_progress(id):
+         """
+         Gets the download progress for a specific film.
+
+         Args:
+             id (str): The unique identifier for the film download.
+
+         Returns:
+             dict: A dictionary containing the total size, downloaded size, progress percentage, status, and ETA.
+         """
+         if id in download_progress:
+             total = download_progress[id]["total"]
+             downloaded = download_progress[id]["downloaded"]
+             status = download_progress[id].get("status", "In Progress")
+             progress = (downloaded / total) * 100 if total > 0 else 0
+
+             eta = None
+             if status == "Downloading" and downloaded > 0:
+                 elapsed_time = time.time() - download_progress[id]["start_time"]
+                 estimated_total_time = elapsed_time * (total / downloaded)
+                 eta = estimated_total_time - elapsed_time
+             elif status == "Completed":
+                 eta = 0
+
+             return {"total": total, "downloaded": downloaded, "progress": progress, "status": status, "eta": eta}
+         return {"total": 0, "downloaded": 0, "progress": 0, "status": "Not Found", "eta": None}
+
+     def update_film_store_json(self, title, cache_path):
+         """
+         Updates the film store JSON with the new file.
+
+         Args:
+             title (str): The title of the film.
+             cache_path (str): The local path where the file is saved.
+         """
+         film_store_data = {}
+         if os.path.exists(self.FILM_STORE_JSON_PATH):
+             with open(self.FILM_STORE_JSON_PATH, 'r') as json_file:
+                 film_store_data = json.load(json_file)
+
+         film_store_data[title] = cache_path
+
+         with open(self.FILM_STORE_JSON_PATH, 'w') as json_file:
+             json.dump(film_store_data, json_file, indent=2)
+         print(f'Film store updated with {title}.')
+
+     def download_episode(self, file_url, token, cache_path, proxies, episode_id, title, chunk_size=100 * 1024 * 1024):
+         """
+         Downloads a file from the specified URL and saves it to the cache path.
+         Tracks the download progress.
+
+         Args:
+             file_url (str): The URL of the file to download.
+             token (str): The authorization token for the request.
+             cache_path (str): The path to save the downloaded file.
+             proxies (dict): Proxies for the request.
+             episode_id (str): Unique identifier for the episode download.
+             title (str): The title of the TV show.
+             chunk_size (int): Size of each chunk to download.
+         """
+         print(f"Downloading file from URL: {file_url} to {cache_path} with proxies: {proxies}")
+         headers = {'Authorization': f'Bearer {token}'}
+         try:
+             response = requests.get(file_url, headers=headers, proxies=proxies, stream=True)
+             response.raise_for_status()
+
+             total_size = int(response.headers.get('content-length', 0))
+             download_progress[episode_id] = {"total": total_size, "downloaded": 0, "status": "Downloading", "start_time": time.time()}
+
+             os.makedirs(os.path.dirname(cache_path), exist_ok=True)
+             with open(cache_path, 'wb') as file, tqdm(total=total_size, unit='B', unit_scale=True, desc=cache_path) as pbar:
+                 for data in response.iter_content(chunk_size=chunk_size):
+                     file.write(data)
+                     pbar.update(len(data))
+                     download_progress[episode_id]["downloaded"] += len(data)
+
+             print(f'File cached to {cache_path} successfully.')
+             self.update_tv_store_json(title, cache_path)
+             download_progress[episode_id]["status"] = "Completed"
+         except RequestException as e:
+             print(f"Error downloading file: {e}")
+             download_progress.setdefault(episode_id, {"total": 0, "downloaded": 0, "start_time": time.time()})["status"] = "Failed"
+         except IOError as e:
+             print(f"Error writing file {cache_path}: {e}")
+             download_progress.setdefault(episode_id, {"total": 0, "downloaded": 0, "start_time": time.time()})["status"] = "Failed"
+         finally:
+             if episode_id in download_progress and download_progress[episode_id]["status"] != "Downloading":
+                 download_progress[episode_id]["end_time"] = time.time()
+
+     def update_tv_store_json(self, title, cache_path):
+         """
+         Updates the TV store JSON with the new file, organizing by title, season, and episode.
+
+         Args:
+             title (str): The title of the TV show.
+             cache_path (str): The local path where the file is saved.
+         """
+         tv_store_data = {}
+         if os.path.exists(self.TV_STORE_JSON_PATH):
+             with open(self.TV_STORE_JSON_PATH, 'r') as json_file:
+                 tv_store_data = json.load(json_file)
+
+         # Extract season and episode information from the cache_path
+         season_part = os.path.basename(os.path.dirname(cache_path))  # Extracts 'Season 1'
+         episode_part = os.path.basename(cache_path)  # Extracts 'Grand Blue Dreaming - S01E01 - Deep Blue HDTV-720p.mp4'
+
+         # Create the structure if not already present
+         if title not in tv_store_data:
+             tv_store_data[title] = {}
+
+         if season_part not in tv_store_data[title]:
+             tv_store_data[title][season_part] = {}
+
+         # Assuming episode_part is unique for each episode within a season
+         tv_store_data[title][season_part][episode_part] = cache_path
+
+         with open(self.TV_STORE_JSON_PATH, 'w') as json_file:
+             json.dump(tv_store_data, json_file, indent=2)
+
+         print(f'TV store updated with {title}, {season_part}, {episode_part}.')
+
+
+     def load_json(self, file_path):
+         """Load JSON data from a file."""
+         with open(file_path, 'r') as file:
+             return json.load(file)
+
+     def find_movie_path(self, title):
+         """Find the path of the movie in the JSON data based on the title."""
+         for directory in self.file_structure:
+             if directory['type'] == 'directory' and directory['path'] == 'films':
+                 for sub_directory in directory['contents']:
+                     if sub_directory['type'] == 'directory':
+                         for item in sub_directory['contents']:
+                             if item['type'] == 'file' and title.lower() in item['path'].lower():
+                                 return item['path']
+         return None
+
+     def find_tv_path(self, title):
+         """Find the path of the TV show in the JSON data based on the title."""
+         for directory in self.file_structure:
+             if directory['type'] == 'directory' and directory['path'] == 'tv':
+                 for sub_directory in directory['contents']:
+                     if sub_directory['type'] == 'directory' and title.lower() in sub_directory['path'].lower():
+                         return sub_directory['path']
+         return None
+
+     def get_tv_structure(self, title):
+         """Return the directory entry of the TV show in the JSON data based on the title."""
+         for directory in self.file_structure:
+             if directory['type'] == 'directory' and directory['path'] == 'tv':
+                 for sub_directory in directory['contents']:
+                     if sub_directory['type'] == 'directory' and title.lower() in sub_directory['path'].lower():
+                         return sub_directory
+         return None
+
+     def get_film_id(self, title):
+         """Generate a film ID based on the title."""
+         return title.replace(" ", "_").lower()
+
+     def bytes_to_human_readable(self, num, suffix="B"):
+         for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
+             if abs(num) < 1024.0:
+                 return f"{num:3.1f} {unit}{suffix}"
+             num /= 1024.0
+         return f"{num:.1f} Y{suffix}"
+
+     def encode_episodeid(self, title, season, episode):
+         return f"{title}_{season}_{episode}"
+
+     def get_all_tv_shows(self):
+         """Get all TV shows from the indexed cache structure JSON file."""
+         tv_shows = {}
+         for directory in self.file_structure:
+             if directory['type'] == 'directory' and directory['path'] == 'tv':
+                 for sub_directory in directory['contents']:
+                     if sub_directory['type'] == 'directory':
+                         show_title = sub_directory['path'].split('/')[-1]
+                         tv_shows[show_title] = []
+                         for season_directory in sub_directory['contents']:
+                             if season_directory['type'] == 'directory':
+                                 season = season_directory['path'].split('/')[-1]
+                                 for episode in season_directory['contents']:
+                                     if episode['type'] == 'file':
+                                         tv_shows[show_title].append({
+                                             "season": season,
+                                             "episode": episode['path'].split('/')[-1],
+                                             "path": episode['path']
+                                         })
+         return tv_shows
+
+     def get_all_films(self):
+         """Get all films from the indexed cache structure JSON file."""
+         films = []
+         for directory in self.file_structure:
+             if directory['type'] == 'directory' and directory['path'] == 'films':
+                 for sub_directory in directory['contents']:
+                     if sub_directory['type'] == 'directory':
+                         films.append(sub_directory['path'])
+         return films
+
+     def register_to_load_balancer(self):
+         retries = 0
+         delay = self.initial_delay
+         max_delay = 120
+
+         while True:
+             try:
+                 result = self.load_balancer_api.register_instance(self.id, self.url)
+                 if result:
+                     logging.info(f'Successfully registered instance {self.id} to load balancer.')
+                     return result
+
+             except Exception as e:
+                 logging.error(f'Error during registration: {e}')
+
+             retries += 1
+             logging.warning(f'Attempt {retries} to register instance {self.id} failed. Retrying in {delay} seconds...')
+             time.sleep(delay)
+             delay = min(delay * 2, max_delay)  # Exponential backoff with maximum delay
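
Note: a minimal sketch of how Instance is wired up, mirroring app.py below; it assumes the same CACHE_DIR, INDEX_FILE, TOKEN, REPO, ID, URL, and LOAD_BALANCER_URL environment variables are set.

import os
from api import LoadBalancerAPI
from Instance import Instance

# Construction indexes the repository, then starts the registration and
# periodic re-indexing threads as a side effect.
lb_api = LoadBalancerAPI(base_url=os.getenv("LOAD_BALANCER_URL"))
inst = Instance(
    id=os.getenv("ID"),
    url=os.getenv("URL"),
    cache_dir=os.getenv("CACHE_DIR"),
    index_file=os.getenv("INDEX_FILE"),
    token=os.getenv("TOKEN"),
    repo=os.getenv("REPO"),
    load_balancer_api=lb_api,
)
print(inst.version)  # "0.0.0.1 Alpha"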
README.md CHANGED
@@ -1,5 +1,5 @@
  ---
- title: Load Balancer
+ title: Instance1
  emoji: 👀
  colorFrom: pink
  colorTo: red
api.py ADDED
@@ -0,0 +1,22 @@
+ import requests
+ import logging
+ import json
+
+ class LoadBalancerAPI:
+     def __init__(self, base_url):
+         self.base_url = base_url
+
+     def register_instance(self, instance_id, instance_url):
+         data = {
+             "url": instance_url
+         }
+         api_endpoint = f'{self.base_url}/api/post/register'
+
+         try:
+             headers = {'Content-Type': 'application/json'}
+             response = requests.post(api_endpoint, data=json.dumps(data), headers=headers)
+             response.raise_for_status()
+             return response.json()  # Assuming the API returns JSON
+         except requests.exceptions.RequestException as e:
+             logging.error(f'Failed to register instance {instance_id} to load balancer: {e}')
+             return None
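
Note: a short usage sketch for this client; the base URL and instance values are illustrative placeholders. It POSTs to {base_url}/api/post/register, as in the code above.

from api import LoadBalancerAPI

# Illustrative values only.
lb = LoadBalancerAPI(base_url="http://localhost:8000")
result = lb.register_instance("instance-1", "http://localhost:7860")
if result is None:
    print("Registration failed (the error was logged).")
else:
    print("Load balancer replied:", result)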
app.py CHANGED
@@ -1,7 +1,21 @@
  from fastapi import FastAPI
+ from Instance import Instance
+ from api import LoadBalancerAPI
+ import os
+ # Constants and Configuration
+ CACHE_DIR = os.getenv("CACHE_DIR")
+ INDEX_FILE = os.getenv("INDEX_FILE")
+ TOKEN = os.getenv("TOKEN")
+ REPO = os.getenv("REPO")
+ ID = os.getenv("ID")
+ URL = os.getenv("URL")
+ LOAD_BALANCER_URL = os.getenv("LOAD_BALANCER_URL")
+
+ load_balancer_api = LoadBalancerAPI(base_url=LOAD_BALANCER_URL)
+ instance = Instance(id=ID, url=URL, cache_dir=CACHE_DIR, index_file=INDEX_FILE, token=TOKEN, repo=REPO, load_balancer_api=load_balancer_api)

  app = FastAPI()

  @app.get("/")
- def greet_json():
-     return {"Hello": "World!"}
+ async def index():
+     return instance.version
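
Note: the rewritten app.py only exposes the version string at /. A sketch of serving it, assuming port 7860 as in old.app.py (equivalently, run `uvicorn app:app --host 0.0.0.0 --port 7860` from the CLI):

import uvicorn

if __name__ == "__main__":
    uvicorn.run("app:app", host="0.0.0.0", port=7860)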
hf_scrapper.py ADDED
@@ -0,0 +1,249 @@
+ import os
+ import requests
+ import json
+ import urllib.request
+ import time
+ from requests.exceptions import RequestException
+ from tqdm import tqdm
+
+ CACHE_DIR = os.getenv("CACHE_DIR")
+ CACHE_JSON_PATH = os.path.join(CACHE_DIR, "cached_films.json")
+
+ download_progress = {}
+
+ def get_system_proxies():
+     """
+     Retrieves the system's HTTP and HTTPS proxies.
+
+     Returns:
+         dict: A dictionary containing the proxies.
+     """
+     try:
+         proxies = urllib.request.getproxies()
+         print("System proxies:", proxies)
+         return {
+             "http": proxies.get("http"),
+             "https": proxies.get("https")
+         }
+     except Exception as e:
+         print(f"Error getting system proxies: {e}")
+         return {}
+
+ def download_film(file_url, token, cache_path, proxies, film_id, title, chunk_size=100 * 1024 * 1024):
+     """
+     Downloads a file from the specified URL and saves it to the cache path.
+     Tracks the download progress.
+
+     Args:
+         file_url (str): The URL of the file to download.
+         token (str): The authorization token for the request.
+         cache_path (str): The path to save the downloaded file.
+         proxies (dict): Proxies for the request.
+         film_id (str): Unique identifier for the film download.
+         title (str): The title of the film.
+         chunk_size (int): Size of each chunk to download.
+     """
+     print(f"Downloading file from URL: {file_url} to {cache_path} with proxies: {proxies}")
+     headers = {'Authorization': f'Bearer {token}'}
+     try:
+         response = requests.get(file_url, headers=headers, proxies=proxies, stream=True)
+         response.raise_for_status()
+
+         total_size = int(response.headers.get('content-length', 0))
+         download_progress[film_id] = {"total": total_size, "downloaded": 0, "status": "Downloading", "start_time": time.time()}
+
+         os.makedirs(os.path.dirname(cache_path), exist_ok=True)
+         with open(cache_path, 'wb') as file, tqdm(total=total_size, unit='B', unit_scale=True, desc=cache_path) as pbar:
+             for data in response.iter_content(chunk_size=chunk_size):
+                 file.write(data)
+                 pbar.update(len(data))
+                 download_progress[film_id]["downloaded"] += len(data)
+
+         print(f'File cached to {cache_path} successfully.')
+         update_film_store_json(title, cache_path)
+         download_progress[film_id]["status"] = "Completed"
+     except RequestException as e:
+         print(f"Error downloading file: {e}")
+         download_progress.setdefault(film_id, {"total": 0, "downloaded": 0, "start_time": time.time()})["status"] = "Failed"
+     except IOError as e:
+         print(f"Error writing file {cache_path}: {e}")
+         download_progress.setdefault(film_id, {"total": 0, "downloaded": 0, "start_time": time.time()})["status"] = "Failed"
+     finally:
+         if film_id in download_progress and download_progress[film_id]["status"] != "Downloading":
+             download_progress[film_id]["end_time"] = time.time()
+
+ def get_download_progress(id):
+     """
+     Gets the download progress for a specific film.
+
+     Args:
+         id (str): The unique identifier for the film download.
+
+     Returns:
+         dict: A dictionary containing the total size, downloaded size, progress percentage, status, and ETA.
+     """
+     if id in download_progress:
+         total = download_progress[id]["total"]
+         downloaded = download_progress[id]["downloaded"]
+         status = download_progress[id].get("status", "In Progress")
+         progress = (downloaded / total) * 100 if total > 0 else 0
+
+         eta = None
+         if status == "Downloading" and downloaded > 0:
+             elapsed_time = time.time() - download_progress[id]["start_time"]
+             estimated_total_time = elapsed_time * (total / downloaded)
+             eta = estimated_total_time - elapsed_time
+         elif status == "Completed":
+             eta = 0
+
+         return {"total": total, "downloaded": downloaded, "progress": progress, "status": status, "eta": eta}
+     return {"total": 0, "downloaded": 0, "progress": 0, "status": "Not Found", "eta": None}
+
+ def update_film_store_json(title, cache_path):
+     """
+     Updates the film store JSON with the new file.
+
+     Args:
+         title (str): The title of the film.
+         cache_path (str): The local path where the file is saved.
+     """
+     FILM_STORE_JSON_PATH = os.path.join(CACHE_DIR, "film_store.json")
+
+     film_store_data = {}
+     if os.path.exists(FILM_STORE_JSON_PATH):
+         with open(FILM_STORE_JSON_PATH, 'r') as json_file:
+             film_store_data = json.load(json_file)
+
+     film_store_data[title] = cache_path
+
+     with open(FILM_STORE_JSON_PATH, 'w') as json_file:
+         json.dump(film_store_data, json_file, indent=2)
+     print(f'Film store updated with {title}.')
+
+
+ ###############################################################################
+ def download_episode(file_url, token, cache_path, proxies, episode_id, title, chunk_size=100 * 1024 * 1024):
+     """
+     Downloads a file from the specified URL and saves it to the cache path.
+     Tracks the download progress.
+
+     Args:
+         file_url (str): The URL of the file to download.
+         token (str): The authorization token for the request.
+         cache_path (str): The path to save the downloaded file.
+         proxies (dict): Proxies for the request.
+         episode_id (str): Unique identifier for the episode download.
+         title (str): The title of the TV show.
+         chunk_size (int): Size of each chunk to download.
+     """
+     print(f"Downloading file from URL: {file_url} to {cache_path} with proxies: {proxies}")
+     headers = {'Authorization': f'Bearer {token}'}
+     try:
+         response = requests.get(file_url, headers=headers, proxies=proxies, stream=True)
+         response.raise_for_status()
+
+         total_size = int(response.headers.get('content-length', 0))
+         download_progress[episode_id] = {"total": total_size, "downloaded": 0, "status": "Downloading", "start_time": time.time()}
+
+         os.makedirs(os.path.dirname(cache_path), exist_ok=True)
+         with open(cache_path, 'wb') as file, tqdm(total=total_size, unit='B', unit_scale=True, desc=cache_path) as pbar:
+             for data in response.iter_content(chunk_size=chunk_size):
+                 file.write(data)
+                 pbar.update(len(data))
+                 download_progress[episode_id]["downloaded"] += len(data)
+
+         print(f'File cached to {cache_path} successfully.')
+         update_tv_store_json(title, cache_path)
+         download_progress[episode_id]["status"] = "Completed"
+     except RequestException as e:
+         print(f"Error downloading file: {e}")
+         download_progress.setdefault(episode_id, {"total": 0, "downloaded": 0, "start_time": time.time()})["status"] = "Failed"
+     except IOError as e:
+         print(f"Error writing file {cache_path}: {e}")
+         download_progress.setdefault(episode_id, {"total": 0, "downloaded": 0, "start_time": time.time()})["status"] = "Failed"
+     finally:
+         if episode_id in download_progress and download_progress[episode_id]["status"] != "Downloading":
+             download_progress[episode_id]["end_time"] = time.time()
+
+
+ def update_tv_store_json(title, cache_path):
+     """
+     Updates the TV store JSON with the new file, organizing by title, season, and episode.
+
+     Args:
+         title (str): The title of the TV show.
+         cache_path (str): The local path where the file is saved.
+     """
+     TV_STORE_JSON_PATH = os.path.join(CACHE_DIR, "tv_store.json")
+
+     tv_store_data = {}
+     if os.path.exists(TV_STORE_JSON_PATH):
+         with open(TV_STORE_JSON_PATH, 'r') as json_file:
+             tv_store_data = json.load(json_file)
+
+     # Extract season and episode information from the cache_path
+     season_part = os.path.basename(os.path.dirname(cache_path))  # Extracts 'Season 1'
+     episode_part = os.path.basename(cache_path)  # Extracts 'Grand Blue Dreaming - S01E01 - Deep Blue HDTV-720p.mp4'
+
+     # Create the structure if not already present
+     if title not in tv_store_data:
+         tv_store_data[title] = {}
+
+     if season_part not in tv_store_data[title]:
+         tv_store_data[title][season_part] = {}
+
+     # Assuming episode_part is unique for each episode within a season
+     tv_store_data[title][season_part][episode_part] = cache_path
+
+     with open(TV_STORE_JSON_PATH, 'w') as json_file:
+         json.dump(tv_store_data, json_file, indent=2)
+
+     print(f'TV store updated with {title}, {season_part}, {episode_part}.')
+
+ ###############################################################################
+ def get_file_structure(repo, token, path="", proxies=None):
+     """
+     Fetches the file structure of a specified Hugging Face repository.
+
+     Args:
+         repo (str): The name of the repository.
+         token (str): The authorization token for the request.
+         path (str, optional): The specific path in the repository. Defaults to "".
+         proxies (dict, optional): The proxies to use for the request. Defaults to None.
+
+     Returns:
+         list: A list of file structure information.
+     """
+     api_url = f"https://huggingface.co/api/models/{repo}/tree/main/{path}"
+     headers = {'Authorization': f'Bearer {token}'}
+     print(f"Fetching file structure from URL: {api_url} with proxies: {proxies}")
+     try:
+         response = requests.get(api_url, headers=headers, proxies=proxies)
+         response.raise_for_status()
+         return response.json()
+     except RequestException as e:
+         print(f"Error fetching file structure: {e}")
+         return []
+
+ def write_file_structure_to_json(file_structure, file_path):
+     """
+     Writes the file structure to a JSON file.
+
+     Args:
+         file_structure (list): The file structure data.
+         file_path (str): The path where the JSON file will be saved.
+     """
+     try:
+         with open(file_path, 'w') as json_file:
+             json.dump(file_structure, json_file, indent=2)
+         print(f'File structure written to {file_path}')
+     except IOError as e:
+         print(f"Error writing file structure to JSON: {e}")
+
+ if __name__ == "__main__":
+     file_url = "https://huggingface.co/Unicone-Studio/jellyfin_media/resolve/main/films/Funky%20Monkey%202004/Funky%20Monkey%20(2004)%20Web-dl%201080p.mp4"
+     token = os.getenv("TOKEN")
+     cache_path = os.path.join(CACHE_DIR, "films/Funky Monkey 2004/Funky Monkey (2004) Web-dl 1080p.mp4")
+     proxies = get_system_proxies()
+     film_id = "funky_monkey_2004"  # Unique identifier for the film download
+     download_film(file_url, token, cache_path, proxies=proxies, film_id=film_id, title="Funky Monkey 2004")
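
Note: download_film blocks while streaming chunks, so callers run it on a worker thread and poll get_download_progress, as old.app.py does. A sketch with placeholder URL and paths:

import os
import time
from threading import Thread
from hf_scrapper import download_film, get_download_progress, get_system_proxies

# Placeholder values; in old.app.py these come from the index and env vars.
file_url = "https://huggingface.co/<repo>/resolve/main/<path>"
cache_path = "/tmp/cache/films/Example Film/example.mp4"
t = Thread(target=download_film,
           args=(file_url, os.getenv("TOKEN"), cache_path, get_system_proxies(), "example_film", "Example Film"))
t.start()
while t.is_alive():
    print(get_download_progress("example_film"))  # total / downloaded / progress / status / eta
    time.sleep(5)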
indexer.py ADDED
@@ -0,0 +1,32 @@
+ import json
+ from hf_scrapper import get_system_proxies, get_file_structure, write_file_structure_to_json
+ from dotenv import load_dotenv
+ import os
+
+ load_dotenv()
+
+ def index_repository(token, repo, current_path="", proxies=None):
+     file_structure = get_file_structure(repo, token, current_path, proxies)
+     full_structure = []
+     for item in file_structure:
+         if item['type'] == 'directory':
+             sub_directory_structure = index_repository(token, repo, item['path'], proxies)
+             full_structure.append({
+                 "type": "directory",
+                 "path": item['path'],
+                 "contents": sub_directory_structure
+             })
+         else:
+             full_structure.append(item)
+     return full_structure
+
+ def indexer():
+     token = os.getenv("TOKEN")
+     repo = os.getenv("REPO")
+     output_path = os.getenv("INDEX_FILE")
+
+     proxies = get_system_proxies()
+     full_structure = index_repository(token, repo, "", proxies)
+     write_file_structure_to_json(full_structure, output_path)
+     print(f"Full file structure for repository '{repo}' has been indexed and saved to {output_path}")
+
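
Note: index_repository nests directories recursively under "contents" and keeps file entries as the tree API returns them. An illustrative (invented) excerpt of what indexer() writes to INDEX_FILE:

[
  {
    "type": "directory",
    "path": "films",
    "contents": [
      {
        "type": "directory",
        "path": "films/Example Film 2004",
        "contents": [
          {"type": "file", "path": "films/Example Film 2004/Example Film (2004) 1080p.mp4"}
        ]
      }
    ]
  }
]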
old.app.py ADDED
@@ -0,0 +1,174 @@
+ from flask import Flask, jsonify, request, send_from_directory
+ from flask_cors import CORS
+ import os
+ import json
+ from threading import Thread
+ import urllib.parse
+ from Instance import Instance
+ from api import LoadBalancerAPI
+
+ app = Flask(__name__)
+ CORS(app)
+
+ # Constants and Configuration
+ CACHE_DIR = os.getenv("CACHE_DIR")
+ INDEX_FILE = os.getenv("INDEX_FILE")
+ TOKEN = os.getenv("TOKEN")
+ REPO = os.getenv("REPO")
+ ID = os.getenv("ID")
+ URL = os.getenv("URL")
+ LOAD_BALANCER_URL = os.getenv("LOAD_BALANCER_URL")
+
+ load_balancer_api = LoadBalancerAPI(base_url=LOAD_BALANCER_URL)
+ instance = Instance(id=ID, url=URL, cache_dir=CACHE_DIR, index_file=INDEX_FILE, token=TOKEN, repo=REPO, load_balancer_api=load_balancer_api)
+
+ # API Endpoints
+ @app.route('/api/film/<title>', methods=['GET'])
+ def get_movie_api(title):
+     """Endpoint to get the movie by title."""
+     if not title:
+         return jsonify({"error": "Title parameter is required"}), 400
+
+     # Load the film store JSON
+     with open(instance.FILM_STORE_JSON_PATH, 'r') as json_file:
+         film_store_data = json.load(json_file)
+
+     # Check if the film is already cached
+     if title in film_store_data:
+         cache_path = film_store_data[title]
+         if os.path.exists(cache_path):
+             return send_from_directory(os.path.dirname(cache_path), os.path.basename(cache_path))
+
+     movie_path = instance.find_movie_path(title)
+
+     if not movie_path:
+         return jsonify({"error": "Movie not found"}), 404
+
+     cache_path = os.path.join(CACHE_DIR, movie_path)
+     file_url = f"https://huggingface.co/{REPO}/resolve/main/{movie_path}"
+     proxies = instance.get_system_proxies()
+     film_id = instance.get_film_id(title)
+
+     # Start the download in a separate thread if not already downloading
+     if film_id not in instance.download_threads or not instance.download_threads[film_id].is_alive():
+         thread = Thread(target=instance.download_film, args=(file_url, TOKEN, cache_path, proxies, film_id, title))
+         instance.download_threads[film_id] = thread
+         thread.start()
+
+     return jsonify({"status": "Download started", "film_id": film_id})
+
+ @app.route('/api/tv/<title>/<season>/<episode>', methods=['GET'])
+ def get_tv_show_api(title, season, episode):
+     """Endpoint to get the TV show by title, season, and episode."""
+     if not title or not season or not episode:
+         return jsonify({"error": "Title, season, and episode parameters are required"}), 400
+
+     # Load the TV store JSON
+     with open(instance.TV_STORE_JSON_PATH, 'r') as json_file:
+         tv_store_data = json.load(json_file)
+
+     # Check if the episode is already cached
+     if title in tv_store_data and season in tv_store_data[title]:
+         for ep in tv_store_data[title][season]:
+             if episode in ep:
+                 cache_path = tv_store_data[title][season][ep]
+                 print(cache_path)
+                 if os.path.exists(cache_path):
+                     return send_from_directory(os.path.dirname(cache_path), os.path.basename(cache_path))
+
+     tv_path = instance.find_tv_path(title)
+
+     if not tv_path:
+         return jsonify({"error": "TV show not found"}), 404
+
+     episode_path = None
+     for directory in instance.file_structure:
+         if directory['type'] == 'directory' and directory['path'] == 'tv':
+             for sub_directory in directory['contents']:
+                 if sub_directory['type'] == 'directory' and title.lower() in sub_directory['path'].lower():
+                     for season_dir in sub_directory['contents']:
+                         if season_dir['type'] == 'directory' and season in season_dir['path']:
+                             for episode_file in season_dir['contents']:
+                                 if episode_file['type'] == 'file' and episode in episode_file['path']:
+                                     episode_path = episode_file['path']
+                                     break
+
+     if not episode_path:
+         return jsonify({"error": "Episode not found"}), 404
+
+     cache_path = os.path.join(CACHE_DIR, episode_path)
+     file_url = f"https://huggingface.co/{REPO}/resolve/main/{episode_path}"
+     proxies = instance.get_system_proxies()
+     episode_id = instance.encode_episodeid(title, season, episode)
+
+     # Start the download in a separate thread if not already downloading
+     if episode_id not in instance.download_threads or not instance.download_threads[episode_id].is_alive():
+         thread = Thread(target=instance.download_episode, args=(file_url, TOKEN, cache_path, proxies, episode_id, title))
+         instance.download_threads[episode_id] = thread
+         thread.start()
+
+     return jsonify({"status": "Download started", "episode_id": episode_id})
+
+ @app.route('/api/progress/<id>', methods=['GET'])
+ def get_progress_api(id):
+     """Endpoint to get the download progress of a movie or TV show episode."""
+     progress = instance.get_download_progress(id)
+     return jsonify({"id": id, "progress": progress})
+
+ @app.route('/api/cache/size', methods=['GET'])
+ def get_cache_size_api():
+     total_size = 0
+     for dirpath, dirnames, filenames in os.walk(CACHE_DIR):
+         for f in filenames:
+             fp = os.path.join(dirpath, f)
+             total_size += os.path.getsize(fp)
+     readable_size = instance.bytes_to_human_readable(total_size)
+     return jsonify({"cache_size": readable_size})
+
+ @app.route('/api/cache/clear', methods=['POST'])
+ def clear_cache_api():
+     for dirpath, dirnames, filenames in os.walk(CACHE_DIR):
+         for f in filenames:
+             fp = os.path.join(dirpath, f)
+             os.remove(fp)
+     return jsonify({"status": "Cache cleared"})
+
+ @app.route('/api/tv/store', methods=['GET'])
+ def get_tv_store_api():
+     """Endpoint to get the TV store JSON."""
+     if os.path.exists(instance.TV_STORE_JSON_PATH):
+         with open(instance.TV_STORE_JSON_PATH, 'r') as json_file:
+             tv_store_data = json.load(json_file)
+         return jsonify(tv_store_data)
+     return jsonify({}), 404
+
+ @app.route('/api/film/store', methods=['GET'])
+ def get_film_store_api():
+     """Endpoint to get the film store JSON."""
+     if os.path.exists(instance.FILM_STORE_JSON_PATH):
+         with open(instance.FILM_STORE_JSON_PATH, 'r') as json_file:
+             film_store_data = json.load(json_file)
+         return jsonify(film_store_data)
+     return jsonify({}), 404
+
+ @app.route("/api/film/all")
+ def get_all_films_api():
+     return instance.get_all_films()
+
+ @app.route("/api/tv/all")
+ def get_all_tvshows_api():
+     return instance.get_all_tv_shows()
+
+ @app.route("/api/get/report", methods=["GET"])
+ def get_report():
+     report = instance.compile_report()
+     return jsonify(report)
+
+ # Routes
+ @app.route('/')
+ def index():
+     return jsonify(instance.version)
+
+ # Main entry point
+ if __name__ == "__main__":
+     app.run(debug=True, host="0.0.0.0", port=7860)
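
Note: a few illustrative calls against these Flask endpoints (host, port, and titles are placeholders; the first request for an uncached title starts a background download and returns its id rather than the file):

import requests

base = "http://localhost:7860"
print(requests.get(f"{base}/api/film/Example Film").json())      # {"status": "Download started", "film_id": "example_film"}
print(requests.get(f"{base}/api/progress/example_film").json())  # progress dict from get_download_progress
print(requests.get(f"{base}/api/cache/size").json())             # {"cache_size": "..."}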
requirements.txt CHANGED
@@ -1,2 +1,6 @@
  fastapi
  uvicorn[standard]
+ requests
+ python-dotenv
+ tqdm
+ aiofiles