ChandimaPrabath committed on
Commit dd2f10e
1 Parent(s): 2e81e75
Files changed (5):
  1. Instance.py +27 -123
  2. api.py +11 -0
  3. app.py +17 -1
  4. hf_scrapper.py +18 -102
  5. indexer.py +0 -32
Instance.py CHANGED
@@ -1,12 +1,10 @@
 import os
 import requests
 import json
-import urllib.request
 import time
 from threading import Thread, Event
 from requests.exceptions import RequestException
 from tqdm import tqdm
-from indexer import indexer
 import logging
 
 CACHE_DIR = os.getenv("CACHE_DIR")
@@ -14,76 +12,58 @@ CACHE_DIR = os.getenv("CACHE_DIR")
 download_progress = {}
 
 class Instance:
-    def __init__(self, id, url, cache_dir, index_file, token, repo, load_balancer_api, max_retries=20, initial_delay=1):
+    def __init__(self, id, url, cache_dir, token, repo, load_balancer_api, max_retries=20, initial_delay=1):
         self.version = "0.0.0.1 Alpha"
         self.id = id
         self.url = url
         self.CACHE_DIR = cache_dir
-        self.INDEX_FILE = index_file
         self.TOKEN = token
         self.REPO = repo
-        self.FILM_STORE_JSON_PATH = os.path.join(cache_dir, "film_store.json")
-        self.TV_STORE_JSON_PATH = os.path.join(cache_dir, "tv_store.json")
+        self.FILM_STORE = {}
+        self.TV_STORE = {}
         self.download_threads = {}
         self.file_structure = None
         self.load_balancer_api = load_balancer_api
         self.max_retries = max_retries
         self.initial_delay = initial_delay
-        self.last_report_time = time.time() # Initialize the last report time
+        self.last_report_time = time.time()
        self.re_register_event = Event()
 
         # Ensure CACHE_DIR exists
         if not os.path.exists(self.CACHE_DIR):
             os.makedirs(self.CACHE_DIR)
 
-        for path in [self.FILM_STORE_JSON_PATH, self.TV_STORE_JSON_PATH]:
-            if not os.path.exists(path):
-                with open(path, 'w') as json_file:
-                    json.dump({}, json_file)
-
-        # Index the file structure and load it
-        self.run_indexer_and_load()
-
-        # Start prefetching metadata and monitoring registration
         self.register_to_load_balancer()
+        self.reload_file_structure()
         registration_thread = Thread(target=self.monitor_registration)
         registration_thread.daemon = True
         registration_thread.start()
 
         # Start the thread to re-index every 2 minutes
-        indexer_thread = Thread(target=self.run_indexer_periodically)
+        indexer_thread = Thread(target=self.get_file_structure_periodically)
         indexer_thread.daemon = True
         indexer_thread.start()
 
-    def run_indexer_and_load(self):
+    def reload_file_structure(self):
         """Runs the indexer and loads the file structure from INDEX_FILE."""
-        indexer()
-        if not os.path.exists(self.INDEX_FILE):
-            raise FileNotFoundError(f"{self.INDEX_FILE} not found. Please make sure the file exists.")
-
-        with open(self.INDEX_FILE, 'r') as f:
-            self.file_structure = json.load(f)
+        self.file_structure = self.load_balancer_api.get_file_structure()
         logging.info("File structure reloaded successfully.")
 
-    def run_indexer_periodically(self):
+    def get_file_structure_periodically(self):
         """Periodically reruns the indexer and reloads the file structure."""
         while True:
-            time.sleep(120) # Wait for 2 minutes
+            time.sleep(300) # Wait for 5 minutes
             logging.info("Re-running indexer and reloading file structure.")
-            self.run_indexer_and_load()
+            self.reload_file_structure()
 
     def compile_report(self):
         self.last_report_time = time.time() # Update the last report time
-
-        film_store_path = os.path.join(self.CACHE_DIR, "film_store.json")
-        tv_store_path = os.path.join(self.CACHE_DIR, "tv_store.json")
         cache_size = self.get_cache_size()
-
         report = {
             "instance_id": self.id,
             "instance_url": self.url,
-            "film_store": self.read_json(film_store_path),
-            "tv_store": self.read_json(tv_store_path),
+            "film_store": self.FILM_STORE,
+            "tv_store": self.TV_STORE,
             "cache_size": cache_size
         }
         return report
@@ -118,26 +98,7 @@ class Instance:
             return json.load(json_file)
         return {}
 
-    @staticmethod
-    def get_system_proxies():
-        """
-        Retrieves the system's HTTP and HTTPS proxies.
-
-        Returns:
-            dict: A dictionary containing the proxies.
-        """
-        try:
-            proxies = urllib.request.getproxies()
-            print("System proxies:", proxies)
-            return {
-                "http": proxies.get("http"),
-                "https": proxies.get("http")
-            }
-        except Exception as e:
-            print(f"Error getting system proxies: {e}")
-            return {}
-
-    def download_film(self, file_url, token, cache_path, proxies, film_id, title, chunk_size=100 * 1024 * 1024):
+    def download_film(self, file_url, token, cache_path, film_id, title, chunk_size=100 * 1024 * 1024):
         """
         Downloads a file from the specified URL and saves it to the cache path.
         Tracks the download progress.
@@ -146,15 +107,14 @@
             file_url (str): The URL of the file to download.
             token (str): The authorization token for the request.
             cache_path (str): The path to save the downloaded file.
-            proxies (dict): Proxies for the request.
             film_id (str): Unique identifier for the film download.
             title (str): The title of the film.
             chunk_size (int): Size of each chunk to download.
         """
-        print(f"Downloading file from URL: {file_url} to {cache_path} with proxies: {proxies}")
+        print(f"Downloading file from URL: {file_url} to {cache_path}")
         headers = {'Authorization': f'Bearer {token}'}
         try:
-            response = requests.get(file_url, headers=headers, proxies=proxies, stream=True)
+            response = requests.get(file_url, headers=headers, stream=True)
             response.raise_for_status()
 
             total_size = int(response.headers.get('content-length', 0))
@@ -168,7 +128,7 @@
                     download_progress[film_id]["downloaded"] += len(data)
 
             print(f'File cached to {cache_path} successfully.')
-            self.update_film_store_json(title, cache_path)
+            self.FILM_STORE[title] = cache_path
             download_progress[film_id]["status"] = "Completed"
         except RequestException as e:
             print(f"Error downloading file: {e}")
@@ -208,26 +168,9 @@
             return {"total": total, "downloaded": downloaded, "progress": progress, "status": status, "eta": eta}
         return {"total": 0, "downloaded": 0, "progress": 0, "status": "Not Found", "eta": None}
 
-    def update_film_store_json(self,title, cache_path):
-        """
-        Updates the film store JSON with the new file.
-
-        Args:
-            title (str): The title of the film.
-            cache_path (str): The local path where the file is saved.
-        """
-        film_store_data = {}
-        if os.path.exists(self.FILM_STORE_JSON_PATH):
-            with open(self.FILM_STORE_JSON_PATH, 'r') as json_file:
-                film_store_data = json.load(json_file)
-
-        film_store_data[title] = cache_path
 
-        with open(self.FILM_STORE_JSON_PATH, 'w') as json_file:
-            json.dump(film_store_data, json_file, indent=2)
-        print(f'Film store updated with {title}.')
 
-    def download_episode(self, file_url, token, cache_path, proxies, episode_id, title, chunk_size=100 * 1024 * 1024):
+    def download_episode(self, file_url, token, cache_path, episode_id, title, chunk_size=100 * 1024 * 1024):
         """
         Downloads a file from the specified URL and saves it to the cache path.
         Tracks the download progress.
@@ -236,15 +179,14 @@
             file_url (str): The URL of the file to download.
             token (str): The authorization token for the request.
             cache_path (str): The path to save the downloaded file.
-            proxies (dict): Proxies for the request.
             episode_id (str): Unique identifier for the film download.
             title (str): The title of the film.
             chunk_size (int): Size of each chunk to download.
         """
-        print(f"Downloading file from URL: {file_url} to {cache_path} with proxies: {proxies}")
+        print(f"Downloading file from URL: {file_url} to {cache_path}")
         headers = {'Authorization': f'Bearer {token}'}
         try:
-            response = requests.get(file_url, headers=headers, proxies=proxies, stream=True)
+            response = requests.get(file_url, headers=headers, stream=True)
             response.raise_for_status()
 
             total_size = int(response.headers.get('content-length', 0))
@@ -258,7 +200,7 @@
                     download_progress[episode_id]["downloaded"] += len(data)
 
             print(f'File cached to {cache_path} successfully.')
-            self.update_tv_store_json(title, cache_path)
+            self.update_tv_store(title, cache_path)
             download_progress[episode_id]["status"] = "Completed"
         except RequestException as e:
             print(f"Error downloading file: {e}")
@@ -270,7 +212,7 @@
             if download_progress[episode_id]["status"] != "Downloading":
                 download_progress[episode_id]["end_time"] = time.time()
 
-    def update_tv_store_json(self, title, cache_path):
+    def update_tv_store(self, title, cache_path):
         """
         Updates the TV store JSON with the new file, organizing by title, season, and episode.
 
@@ -278,27 +220,20 @@
             title (str): The title of the TV show.
             cache_path (str): The local path where the file is saved.
         """
-        tv_store_data = {}
-        if os.path.exists(self.TV_STORE_JSON_PATH):
-            with open(self.TV_STORE_JSON_PATH, 'r') as json_file:
-                tv_store_data = json.load(json_file)
 
         # Extract season and episode information from the cache_path
         season_part = os.path.basename(os.path.dirname(cache_path)) # Extracts 'Season 1'
         episode_part = os.path.basename(cache_path) # Extracts 'Grand Blue Dreaming - S01E01 - Deep Blue HDTV-720p.mp4'
 
         # Create the structure if not already present
-        if title not in tv_store_data:
-            tv_store_data[title] = {}
+        if title not in self.TV_STORE:
+            self.TV_STORE[title] = {}
 
-        if season_part not in tv_store_data[title]:
-            tv_store_data[title][season_part] = {}
+        if season_part not in self.TV_STORE[title]:
+            self.TV_STORE[title][season_part] = {}
 
         # Assuming episode_part is unique for each episode within a season
-        tv_store_data[title][season_part][episode_part] = cache_path
-
-        with open(self.TV_STORE_JSON_PATH, 'w') as json_file:
-            json.dump(tv_store_data, json_file, indent=2)
+        self.TV_STORE[title][season_part][episode_part] = cache_path
 
         print(f'TV store updated with {title}, {season_part}, {episode_part}.')
 
@@ -351,37 +286,6 @@
     def encode_episodeid(self, title, season, episode):
         return f"{title}_{season}_{episode}"
 
-    def get_all_tv_shows(self):
-        """Get all TV shows from the indexed cache structure JSON file."""
-        tv_shows = {}
-        for directory in self.file_structure:
-            if directory['type'] == 'directory' and directory['path'] == 'tv':
-                for sub_directory in directory['contents']:
-                    if sub_directory['type'] == 'directory':
-                        show_title = sub_directory['path'].split('/')[-1]
-                        tv_shows[show_title] = []
-                        for season_directory in sub_directory['contents']:
-                            if season_directory['type'] == 'directory':
-                                season = season_directory['path'].split('/')[-1]
-                                for episode in season_directory['contents']:
-                                    if episode['type'] == 'file':
-                                        tv_shows[show_title].append({
-                                            "season": season,
-                                            "episode": episode['path'].split('/')[-1],
-                                            "path": episode['path']
-                                        })
-        return tv_shows
-
-    def get_all_films(self):
-        """Get all films from the indexed cache structure JSON file."""
-        films = []
-        for directory in self.file_structure:
-            if directory['type'] == 'directory' and directory['path'] == 'films':
-                for sub_directory in directory['contents']:
-                    if sub_directory['type'] == 'directory':
-                        films.append(sub_directory['path'])
-        return films
-
     def register_to_load_balancer(self):
         retries = 0
         delay = self.initial_delay
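Note: after this change the film and TV stores live only in memory (self.FILM_STORE and self.TV_STORE), so they reset on restart and reach the load balancer via compile_report() rather than being persisted to film_store.json/tv_store.json. The docstrings of reload_file_structure and get_file_structure_periodically still describe the removed indexer flow, and the "re-index every 2 minutes" comment no longer matches the 5-minute sleep. A standalone sketch of the nesting update_tv_store builds, with a made-up title and path:

    # Sketch of update_tv_store's nesting logic; title and path are hypothetical.
    import os

    TV_STORE = {}
    cache_path = "cache/tv/Grand Blue Dreaming/Season 1/Grand Blue Dreaming - S01E01 - Deep Blue HDTV-720p.mp4"
    season_part = os.path.basename(os.path.dirname(cache_path))  # 'Season 1'
    episode_part = os.path.basename(cache_path)                  # the episode file name
    TV_STORE.setdefault("Grand Blue Dreaming", {}).setdefault(season_part, {})[episode_part] = cache_path
    # TV_STORE is now {'Grand Blue Dreaming': {'Season 1': {episode_part: cache_path}}}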
api.py CHANGED
@@ -20,3 +20,14 @@ class LoadBalancerAPI:
         except requests.exceptions.RequestException as e:
             logging.error(f'Failed to register instance {instance_id} to load balancer: {e}')
             return None
+
+    def get_file_structure(self):
+        api_endpoint = f'{self.base_url}/api/get/file_structure'
+        try:
+            headers = {'Content-Type': 'application/json'}
+            response = requests.get(api_endpoint, headers=headers)
+            response.raise_for_status()
+            return response.json() # Assuming the API returns JSON
+        except requests.exceptions.RequestException as e:
+            logging.error(f'Failed to file structure: {e}')
+            return None
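Note: get_file_structure() returns the parsed JSON body on success and None on any request failure, so callers such as Instance.reload_file_structure should tolerate None; the log message 'Failed to file structure' is presumably meant to read 'Failed to get file structure'. A short usage sketch, assuming the client is constructed with the load balancer's base URL (the URL itself is hypothetical):

    # Hypothetical usage; the constructor argument and URL are assumptions.
    from api import LoadBalancerAPI

    api = LoadBalancerAPI("http://load-balancer:8000")
    structure = api.get_file_structure()  # parsed JSON on success, None on failure
    if structure is None:
        print("Load balancer unreachable; keeping the previous file structure.")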
app.py CHANGED
@@ -1,4 +1,5 @@
 from fastapi import FastAPI
+from fastapi.responses import JSONResponse
 from Instance import Instance
 from api import LoadBalancerAPI
 import os
@@ -18,4 +19,19 @@ app = FastAPI()
 
 @app.get("/")
 async def index():
-    return instance.version
+    return instance.version
+
+@app.get("/api/get/report")
+async def get_report():
+    report=instance.compile_report()
+    return JSONResponse(report)
+
+@app.get('/api/get/tv/store')
+async def get_tv_store_api():
+    """Endpoint to get the TV store JSON."""
+    return JSONResponse(instance.TV_STORE)
+
+@app.get('/api/get/film/store')
+async def get_film_store_api():
+    """Endpoint to get the TV store JSON."""
+    return JSONResponse(instance.FILM_STORE)
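Note: the docstring on get_film_store_api says "TV store" where "film store" is meant. A quick smoke test for the three new endpoints, a sketch only (the host and port are assumptions, not taken from this repo):

    # Hypothetical smoke test; host and port are assumptions.
    import requests

    base = "http://localhost:7860"
    for route in ("/api/get/report", "/api/get/tv/store", "/api/get/film/store"):
        print(route, requests.get(base + route).json())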
hf_scrapper.py CHANGED
@@ -1,35 +1,15 @@
 import os
 import requests
-import json
-import urllib.request
 import time
 from requests.exceptions import RequestException
 from tqdm import tqdm
+from app import instance
 
 CACHE_DIR = os.getenv("CACHE_DIR")
-CACHE_JSON_PATH = os.path.join(CACHE_DIR, "cached_films.json")
 
 download_progress = {}
 
-def get_system_proxies():
-    """
-    Retrieves the system's HTTP and HTTPS proxies.
-
-    Returns:
-        dict: A dictionary containing the proxies.
-    """
-    try:
-        proxies = urllib.request.getproxies()
-        print("System proxies:", proxies)
-        return {
-            "http": proxies.get("http"),
-            "https": proxies.get("http")
-        }
-    except Exception as e:
-        print(f"Error getting system proxies: {e}")
-        return {}
-
-def download_film(file_url, token, cache_path, proxies, film_id, title, chunk_size=100 * 1024 * 1024):
+def download_film(file_url, token, cache_path, film_id, title, chunk_size=100 * 1024 * 1024):
     """
     Downloads a file from the specified URL and saves it to the cache path.
     Tracks the download progress.
@@ -38,15 +18,14 @@ def download_film(file_url, token, cache_path, proxies, film_id, title, chunk_size=100 * 1024 * 1024):
         file_url (str): The URL of the file to download.
         token (str): The authorization token for the request.
         cache_path (str): The path to save the downloaded file.
-        proxies (dict): Proxies for the request.
         film_id (str): Unique identifier for the film download.
         title (str): The title of the film.
        chunk_size (int): Size of each chunk to download.
     """
-    print(f"Downloading file from URL: {file_url} to {cache_path} with proxies: {proxies}")
+    print(f"Downloading file from URL: {file_url} to {cache_path}")
     headers = {'Authorization': f'Bearer {token}'}
     try:
-        response = requests.get(file_url, headers=headers, proxies=proxies, stream=True)
+        response = requests.get(file_url, headers=headers, stream=True)
         response.raise_for_status()
 
         total_size = int(response.headers.get('content-length', 0))
@@ -60,7 +39,7 @@
                 download_progress[film_id]["downloaded"] += len(data)
 
         print(f'File cached to {cache_path} successfully.')
-        update_film_store_json(title, cache_path)
+        update_film_store(title, cache_path)
        download_progress[film_id]["status"] = "Completed"
     except RequestException as e:
         print(f"Error downloading file: {e}")
@@ -99,7 +78,7 @@ def get_download_progress(id):
         return {"total": total, "downloaded": downloaded, "progress": progress, "status": status, "eta": eta}
     return {"total": 0, "downloaded": 0, "progress": 0, "status": "Not Found", "eta": None}
 
-def update_film_store_json(title, cache_path):
+def update_film_store(title, cache_path):
     """
     Updates the film store JSON with the new file.
 
@@ -107,22 +86,12 @@ def update_film_store_json(title, cache_path):
         title (str): The title of the film.
         cache_path (str): The local path where the file is saved.
     """
-    FILM_STORE_JSON_PATH = os.path.join(CACHE_DIR, "film_store.json")
-
-    film_store_data = {}
-    if os.path.exists(FILM_STORE_JSON_PATH):
-        with open(FILM_STORE_JSON_PATH, 'r') as json_file:
-            film_store_data = json.load(json_file)
-
-    film_store_data[title] = cache_path
-
-    with open(FILM_STORE_JSON_PATH, 'w') as json_file:
-        json.dump(film_store_data, json_file, indent=2)
+    instance.FILM_STORE[title] = cache_path
     print(f'Film store updated with {title}.')
 
 
 ###############################################################################
-def download_episode(file_url, token, cache_path, proxies, episode_id, title, chunk_size=100 * 1024 * 1024):
+def download_episode(file_url, token, cache_path, episode_id, title, chunk_size=100 * 1024 * 1024):
     """
     Downloads a file from the specified URL and saves it to the cache path.
     Tracks the download progress.
@@ -131,15 +100,14 @@ def download_episode(file_url, token, cache_path, proxies, episode_id, title, chunk_size=100 * 1024 * 1024):
         file_url (str): The URL of the file to download.
         token (str): The authorization token for the request.
         cache_path (str): The path to save the downloaded file.
-        proxies (dict): Proxies for the request.
         film_id (str): Unique identifier for the film download.
         title (str): The title of the film.
         chunk_size (int): Size of each chunk to download.
     """
-    print(f"Downloading file from URL: {file_url} to {cache_path} with proxies: {proxies}")
+    print(f"Downloading file from URL: {file_url} to {cache_path}")
     headers = {'Authorization': f'Bearer {token}'}
     try:
-        response = requests.get(file_url, headers=headers, proxies=proxies, stream=True)
+        response = requests.get(file_url, headers=headers, stream=True)
         response.raise_for_status()
 
         total_size = int(response.headers.get('content-length', 0))
@@ -153,7 +121,7 @@
                 download_progress[episode_id]["downloaded"] += len(data)
 
         print(f'File cached to {cache_path} successfully.')
-        update_tv_store_json(title, cache_path)
+        update_tv_store(title, cache_path)
         download_progress[episode_id]["status"] = "Completed"
     except RequestException as e:
         print(f"Error downloading file: {e}")
@@ -166,7 +134,7 @@
             download_progress[episode_id]["end_time"] = time.time()
 
 
-def update_tv_store_json(title, cache_path):
+def update_tv_store(title, cache_path):
     """
     Updates the TV store JSON with the new file, organizing by title, season, and episode.
 
@@ -174,76 +142,24 @@ def update_tv_store_json(title, cache_path):
         title (str): The title of the TV show.
         cache_path (str): The local path where the file is saved.
     """
-    TV_STORE_JSON_PATH = os.path.join(CACHE_DIR, "tv_store.json")
-
-    tv_store_data = {}
-    if os.path.exists(TV_STORE_JSON_PATH):
-        with open(TV_STORE_JSON_PATH, 'r') as json_file:
-            tv_store_data = json.load(json_file)
-
     # Extract season and episode information from the cache_path
     season_part = os.path.basename(os.path.dirname(cache_path)) # Extracts 'Season 1'
     episode_part = os.path.basename(cache_path) # Extracts 'Grand Blue Dreaming - S01E01 - Deep Blue HDTV-720p.mp4'
 
     # Create the structure if not already present
-    if title not in tv_store_data:
-        tv_store_data[title] = {}
+    if title not in instance.TV_STORE:
+        instance.TV_STORE[title] = {}
 
-    if season_part not in tv_store_data[title]:
-        tv_store_data[title][season_part] = {}
+    if season_part not in instance.TV_STORE[title]:
+        instance.TV_STORE[title][season_part] = {}
 
     # Assuming episode_part is unique for each episode within a season
-    tv_store_data[title][season_part][episode_part] = cache_path
-
-    with open(TV_STORE_JSON_PATH, 'w') as json_file:
-        json.dump(tv_store_data, json_file, indent=2)
-
+    instance.TV_STORE[title][season_part][episode_part] = cache_path
     print(f'TV store updated with {title}, {season_part}, {episode_part}.')
 
-###############################################################################
-def get_file_structure(repo, token, path="", proxies=None):
-    """
-    Fetches the file structure of a specified Hugging Face repository.
-
-    Args:
-        repo (str): The name of the repository.
-        token (str): The authorization token for the request.
-        path (str, optional): The specific path in the repository. Defaults to "".
-        proxies (dict, optional): The proxies to use for the request. Defaults to None.
-
-    Returns:
-        list: A list of file structure information.
-    """
-    api_url = f"https://huggingface.co/api/models/{repo}/tree/main/{path}"
-    headers = {'Authorization': f'Bearer {token}'}
-    print(f"Fetching file structure from URL: {api_url} with proxies: {proxies}")
-    try:
-        response = requests.get(api_url, headers=headers, proxies=proxies)
-        response.raise_for_status()
-        return response.json()
-    except RequestException as e:
-        print(f"Error fetching file structure: {e}")
-        return []
-
-def write_file_structure_to_json(file_structure, file_path):
-    """
-    Writes the file structure to a JSON file.
-
-    Args:
-        file_structure (list): The file structure data.
-        file_path (str): The path where the JSON file will be saved.
-    """
-    try:
-        with open(file_path, 'w') as json_file:
-            json.dump(file_structure, json_file, indent=2)
-        print(f'File structure written to {file_path}')
-    except IOError as e:
-        print(f"Error writing file structure to JSON: {e}")
-
 if __name__ == "__main__":
     file_url = "https://huggingface.co/Unicone-Studio/jellyfin_media/resolve/main/films/Funky%20Monkey%202004/Funky%20Monkey%20(2004)%20Web-dl%201080p.mp4"
     token = os.getenv("TOKEN")
     cache_path = os.path.join(CACHE_DIR, "films/Funky Monkey 2004/Funky Monkey (2004) Web-dl 1080p.mp4")
-    proxies = get_system_proxies()
     film_id = "funky_monkey_2004" # Unique identifier for the film download
-    download_film(file_url, token, cache_path, proxies=proxies, film_id=film_id)
+    download_film(file_url, token, cache_path, film_id=film_id)
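Note: the new module-level `from app import instance` makes hf_scrapper depend on app at import time; if app (or anything it imports) ever imports hf_scrapper back, the cycle will raise at startup. A common defensive pattern is to defer the import to call time; this is a sketch of that pattern, not what the commit does:

    # Sketch of a deferred import, not what this commit does.
    def update_film_store(title, cache_path):
        from app import instance  # resolved when the function runs, avoiding an import-time cycle
        instance.FILM_STORE[title] = cache_path
        print(f'Film store updated with {title}.')

Also note that download_episode's docstring still documents film_id although the parameter is episode_id, and both update_* docstrings still describe updating JSON files that this commit no longer writes.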
indexer.py DELETED
@@ -1,32 +0,0 @@
-import json
-from hf_scrapper import get_system_proxies, get_file_structure, write_file_structure_to_json
-from dotenv import load_dotenv
-import os
-
-load_dotenv()
-
-def index_repository(token, repo, current_path="", proxies=None):
-    file_structure = get_file_structure(repo, token, current_path, proxies)
-    full_structure = []
-    for item in file_structure:
-        if item['type'] == 'directory':
-            sub_directory_structure = index_repository(token, repo, item['path'], proxies)
-            full_structure.append({
-                "type": "directory",
-                "path": item['path'],
-                "contents": sub_directory_structure
-            })
-        else:
-            full_structure.append(item)
-    return full_structure
-
-def indexer():
-    token = os.getenv("TOKEN")
-    repo = os.getenv("REPO")
-    output_path = os.getenv("INDEX_FILE")
-
-    proxies = get_system_proxies()
-    full_structure = index_repository(token, repo, "", proxies)
-    write_file_structure_to_json(full_structure, output_path)
-    print(f"Full file structure for repository '{repo}' has been indexed and saved to {output_path}")
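Note: with indexer.py deleted, nothing in this repo crawls the Hugging Face tree API anymore; Instance.reload_file_structure now expects the load balancer's /api/get/file_structure endpoint to serve an equivalent structure. For reference, the shape index_repository() produced (which any remaining consumer of instance.file_structure would assume) looked like this; the entries below are illustrative, not real data:

    # Shape emitted by the removed index_repository(); entries are illustrative.
    [
        {"type": "directory", "path": "films", "contents": [
            {"type": "directory", "path": "films/Funky Monkey 2004", "contents": [...]},
        ]},
        {"type": "directory", "path": "tv", "contents": [...]},
    ]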