# instance1 / hf_scrapper.py
# ChandimaPrabath's picture
# init
# dd2f10e
import os
import requests
import time
from requests.exceptions import RequestException
from tqdm import tqdm
from app import instance
# Root directory for cached downloads, read from the environment
# (None when the CACHE_DIR variable is not set).
CACHE_DIR = os.environ.get("CACHE_DIR")

# In-memory registry of download state, keyed by the film/episode id passed
# to the download functions and read back by get_download_progress().
download_progress = {}
def download_film(file_url, token, cache_path, film_id, title, chunk_size=100 * 1024 * 1024):
    """
    Downloads a film from the specified URL and saves it to the cache path.
    Tracks the download progress in ``download_progress[film_id]``.

    Request and file-system errors are caught and reported via the progress
    entry's ``status`` ("Failed") rather than raised to the caller.

    Args:
        file_url (str): The URL of the file to download.
        token (str): The authorization token, sent as a Bearer token.
        cache_path (str): The path to save the downloaded file.
        film_id (str): Unique identifier for the film download.
        title (str): The title of the film, used as the film store key.
        chunk_size (int): Size of each chunk to download, in bytes.
    """
    print(f"Downloading file from URL: {file_url} to {cache_path}")
    headers = {'Authorization': f'Bearer {token}'}

    # Register the entry BEFORE any network call: the except/finally clauses
    # below update it, and would raise KeyError (masking the real error) if
    # the request failed before the entry existed.
    progress = {"total": 0, "downloaded": 0, "status": "Downloading", "start_time": time.time()}
    download_progress[film_id] = progress

    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(file_url, headers=headers, stream=True) as response:
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))
            progress["total"] = total_size

            # dirname is '' for a bare file name, and os.makedirs('') raises.
            cache_dir = os.path.dirname(cache_path)
            if cache_dir:
                os.makedirs(cache_dir, exist_ok=True)

            with open(cache_path, 'wb') as file, tqdm(total=total_size, unit='B', unit_scale=True, desc=cache_path) as pbar:
                for data in response.iter_content(chunk_size=chunk_size):
                    file.write(data)
                    pbar.update(len(data))
                    progress["downloaded"] += len(data)

        print(f'File cached to {cache_path} successfully.')
        update_film_store(title, cache_path)
        progress["status"] = "Completed"
    except RequestException as e:
        print(f"Error downloading file: {e}")
        progress["status"] = "Failed"
    except IOError as e:
        print(f"Error writing file {cache_path}: {e}")
        progress["status"] = "Failed"
    finally:
        # Stamp the end time only once the download reached a terminal state.
        if progress["status"] != "Downloading":
            progress["end_time"] = time.time()
def get_download_progress(id):
    """
    Gets the download progress for a specific film or episode.

    Args:
        id (str): The unique identifier the download was registered under.

    Returns:
        dict: A dictionary with the keys ``total``, ``downloaded``,
        ``progress`` (percentage), ``status`` and ``eta`` (seconds).
        ``eta`` is ``None`` when it cannot be estimated (nothing downloaded
        yet, unknown total size, or a non-active status) and ``0`` once the
        download is completed. Unknown ids yield a "Not Found" record.
    """
    entry = download_progress.get(id)
    if entry is None:
        return {"total": 0, "downloaded": 0, "progress": 0, "status": "Not Found", "eta": None}

    total = entry["total"]
    downloaded = entry["downloaded"]
    status = entry.get("status", "In Progress")
    progress = (downloaded / total) * 100 if total > 0 else 0

    eta = None
    if status == "Completed":
        eta = 0
    elif status == "Downloading" and downloaded > 0 and total > 0:
        # Extrapolate from the average rate so far. A total of 0 means the
        # size is unknown, so no estimate is possible (previously this
        # produced a negative ETA).
        elapsed_time = time.time() - entry["start_time"]
        # Clamp so that receiving more bytes than announced never yields a
        # negative remaining time.
        remaining = max(total - downloaded, 0)
        eta = elapsed_time * remaining / downloaded

    return {"total": total, "downloaded": downloaded, "progress": progress, "status": status, "eta": eta}
def update_film_store(title, cache_path):
    """
    Registers a cached film in ``instance.FILM_STORE``.

    Args:
        title (str): The title of the film; used as the store key.
        cache_path (str): The local path where the file is saved.
    """
    # NOTE(review): only the in-memory mapping is updated here; whether it is
    # persisted to JSON happens (if at all) outside this function.
    film_store = instance.FILM_STORE
    film_store[title] = cache_path
    print(f'Film store updated with {title}.')
###############################################################################
def download_episode(file_url, token, cache_path, episode_id, title, chunk_size=100 * 1024 * 1024):
    """
    Downloads a TV episode from the specified URL and saves it to the cache path.
    Tracks the download progress in ``download_progress[episode_id]``.

    Request and file-system errors are caught and reported via the progress
    entry's ``status`` ("Failed") rather than raised to the caller.

    Args:
        file_url (str): The URL of the file to download.
        token (str): The authorization token, sent as a Bearer token.
        cache_path (str): The path to save the downloaded file.
        episode_id (str): Unique identifier for the episode download.
        title (str): The title of the TV show, used as the TV store key.
        chunk_size (int): Size of each chunk to download, in bytes.
    """
    print(f"Downloading file from URL: {file_url} to {cache_path}")
    headers = {'Authorization': f'Bearer {token}'}

    # Register the entry BEFORE any network call: the except/finally clauses
    # below update it, and would raise KeyError (masking the real error) if
    # the request failed before the entry existed.
    progress = {"total": 0, "downloaded": 0, "status": "Downloading", "start_time": time.time()}
    download_progress[episode_id] = progress

    try:
        # The context manager releases the streamed connection on every path.
        with requests.get(file_url, headers=headers, stream=True) as response:
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))
            progress["total"] = total_size

            # dirname is '' for a bare file name, and os.makedirs('') raises.
            cache_dir = os.path.dirname(cache_path)
            if cache_dir:
                os.makedirs(cache_dir, exist_ok=True)

            with open(cache_path, 'wb') as file, tqdm(total=total_size, unit='B', unit_scale=True, desc=cache_path) as pbar:
                for data in response.iter_content(chunk_size=chunk_size):
                    file.write(data)
                    pbar.update(len(data))
                    progress["downloaded"] += len(data)

        print(f'File cached to {cache_path} successfully.')
        update_tv_store(title, cache_path)
        progress["status"] = "Completed"
    except RequestException as e:
        print(f"Error downloading file: {e}")
        progress["status"] = "Failed"
    except IOError as e:
        print(f"Error writing file {cache_path}: {e}")
        progress["status"] = "Failed"
    finally:
        # Stamp the end time only once the download reached a terminal state.
        if progress["status"] != "Downloading":
            progress["end_time"] = time.time()
def update_tv_store(title, cache_path):
    """
    Registers a cached episode in ``instance.TV_STORE``, nested by show
    title, then season folder name, then episode file name.

    Args:
        title (str): The title of the TV show.
        cache_path (str): The local path where the file is saved.
    """
    # The parent folder names the season (e.g. 'Season 1') and the file name
    # identifies the episode
    # (e.g. 'Grand Blue Dreaming - S01E01 - Deep Blue HDTV-720p.mp4').
    season_dir, episode_part = os.path.split(cache_path)
    season_part = os.path.basename(season_dir)

    # Create the intermediate levels on first use.
    tv_store = instance.TV_STORE
    if title not in tv_store:
        tv_store[title] = {}
    seasons = tv_store[title]
    if season_part not in seasons:
        seasons[season_part] = {}

    # The episode file name is assumed to be unique within its season.
    seasons[season_part][episode_part] = cache_path
    print(f'TV store updated with {title}, {season_part}, {episode_part}.')
if __name__ == "__main__":
    # Manual smoke test: download one sample film into the cache directory.
    file_url = "https://huggingface.co/Unicone-Studio/jellyfin_media/resolve/main/films/Funky%20Monkey%202004/Funky%20Monkey%20(2004)%20Web-dl%201080p.mp4"
    token = os.getenv("TOKEN")
    # os.path.join(None, ...) would fail with an opaque TypeError.
    if not CACHE_DIR:
        raise SystemExit("CACHE_DIR environment variable must be set.")
    cache_path = os.path.join(CACHE_DIR, "films/Funky Monkey 2004/Funky Monkey (2004) Web-dl 1080p.mp4")
    film_id = "funky_monkey_2004"  # Unique identifier for the film download
    # download_film() requires a title; it was previously omitted, which made
    # this call raise TypeError. The value mirrors the film's folder name.
    title = "Funky Monkey 2004"
    download_film(file_url, token, cache_path, film_id=film_id, title=title)