instance1

Running

App Files Files Community

instance1 / hf_scrapper.py

ChandimaPrabath

init

dd2f10e 3 months ago

raw

history blame contribute delete

7.2 kB

	import os
	import requests
	import time
	from requests.exceptions import RequestException
	from tqdm import tqdm
	from app import instance

	# Root directory for cached media; None when the CACHE_DIR environment variable is unset.
	CACHE_DIR = os.environ.get("CACHE_DIR")

	# Progress records for downloads, keyed by the id handed to the download functions.
	download_progress: dict = {}

	def download_film(file_url, token, cache_path, film_id, title, chunk_size=100 * 1024 * 1024):
	    """
	    Downloads a file from the specified URL and saves it to the cache path.
	    Tracks the download progress in ``download_progress[film_id]``.

	    Request and file-system errors are reported on stdout and recorded as a
	    "Failed" status instead of being raised. Any other exception is recorded
	    as "Failed" as well and then re-raised.

	    Args:
	        file_url (str): The URL of the file to download.
	        token (str): The authorization token for the request.
	        cache_path (str): The path to save the downloaded file.
	        film_id (str): Unique identifier for the film download.
	        title (str): The title of the film, used as the key in the film store.
	        chunk_size (int): Size of each chunk to download.
	    """
	    print(f"Downloading file from URL: {file_url} to {cache_path}")
	    headers = {'Authorization': f'Bearer {token}'}

	    # Register the progress record before touching the network so the error
	    # handlers and the finally clause below always have an entry to update.
	    progress = {"total": 0, "downloaded": 0, "status": "Downloading", "start_time": time.time()}
	    download_progress[film_id] = progress

	    try:
	        # The context manager releases the streamed connection on every exit path.
	        with requests.get(file_url, headers=headers, stream=True) as response:
	            response.raise_for_status()

	            total_size = int(response.headers.get('content-length', 0))
	            progress["total"] = total_size

	            # dirname is empty for a bare filename; os.makedirs("") would raise.
	            cache_dir = os.path.dirname(cache_path)
	            if cache_dir:
	                os.makedirs(cache_dir, exist_ok=True)

	            with open(cache_path, 'wb') as file, tqdm(total=total_size, unit='B', unit_scale=True, desc=cache_path) as pbar:
	                for data in response.iter_content(chunk_size=chunk_size):
	                    if not data:
	                        continue
	                    file.write(data)
	                    pbar.update(len(data))
	                    progress["downloaded"] += len(data)

	        print(f'File cached to {cache_path} successfully.')
	        update_film_store(title, cache_path)
	        progress["status"] = "Completed"
	    except RequestException as e:
	        print(f"Error downloading file: {e}")
	        progress["status"] = "Failed"
	    except IOError as e:
	        print(f"Error writing file {cache_path}: {e}")
	        progress["status"] = "Failed"
	    except Exception:
	        # Do not leave the record stuck in "Downloading" on unexpected errors.
	        progress["status"] = "Failed"
	        raise
	    finally:
	        if progress["status"] != "Downloading":
	            progress["end_time"] = time.time()

	def get_download_progress(id):
	    """
	    Gets the download progress for a specific film or episode download.

	    Args:
	        id (str): The unique identifier the download was registered under in
	            ``download_progress``. (The name shadows the ``id`` builtin; it is
	            kept so existing keyword callers continue to work.)

	    Returns:
	        dict: A dictionary with the keys ``total``, ``downloaded``,
	        ``progress`` (percentage, 0 when the total size is unknown),
	        ``status`` and ``eta`` (seconds remaining; ``None`` when it cannot
	        be estimated, 0 once completed). Unknown ids yield a record with
	        status "Not Found".
	    """
	    if id not in download_progress:
	        return {"total": 0, "downloaded": 0, "progress": 0, "status": "Not Found", "eta": None}

	    entry = download_progress[id]
	    total = entry["total"]
	    downloaded = entry["downloaded"]
	    status = entry.get("status", "In Progress")
	    progress = (downloaded / total) * 100 if total > 0 else 0

	    eta = None
	    # An ETA needs both a known total and some received bytes; without the
	    # total (no Content-Length) the estimate would come out negative.
	    if status == "Downloading" and downloaded > 0 and total > 0:
	        elapsed_time = time.time() - entry["start_time"]
	        # Clamp: the received byte count can exceed the advertised total.
	        remaining = max(total - downloaded, 0)
	        eta = elapsed_time * remaining / downloaded
	    elif status == "Completed":
	        eta = 0

	    return {"total": total, "downloaded": downloaded, "progress": progress, "status": status, "eta": eta}

	def update_film_store(title, cache_path):
	    """
	    Registers a cached film file in ``instance.FILM_STORE`` under its title.

	    NOTE(review): only the mapping assignment is visible here; whether the
	    store is persisted (e.g. to JSON) depends on ``instance`` -- confirm.

	    Args:
	        title (str): The title of the film, used as the store key.
	        cache_path (str): The local path where the file is saved.
	    """
	    film_store = instance.FILM_STORE
	    film_store[title] = cache_path
	    print(f'Film store updated with {title}.')


	###############################################################################
	def download_episode(file_url, token, cache_path, episode_id, title, chunk_size=100 * 1024 * 1024):
	    """
	    Downloads an episode file from the specified URL and saves it to the cache path.
	    Tracks the download progress in ``download_progress[episode_id]``.

	    Request and file-system errors are reported on stdout and recorded as a
	    "Failed" status instead of being raised. Any other exception is recorded
	    as "Failed" as well and then re-raised.

	    Args:
	        file_url (str): The URL of the file to download.
	        token (str): The authorization token for the request.
	        cache_path (str): The path to save the downloaded file.
	        episode_id (str): Unique identifier for the episode download.
	        title (str): The title of the TV show, used as the key in the TV store.
	        chunk_size (int): Size of each chunk to download.
	    """
	    print(f"Downloading file from URL: {file_url} to {cache_path}")
	    headers = {'Authorization': f'Bearer {token}'}

	    # Register the progress record before touching the network so the error
	    # handlers and the finally clause below always have an entry to update.
	    progress = {"total": 0, "downloaded": 0, "status": "Downloading", "start_time": time.time()}
	    download_progress[episode_id] = progress

	    try:
	        # The context manager releases the streamed connection on every exit path.
	        with requests.get(file_url, headers=headers, stream=True) as response:
	            response.raise_for_status()

	            total_size = int(response.headers.get('content-length', 0))
	            progress["total"] = total_size

	            # dirname is empty for a bare filename; os.makedirs("") would raise.
	            cache_dir = os.path.dirname(cache_path)
	            if cache_dir:
	                os.makedirs(cache_dir, exist_ok=True)

	            with open(cache_path, 'wb') as file, tqdm(total=total_size, unit='B', unit_scale=True, desc=cache_path) as pbar:
	                for data in response.iter_content(chunk_size=chunk_size):
	                    if not data:
	                        continue
	                    file.write(data)
	                    pbar.update(len(data))
	                    progress["downloaded"] += len(data)

	        print(f'File cached to {cache_path} successfully.')
	        update_tv_store(title, cache_path)
	        progress["status"] = "Completed"
	    except RequestException as e:
	        print(f"Error downloading file: {e}")
	        progress["status"] = "Failed"
	    except IOError as e:
	        print(f"Error writing file {cache_path}: {e}")
	        progress["status"] = "Failed"
	    except Exception:
	        # Do not leave the record stuck in "Downloading" on unexpected errors.
	        progress["status"] = "Failed"
	        raise
	    finally:
	        if progress["status"] != "Downloading":
	            progress["end_time"] = time.time()


	def update_tv_store(title, cache_path):
	    """
	    Registers a cached episode in ``instance.TV_STORE`` as
	    ``TV_STORE[title][season][episode_file] = cache_path``.

	    The season key is the name of the directory containing the file and the
	    episode key is the file name itself, both derived from ``cache_path``.

	    Args:
	        title (str): The title of the TV show.
	        cache_path (str): The local path where the file is saved.
	    """
	    # Split once: the tail is the episode file name, the head's last
	    # component is the season directory (e.g. 'Season 1').
	    season_dir, episode_part = os.path.split(cache_path)
	    season_part = os.path.basename(season_dir)

	    # Create the nested show -> season levels on first use.
	    tv_store = instance.TV_STORE
	    if title not in tv_store:
	        tv_store[title] = {}
	    seasons = tv_store[title]
	    if season_part not in seasons:
	        seasons[season_part] = {}

	    # The episode file name is assumed to be unique within its season.
	    seasons[season_part][episode_part] = cache_path
	    print(f'TV store updated with {title}, {season_part}, {episode_part}.')

	if __name__ == "__main__":
	    # Manual smoke test: download one sample film into the cache directory.
	    if not CACHE_DIR:
	        # os.path.join(None, ...) would otherwise fail with an opaque TypeError.
	        raise SystemExit("CACHE_DIR environment variable must be set to run this script.")

	    file_url = "https://huggingface.co/Unicone-Studio/jellyfin_media/resolve/main/films/Funky%20Monkey%202004/Funky%20Monkey%20(2004)%20Web-dl%201080p.mp4"
	    token = os.getenv("TOKEN")
	    cache_path = os.path.join(CACHE_DIR, "films/Funky Monkey 2004/Funky Monkey (2004) Web-dl 1080p.mp4")
	    film_id = "funky_monkey_2004"  # Unique identifier for the film download
	    # download_film requires a title (the film store key); the directory name is used here.
	    title = "Funky Monkey 2004"
	    download_film(file_url, token, cache_path, film_id=film_id, title=title)