"""Gradio app that downloads every image of one or more IIIF batches as zip files."""

import logging
import os
import shutil
from concurrent.futures import ThreadPoolExecutor, as_completed
from zipfile import ZipFile

import gradio as gr
import requests

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Alternative endpoint kept from the original source: "https://iiifintern.ra.se"
IIIF_URL = "https://lbiiif.riksarkivet.se"

# (connect, read) timeouts in seconds. Without a timeout a stalled server
# would block a worker thread (and the whole batch) indefinitely.
REQUEST_TIMEOUT = (10, 300)


def get_image_ids(batch_id: str) -> list[str]:
    """Return the image IDs listed in the manifest of the given batch.

    Each manifest item's ``id`` is split on ``"!"`` and the first 14
    characters of the second part are taken as the image ID.

    Arguments:
        batch_id: Batch identifier used to build the manifest URL.

    Raises:
        requests.HTTPError: If the manifest request returns an error status.
    """
    logging.info(f"Fetching image IDs for batch {batch_id}")
    response = requests.get(f"{IIIF_URL}/arkis!{batch_id}/manifest", timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    manifest = response.json()
    image_ids = [item["id"].split("!")[1][:14] for item in manifest["items"]]
    logging.info(f"Found {len(image_ids)} images in batch {batch_id}")
    return image_ids


def download_image(url: str, dest: str) -> None:
    """Download an image and write it to ``dest``.

    Arguments:
        url: Image url
        dest: Destination file name

    Raises:
        requests.HTTPError: If the server answers with an error status. The
            destination file is not created in that case.
    """
    logging.info(f"Downloading image from {url} to {dest}")
    # The context manager releases the streamed connection back to the pool
    # even when the copy fails part-way.
    with requests.get(url, stream=True, timeout=REQUEST_TIMEOUT) as response:
        # Fail before opening the file so an HTML error page is never saved
        # under a .jpg name.
        response.raise_for_status()
        # Make the raw stream undo any Content-Encoding (gzip/deflate) so the
        # bytes written are the image itself.
        response.raw.decode_content = True
        with open(dest, "wb") as out_file:
            shutil.copyfileobj(response.raw, out_file)


def download_image_by_image_id(image_id: str) -> str:
    """Download the image with the given image ID.

    Creates a directory named after the first 8 characters of the image ID
    (treated as the batch ID) and saves the image there as ``<image_id>.jpg``.

    Arguments:
        image_id: Image identifier as returned by ``get_image_ids``.

    Returns:
        Path of the downloaded file.
    """
    batch_id = image_id[:8]
    os.makedirs(batch_id, exist_ok=True)
    url = f"{IIIF_URL}/arkis!{image_id}/full/max/0/default.jpg"
    dest = os.path.join(batch_id, image_id + ".jpg")
    download_image(url, dest)
    return dest


def download_batch_images(batch_id: str, workers: int = 2, progress=None):
    """Download all images of a batch in parallel and bundle them into a zip.

    Arguments:
        batch_id: Batch identifier.
        workers: Number of concurrent download threads.
        progress: Optional callable ``progress(fraction, desc=...)`` used to
            report progress (e.g. a ``gr.Progress`` instance).

    Returns:
        File name of the created zip archive (``<batch_id>.zip``).

    Raises:
        Exception: Whatever the first failing download raised; downloads that
            have not started yet are cancelled.
    """
    logging.info(f"Starting download for batch {batch_id}")
    image_ids = get_image_ids(batch_id)
    total_images = len(image_ids)

    # Explicit None check: do not rely on the truthiness of the progress object.
    report = progress is not None
    if report:
        progress(0, desc=f"Starting download for {batch_id}...")

    # image ID -> path actually written, so the zip step reads from exactly
    # the location the download step used.
    downloaded_paths = {}
    with ThreadPoolExecutor(max_workers=workers) as executor:
        futures = {
            executor.submit(download_image_by_image_id, image_id): image_id
            for image_id in image_ids
        }
        # Count completions instead of looking up the ID's list position:
        # O(1) per image, and correct when downloads finish out of order.
        for completed, future in enumerate(as_completed(futures), start=1):
            image_id = futures[future]
            try:
                downloaded_paths[image_id] = future.result()
            except Exception:
                logging.exception(f"Failed to download image {image_id}")
                # Do not start further downloads for a batch that cannot
                # be completed.
                for pending in futures:
                    pending.cancel()
                raise
            logging.info(f"Downloaded image {image_id}")
            if report:
                progress(completed / total_images, desc=f"Downloading {image_id}...")

    logging.info(f"Zipping downloaded images for batch {batch_id}")
    zip_filename = f"{batch_id}.zip"
    with ZipFile(zip_filename, 'w') as zipf:
        # Keep manifest order inside the archive.
        for image_id in image_ids:
            img_path = downloaded_paths[image_id]
            zipf.write(img_path, arcname=os.path.basename(img_path))

    if report:
        progress(1, desc=f"Completed {batch_id}")

    logging.info(f"Completed download and zip for batch {batch_id}")
    return zip_filename


# NOTE: ``progress=gr.Progress()`` as a default argument is the pattern Gradio
# uses to inject a progress tracker; it is intentional, not a mutable-default bug.
def gradio_interface(batch_ids_input, progress=gr.Progress()):
    """Gradio callback: download and zip every batch listed in the textbox.

    Arguments:
        batch_ids_input: Text with one batch ID per line; blank lines are ignored.
        progress: Gradio progress tracker.

    Returns:
        List of zip file names, one per batch, in input order.

    Raises:
        gr.Error: If any batch fails, so the message is shown in the UI
            instead of being handed to the file output as if it were a path.
    """
    batch_ids = [line.strip() for line in batch_ids_input.splitlines() if line.strip()]

    zip_files = []
    try:
        for batch_id in progress.tqdm(batch_ids, desc="Processing batches"):
            logging.info(f"Processing batch {batch_id}")
            zip_files.append(download_batch_images(batch_id, progress=progress))
    except Exception as e:
        logging.exception(f"Error processing batches: {e}")
        raise gr.Error(str(e)) from e
    return zip_files  # Return the list of zip files for download


with gr.Blocks() as app:
    gr.Markdown("# Batch Image Downloader")

    with gr.Row():
        with gr.Column():
            batch_ids_input = gr.Textbox(
                label="Batch IDs (one per line)",
                placeholder="Enter batch IDs, one per line.",
            )
            download_button = gr.Button("Download Images")

        with gr.Column():
            output_files = gr.File(label="Download Zip Files", file_count="multiple")

    download_button.click(
        gradio_interface,
        inputs=[batch_ids_input],
        outputs=[output_files],
    )

app.queue()
app.launch()