iiif_downloader / app.py
Gabriel's picture
Update app.py
741a077 verified
from concurrent.futures import ThreadPoolExecutor
import os
import requests
import shutil
import gradio as gr
from zipfile import ZipFile
import logging
from typing import IO
# Configure the root logger: INFO level and above, each record rendered as
# "<timestamp> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
# Base URL of the IIIF server; the manifest and image URLs in this file are
# built on top of it. The URL in the trailing comment is an alternative host
# kept for reference (presumably an internal endpoint -- TODO confirm).
IIIF_URL = "https://lbiiif.riksarkivet.se" # "https://iiifintern.ra.se"
def get_image_ids(batch_id: str, timeout: float = 30.0) -> list[str]:
    """
    Return the image IDs listed in the manifest of the given batch

    The manifest is fetched from ``{IIIF_URL}/arkis!{batch_id}/manifest``.
    For every entry in its ``items`` list, the image ID is taken as the
    first 14 characters of the text between the first and second ``!`` of
    the entry's ``id``.

    Arguments:
        batch_id: Batch ID used to build the manifest URL
        timeout: Seconds to wait for the server before giving up

    Returns:
        The image IDs in manifest order (never empty).

    Raises:
        requests.HTTPError: The server answered with an error status.
        requests.Timeout: The server did not answer within ``timeout``.
        ValueError: The manifest did not yield a single image ID.
    """
    logging.info(f"Fetching image IDs for batch {batch_id}")
    # Without a timeout a stalled server would block the caller forever.
    response = requests.get(f"{IIIF_URL}/arkis!{batch_id}/manifest", timeout=timeout)
    response.raise_for_status()
    manifest = response.json()

    image_ids = []
    for item in manifest.get("items", []):
        # A missing "id" is reported like any other malformed id instead of
        # aborting the whole batch with a KeyError.
        item_id = item.get("id", "")
        id_parts = item_id.split("!")
        image_id = id_parts[1][:14] if len(id_parts) > 1 else ""
        if image_id:
            image_ids.append(image_id)
        else:
            logging.warning(f"Unexpected id format: {item_id}")

    if not image_ids:
        raise ValueError("No images found in the manifest.")
    logging.info(f"Found {len(image_ids)} images in batch {batch_id}")
    return image_ids
def download_image(url: str, dest: str, timeout: float = 60.0) -> None:
    """
    Download an image

    Arguments:
        url: Image url
        dest: Destination file name
        timeout: Seconds to wait for the server before giving up

    Raises:
        Exception: The server answered with a status code other than 200.
        requests.RequestException: The request itself failed (connection
            error, timeout, ...).
    """
    logging.info(f"Downloading image from {url} to {dest}")
    # The context manager releases the streamed connection on every exit
    # path; merely dropping the reference does not guarantee that.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            logging.error(f"Failed to download image from {url}. Status code: {response.status_code}")
            raise Exception(f"Failed to download image from {url}. Status code: {response.status_code}")

        # The raw stream hands out the bytes as sent on the wire; ask it to
        # undo any Content-Encoding so the file on disk is the actual image.
        response.raw.decode_content = True
        try:
            with open(dest, "wb") as out_file:
                shutil.copyfileobj(response.raw, out_file)
        except Exception:
            # Do not leave a truncated file behind: later steps treat any
            # existing file as a finished download.
            if os.path.exists(dest):
                os.remove(dest)
            raise
    logging.info(f"Successfully downloaded image: {dest}")
def download_image_by_image_id(image_id: str):
    """
    Fetch the full-size JPEG for one image ID

    The first eight characters of the image ID are used as the batch ID.
    A directory with that name is created if needed and the image is
    stored in it as ``<image_id>.jpg``.
    """
    target_dir = image_id[:8]
    os.makedirs(target_dir, exist_ok=True)
    download_image(
        f"{IIIF_URL}/arkis!{image_id}/full/max/0/default.jpg",
        os.path.join(target_dir, image_id + ".jpg"),
    )
def rest_download_batch_images(batch_id: str) -> str:
    """
    Download all images of a batch in parallel and zip them

    The downloads are best-effort: an image that fails to download is
    logged and left out of the archive, it does not abort the batch.

    Arguments:
        batch_id: Batch ID whose images should be downloaded

    Returns:
        The name of the created zip file (``<batch_id>.zip``).

    Raises:
        ValueError: The batch manifest lists no images.
    """
    image_ids = get_image_ids(batch_id)

    # Keep the futures so failures can be reported; leaving the ``with``
    # block waits for every download to finish.
    with ThreadPoolExecutor() as executor:
        futures = {
            executor.submit(download_image_by_image_id, image_id): image_id
            for image_id in image_ids
        }
    for future, image_id in futures.items():
        error = future.exception()
        if error is not None:
            logging.warning(f"Failed to download image {image_id}: {error}")

    zip_filename = f"{batch_id}.zip"
    with ZipFile(zip_filename, 'w') as zipf:
        for image_id in image_ids:
            img_path = os.path.join(batch_id, f"{image_id}.jpg")
            if os.path.exists(img_path):
                zipf.write(img_path, arcname=os.path.basename(img_path))
            else:
                logging.warning(f"Image {img_path} does not exist and will not be zipped.")
    return zip_filename
def download_batch_images(batch_id: str, progress=None):
    """
    Download the images of a batch one after another and zip them

    Arguments:
        batch_id: Batch ID whose images should be downloaded
        progress: Optional callable invoked as ``progress(fraction, desc=...)``
            to report how far the download has come

    Returns:
        The name of the created zip file (``<batch_id>.zip``).
    """
    def report(fraction, message):
        # Progress reporting is optional; stay silent without a callback.
        if progress is not None:
            progress(fraction, desc=message)

    logging.info(f"Starting download for batch {batch_id}")
    report(0, f"Starting download for {batch_id}...")

    image_ids = get_image_ids(batch_id)
    total_images = len(image_ids)
    for finished, image_id in enumerate(image_ids, start=1):
        download_image_by_image_id(image_id)
        logging.info(f"Downloaded image {image_id}")
        report(finished / total_images, f"Downloading {image_id}...")

    logging.info(f"Zipping downloaded images for batch {batch_id}")
    archive_name = f"{batch_id}.zip"
    with ZipFile(archive_name, 'w') as archive:
        for image_id in image_ids:
            source_path = os.path.join(batch_id, f"{image_id}.jpg")
            if not os.path.exists(source_path):
                logging.warning(f"Image {source_path} does not exist and will not be zipped.")
                continue
            # Store the file flat in the archive, without the batch directory.
            archive.write(source_path, arcname=os.path.basename(source_path))

    report(1, f"Completed {batch_id}")
    logging.info(f"Completed download and zip for batch {batch_id}")
    return archive_name
def gradio_interface(batch_id_input, progress=gr.Progress()):
    """
    UI handler: download a batch with progress reporting

    Returns the name of the zip file produced by ``download_batch_images``.
    Any failure is logged and re-raised as ``gr.Error``.
    """
    try:
        return download_batch_images(batch_id_input, progress=progress)
    except Exception as error:
        logging.error(f"Error processing batch: {error}")
        raise gr.Error(f"Error: {str(error)}")
def rest_gradio_interface(batch_id_input: str) -> str:
    """
    API handler: download a batch in parallel, without progress reporting

    Arguments:
        batch_id_input: Batch ID whose images should be downloaded

    Returns:
        The name of the zip file produced by ``rest_download_batch_images``
        (a path string, not an open file object).

    Raises:
        gr.Error: Any failure while downloading or zipping, with the
            original exception attached as its cause.
    """
    try:
        return rest_download_batch_images(batch_id_input)
    except Exception as e:
        logging.error(f"Error processing batch: {e}")
        # Chain the original exception so the traceback keeps the root cause.
        raise gr.Error(f"Error: {str(e)}") from e
# Build the Gradio UI: one working tab for downloading a batch and two
# placeholder tabs.
with gr.Blocks() as app:
    gr.Markdown("# IIIF Downloader")

    with gr.Tab("Download Batch"):
        with gr.Row():
            with gr.Column():
                batch_id_input = gr.Textbox(label="Batch ID", placeholder="Enter batch ID.")
                download_button = gr.Button("Download Images")
                # Hidden trigger that only exists to expose the REST endpoint.
                # Binding that handler to the visible button as well made every
                # click download the batch twice, with both runs writing the
                # same image files and the same zip at the same time.
                rest_download_trigger = gr.Button("REST download", visible=False)
            with gr.Column():
                output_file = gr.File(label="Download Zip File")

        # Interactive download with progress reporting.
        download_button.click(
            gradio_interface,
            inputs=[batch_id_input],
            outputs=[output_file],
        )
        # Parallel download, reachable through the API under the same
        # endpoint name as before.
        rest_download_trigger.click(
            rest_gradio_interface,
            api_name="iiif_rest_download",
            inputs=[batch_id_input],
            outputs=[output_file],
        )

    with gr.Tab("Multiple Batches"):
        gr.Markdown("WIP")
        gr.Markdown("Make it possible to download batches to a huggingface account so it can be used through fastapi")
        gr.Markdown("Will uses threading")

    with gr.Tab("How to use"):
        gr.Markdown("WIP, instructional video")

# Enable the request queue, then start the server.
app.queue()
app.launch()