Spaces:
Sleeping
Sleeping
Create ifff_downloader.py
Browse files- ifff_downloader.py +88 -0
ifff_downloader.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import requests
|
3 |
+
import shutil
|
4 |
+
import gradio as gr
|
5 |
+
from tqdm import tqdm
|
6 |
+
from concurrent.futures import ThreadPoolExecutor
|
7 |
+
from zipfile import ZipFile
|
8 |
+
|
9 |
+
def get_image_ids(batch_id: str, timeout: float = 30.0) -> list[str]:
    """
    Return the image IDs listed in the manifest of the given batch

    Fetches ``https://iiifintern.ra.se/arkis!{batch_id}/manifest`` and
    derives one ID per entry of the manifest's ``items`` list: the second
    ``!``-separated segment of the entry's ``id``, cut to 14 characters.

    Arguments:
        batch_id: Batch ID to look up
        timeout: Seconds to wait for the server before giving up

    Raises:
        requests.HTTPError: If the server answers with an error status
        requests.Timeout: If the server does not respond within ``timeout``
    """
    response = requests.get(
        f"https://iiifintern.ra.se/arkis!{batch_id}/manifest",
        # Without a timeout an unresponsive server blocks the caller forever
        timeout=timeout,
    )
    response.raise_for_status()
    manifest = response.json()
    return [item["id"].split("!")[1][:14] for item in manifest["items"]]
|
15 |
+
|
16 |
+
def download_image(url: str, dest: str, timeout: float = 60.0) -> None:
    """
    Download an image

    Arguments:
        url: Image url
        dest: Destination file name
        timeout: Seconds to wait for the server before giving up

    Raises:
        requests.HTTPError: If the server answers with an error status
        requests.Timeout: If the server does not respond within ``timeout``
    """
    # The context manager releases the connection even when writing fails
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail before creating dest so an error page is never saved as an image
        response.raise_for_status()
        # Raw reads skip content decoding by default; turn it on so a
        # compressed response body is not written to disk still encoded
        response.raw.decode_content = True
        with open(dest, "wb") as out_file:
            shutil.copyfileobj(response.raw, out_file)
|
28 |
+
|
29 |
+
def download_image_by_image_id(image_id: str, progress=None):
    """
    Download the image with the given image ID

    Creates a directory named after the batch ID (the first 8 characters of
    the image ID) and saves the image in that directory as
    ``<image_id>.jpg``.

    Arguments:
        image_id: ID of the image to download
        progress: Optional progress bar; its ``update(1)`` is called once
            the image has been downloaded
    """
    batch_id = image_id[:8]
    os.makedirs(batch_id, exist_ok=True)
    url = f"https://lbiiif.riksarkivet.se/arkis!{image_id}/full/max/0/default.jpg"
    dest = os.path.join(batch_id, image_id + ".jpg")
    download_image(url, dest)
    # Compare against None explicitly: a progress bar object may define its
    # own truthiness, which says nothing about whether one was passed in
    if progress is not None:
        progress.update(1)
|
43 |
+
|
44 |
+
def download_batch_images(batch_id: str, workers: int = 2):
    """
    Download every image of a batch and pack the images into a zip file

    The image IDs come from get_image_ids; each image is fetched with
    download_image_by_image_id on a thread pool. The images are then written
    to ``<batch_id>.zip`` in the working directory, stored flat under their
    file names.

    Arguments:
        batch_id: Batch ID; surrounding whitespace is ignored
        workers: Number of download threads

    Returns:
        Name of the created zip file

    Raises:
        ValueError: If batch_id is empty or is not a plain file name
        Exception: The first error raised by a failed image download
    """
    batch_id = batch_id.strip()
    # batch_id ends up in a file name, so refuse anything that could point
    # outside the working directory
    if batch_id in ("", ".", "..") or os.path.basename(batch_id) != batch_id:
        raise ValueError(f"Invalid batch ID: {batch_id!r}")

    image_ids = get_image_ids(batch_id)

    # The with-statement closes the progress bar even when a download fails
    with tqdm(
        total=len(image_ids), desc=f"Downloading {batch_id}", leave=False
    ) as progress:
        with ThreadPoolExecutor(max_workers=workers) as executor:
            futures = [
                executor.submit(download_image_by_image_id, image_id, progress)
                for image_id in image_ids
            ]
        # All downloads have finished here; result() re-raises an error that
        # would otherwise be dropped silently together with its future
        for future in futures:
            future.result()

    # Zip the downloaded images
    zip_filename = f"{batch_id}.zip"
    with ZipFile(zip_filename, "w") as zipf:
        for image_id in image_ids:
            # download_image_by_image_id saves into a directory named after
            # the first 8 characters of the image ID, so look there
            img_path = os.path.join(image_id[:8], f"{image_id}.jpg")
            zipf.write(img_path, arcname=os.path.basename(img_path))

    return zip_filename
|
64 |
+
|
65 |
+
def gradio_interface(batch_id):
    """
    Click handler: download a batch and return the zip file to offer

    Arguments:
        batch_id: Batch ID entered in the UI

    Returns:
        Path of the zip file created by download_batch_images

    Raises:
        gr.Error: If the download fails; carries the original error message
    """
    try:
        return download_batch_images(batch_id)  # zip file path for download
    except Exception as e:
        # The return value is used as a file output, so returning the message
        # would have it treated as a file path; gr.Error makes Gradio show
        # the message to the user instead
        raise gr.Error(str(e)) from e
|
71 |
+
|
72 |
+
# Build the UI: a batch ID text box, a file output for the resulting zip and
# a button that starts the download.
with gr.Blocks() as app:
    gr.Markdown("# Batch Image Downloader")

    with gr.Row():
        batch_id_input = gr.Textbox(label="Batch ID")

    output_file = gr.File(label="Download Zip File")

    download_button = gr.Button("Download Images")

    # Clicking the button passes the text box value to gradio_interface and
    # sends its return value to the file output.
    download_button.click(
        gradio_interface,
        inputs=[batch_id_input],
        outputs=[output_file]
    )

# Runs at module level, so the app starts whenever this file is executed.
app.launch()
|