Gabriel committed on
Commit
73c784b
1 Parent(s): 126961f

Create ifff_downloader.py

Browse files
Files changed (1) hide show
  1. ifff_downloader.py +88 -0
ifff_downloader.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ import shutil
4
+ import gradio as gr
5
+ from tqdm import tqdm
6
+ from concurrent.futures import ThreadPoolExecutor
7
+ from zipfile import ZipFile
8
+
9
def get_image_ids(batch_id: str, *, timeout: float = 30.0) -> list[str]:
    """
    Return the image IDs listed in the manifest of the given batch

    Arguments:
        batch_id: Batch ID whose manifest is requested
        timeout: Seconds to wait for the manifest server before giving up

    Returns:
        One ID per manifest item: the first 14 characters of the segment
        that follows the first "!" in the item's "id" field.

    Raises:
        requests.HTTPError: The manifest request returned an error status.
        requests.Timeout: The server did not answer within ``timeout``.
    """
    # Without an explicit timeout, requests waits indefinitely on a server
    # that accepts the connection but never answers.
    response = requests.get(
        f"https://iiifintern.ra.se/arkis!{batch_id}/manifest",
        timeout=timeout,
    )
    response.raise_for_status()
    manifest = response.json()
    return [item["id"].split("!")[1][:14] for item in manifest["items"]]
15
+
16
def download_image(url: str, dest: str, *, timeout: float = 60.0) -> None:
    """
    Download an image

    Arguments:
        url: Image url
        dest: Destination file name
        timeout: Seconds to wait for the server to connect or to send data

    Raises:
        requests.HTTPError: The server answered with an error status.
        requests.Timeout: The server did not respond within ``timeout``.
    """
    # The context manager releases the streamed connection even when the
    # download fails part-way through.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Check the status before opening dest so that an HTTP error page
        # is never saved as if it were the image.
        response.raise_for_status()
        with open(dest, "wb") as out_file:
            # iter_content applies any Content-Encoding; response.raw does not.
            for chunk in response.iter_content(chunk_size=64 * 1024):
                out_file.write(chunk)
28
+
29
def download_image_by_image_id(image_id: str, progress=None) -> None:
    """
    Download the image with the given image ID

    Creates a directory named after the batch ID (the first 8 characters
    of the image ID) and saves the image in that directory as
    "<image_id>.jpg".

    Arguments:
        image_id: ID of the image to download
        progress: Optional progress bar; its update(1) is called once the
            image has been saved
    """
    batch_id = image_id[:8]
    os.makedirs(batch_id, exist_ok=True)
    url = f"https://lbiiif.riksarkivet.se/arkis!{image_id}/full/max/0/default.jpg"
    dest = os.path.join(batch_id, image_id + ".jpg")
    download_image(url, dest)
    # Compare against None explicitly: the truthiness of a progress bar
    # depends on its total, not on whether one was supplied.
    if progress is not None:
        progress.update(1)
43
+
44
def download_batch_images(batch_id: str, workers: int = 2) -> str:
    """
    Download every image of a batch and pack the images into a zip file

    Arguments:
        batch_id: Batch ID whose images are downloaded
        workers: Number of download threads

    Returns:
        File name of the created archive, "<batch_id>.zip". Each image is
        stored in the archive under its bare file name.

    Raises:
        Exception: The first error raised by any image download is
            re-raised here instead of being dropped with its future.
    """
    image_ids = get_image_ids(batch_id)

    # Both context managers guarantee cleanup (bar closed, threads joined)
    # even when a download fails.
    with tqdm(total=len(image_ids), desc=f"Downloading {batch_id}", leave=False) as progress:
        with ThreadPoolExecutor(max_workers=workers) as executor:
            futures = [
                executor.submit(download_image_by_image_id, image_id, progress)
                for image_id in image_ids
            ]
            try:
                # result() re-raises whatever the worker raised; without it
                # a failed download would go unnoticed.
                for future in futures:
                    future.result()
            except BaseException:
                # Do not start downloads that are still queued.
                for future in futures:
                    future.cancel()
                raise

    # Zip the folder with downloaded images
    zip_filename = f"{batch_id}.zip"
    with ZipFile(zip_filename, "w") as zipf:
        for image_id in image_ids:
            # download_image_by_image_id stores each file under the first
            # 8 characters of its image ID; read from that same location.
            img_path = os.path.join(image_id[:8], f"{image_id}.jpg")
            zipf.write(img_path, arcname=os.path.basename(img_path))

    return zip_filename
64
+
65
def gradio_interface(batch_id):
    """
    Gradio click handler: download a batch and return the zip file path

    Arguments:
        batch_id: Text entered in the "Batch ID" box

    Returns:
        Path of the zip file, for the file output component.

    Raises:
        gr.Error: The batch ID is empty or the download failed. Gradio
            shows the message to the user.
    """
    batch_id = (batch_id or "").strip()
    if not batch_id:
        raise gr.Error("Please enter a batch ID.")
    try:
        return download_batch_images(batch_id)
    except Exception as e:
        # The output is a file component, so returning the error text
        # would be treated as a file path; raise it as a UI error instead.
        raise gr.Error(str(e)) from e
71
+
72
# Page layout: a title, a text box for the batch ID, a file output for
# the resulting zip archive and a button that starts the download.
with gr.Blocks() as app:
    gr.Markdown("# Batch Image Downloader")

    with gr.Row():
        batch_id_input = gr.Textbox(label="Batch ID")

    output_file = gr.File(label="Download Zip File")
    download_button = gr.Button("Download Images")

    # Clicking the button passes the text box value to gradio_interface
    # and sends the returned value to the file output.
    download_button.click(
        fn=gradio_interface,
        inputs=[batch_id_input],
        outputs=[output_file],
    )

# Start the web server for the interface.
app.launch()