Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,13 +2,12 @@ import os
|
|
2 |
import requests
|
3 |
import shutil
|
4 |
import gradio as gr
|
5 |
-
from tqdm import tqdm
|
6 |
from concurrent.futures import ThreadPoolExecutor
|
7 |
from zipfile import ZipFile
|
8 |
|
9 |
def get_image_ids(batch_id: str) -> list[str]:
|
10 |
"""A list of image IDs in the given batch"""
|
11 |
-
response = requests.get(f"https://
|
12 |
response.raise_for_status()
|
13 |
response = response.json()
|
14 |
return [item["id"].split("!")[1][:14] for item in response["items"]]
|
@@ -26,7 +25,7 @@ def download_image(url: str, dest: str) -> None:
|
|
26 |
shutil.copyfileobj(response.raw, out_file)
|
27 |
del response
|
28 |
|
29 |
-
def download_image_by_image_id(image_id: str
|
30 |
"""
|
31 |
Download the image with the given image ID
|
32 |
|
@@ -35,23 +34,20 @@ def download_image_by_image_id(image_id: str, progress=None):
|
|
35 |
"""
|
36 |
batch_id = image_id[:8]
|
37 |
os.makedirs(batch_id, exist_ok=True)
|
38 |
-
url = f"https://
|
39 |
dest = os.path.join(batch_id, image_id + ".jpg")
|
40 |
download_image(url, dest)
|
41 |
-
if progress:
|
42 |
-
progress.update(1)
|
43 |
|
44 |
-
def download_batch_images(batch_id: str, workers: int = 2):
|
45 |
image_ids = get_image_ids(batch_id)
|
46 |
total_images = len(image_ids)
|
47 |
|
48 |
-
progress
|
|
|
49 |
|
50 |
with ThreadPoolExecutor(max_workers=workers) as executor:
|
51 |
for image_id in image_ids:
|
52 |
-
executor.submit(download_image_by_image_id, image_id
|
53 |
-
|
54 |
-
progress.close()
|
55 |
|
56 |
# Zip the folder with downloaded images
|
57 |
zip_filename = f"{batch_id}.zip"
|
@@ -60,12 +56,20 @@ def download_batch_images(batch_id: str, workers: int = 2):
|
|
60 |
img_path = os.path.join(batch_id, f"{image_id}.jpg")
|
61 |
zipf.write(img_path, arcname=os.path.basename(img_path))
|
62 |
|
|
|
|
|
|
|
63 |
return zip_filename
|
64 |
|
65 |
-
def gradio_interface(
|
|
|
|
|
|
|
66 |
try:
|
67 |
-
|
68 |
-
|
|
|
|
|
69 |
except Exception as e:
|
70 |
return str(e)
|
71 |
|
@@ -73,18 +77,18 @@ with gr.Blocks() as app:
|
|
73 |
gr.Markdown("# Batch Image Downloader")
|
74 |
|
75 |
with gr.Row():
|
76 |
-
|
77 |
-
|
78 |
-
|
|
|
|
|
79 |
|
80 |
-
download_button = gr.Button("Download Images")
|
81 |
|
82 |
download_button.click(
|
83 |
gradio_interface,
|
84 |
-
inputs=[
|
85 |
-
outputs=[
|
86 |
)
|
87 |
|
88 |
-
app.queue(
|
89 |
-
|
90 |
app.launch()
|
|
|
2 |
import requests
|
3 |
import shutil
|
4 |
import gradio as gr
|
|
|
5 |
from concurrent.futures import ThreadPoolExecutor
|
6 |
from zipfile import ZipFile
|
7 |
|
8 |
def get_image_ids(batch_id: str, timeout: float = 30.0) -> list[str]:
    """Return the image IDs listed in the manifest of the given batch.

    Args:
        batch_id: Batch identifier; interpolated into the manifest URL.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely, which would hang
            the caller on an unresponsive server.

    Returns:
        One ID per entry of the manifest's ``"items"`` list: the second
        ``!``-separated segment of the entry's ``"id"``, truncated to
        14 characters.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    response = requests.get(
        f"https://iiifintern.ra.se/arkis!{batch_id}/manifest",
        timeout=timeout,
    )
    response.raise_for_status()
    manifest = response.json()
    return [item["id"].split("!")[1][:14] for item in manifest["items"]]
|
|
|
25 |
shutil.copyfileobj(response.raw, out_file)
|
26 |
del response
|
27 |
|
28 |
+
def download_image_by_image_id(image_id: str):
    """
    Fetch the full-size JPEG for ``image_id`` and store it on disk.

    The first eight characters of the image ID name the target directory
    (created if missing); the file is saved there as ``<image_id>.jpg``.
    """
    target_dir = image_id[:8]
    os.makedirs(target_dir, exist_ok=True)
    download_image(
        f"https://iiifintern.ra.se/arkis!{image_id}/full/max/0/default.jpg",
        os.path.join(target_dir, f"{image_id}.jpg"),
    )
|
|
|
|
|
40 |
|
41 |
+
def download_batch_images(batch_id: str, workers: int = 2, progress=None):
|
42 |
image_ids = get_image_ids(batch_id)
|
43 |
total_images = len(image_ids)
|
44 |
|
45 |
+
if progress:
|
46 |
+
progress(0, desc=f"Starting download for {batch_id}...")
|
47 |
|
48 |
with ThreadPoolExecutor(max_workers=workers) as executor:
|
49 |
for image_id in image_ids:
|
50 |
+
executor.submit(download_image_by_image_id, image_id)
|
|
|
|
|
51 |
|
52 |
# Zip the folder with downloaded images
|
53 |
zip_filename = f"{batch_id}.zip"
|
|
|
56 |
img_path = os.path.join(batch_id, f"{image_id}.jpg")
|
57 |
zipf.write(img_path, arcname=os.path.basename(img_path))
|
58 |
|
59 |
+
if progress:
|
60 |
+
progress(1, desc=f"Completed {batch_id}")
|
61 |
+
|
62 |
return zip_filename
|
63 |
|
64 |
+
def gradio_interface(batch_ids_input, progress=gr.Progress()):
    """Download every batch named in the textbox and return the zip files.

    Args:
        batch_ids_input: Text containing one batch ID per line. Blank
            lines and surrounding whitespace are ignored.
        progress: Gradio progress tracker; the ``gr.Progress()`` default
            is deliberate, as that is how Gradio supplies the tracker.

    Returns:
        The zip file names returned by ``download_batch_images``, one per
        batch, in input order.

    Raises:
        gr.Error: If any batch fails. The output component is a file list,
            so returning the error text (as before) would hand it to the
            file component instead of showing it to the user as an error.
    """
    stripped_lines = (line.strip() for line in batch_ids_input.split("\n"))
    batch_ids = [batch_id for batch_id in stripped_lines if batch_id]

    zip_files = []
    try:
        for batch_id in progress.tqdm(batch_ids, desc="Processing batches"):
            zip_files.append(download_batch_images(batch_id, progress=progress))
    except Exception as e:
        # Top-level UI boundary: surface the failure in the Gradio UI.
        raise gr.Error(str(e)) from e
    return zip_files  # List of zip files offered for download
|
75 |
|
|
|
77 |
gr.Markdown("# Batch Image Downloader")
|
78 |
|
79 |
with gr.Row():
|
80 |
+
with gr.Column():
|
81 |
+
batch_ids_input = gr.Textbox(label="Batch IDs (one per line)", placeholder="Enter batch IDs, one per line.")
|
82 |
+
download_button = gr.Button("Download Images")
|
83 |
+
with gr.Column():
|
84 |
+
output_files = gr.File(label="Download Zip Files", file_count="multiple")
|
85 |
|
|
|
86 |
|
87 |
download_button.click(
|
88 |
gradio_interface,
|
89 |
+
inputs=[batch_ids_input],
|
90 |
+
outputs=[output_files]
|
91 |
)
|
92 |
|
93 |
+
app.queue()
|
|
|
94 |
app.launch()
|