Gabriel committed on
Commit
a874957
1 Parent(s): efdb17e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -22
app.py CHANGED
@@ -2,13 +2,12 @@ import os
2
  import requests
3
  import shutil
4
  import gradio as gr
5
- from tqdm import tqdm
6
  from concurrent.futures import ThreadPoolExecutor
7
  from zipfile import ZipFile
8
 
9
  def get_image_ids(batch_id: str) -> list[str]:
10
  """A list of image IDs in the given batch"""
11
- response = requests.get(f"https://lbiiif.riksarkivet.se/arkis!{batch_id}/manifest")
12
  response.raise_for_status()
13
  response = response.json()
14
  return [item["id"].split("!")[1][:14] for item in response["items"]]
@@ -26,7 +25,7 @@ def download_image(url: str, dest: str) -> None:
26
  shutil.copyfileobj(response.raw, out_file)
27
  del response
28
 
29
- def download_image_by_image_id(image_id: str, progress=None):
30
  """
31
  Download the image with the given image ID
32
 
@@ -35,23 +34,20 @@ def download_image_by_image_id(image_id: str, progress=None):
35
  """
36
  batch_id = image_id[:8]
37
  os.makedirs(batch_id, exist_ok=True)
38
- url = f"https://lbiiif.riksarkivet.se/arkis!{image_id}/full/max/0/default.jpg"
39
  dest = os.path.join(batch_id, image_id + ".jpg")
40
  download_image(url, dest)
41
- if progress:
42
- progress.update(1)
43
 
44
- def download_batch_images(batch_id: str, workers: int = 2):
45
  image_ids = get_image_ids(batch_id)
46
  total_images = len(image_ids)
47
 
48
- progress = tqdm(total=total_images, desc=f"Downloading {batch_id}", leave=False)
 
49
 
50
  with ThreadPoolExecutor(max_workers=workers) as executor:
51
  for image_id in image_ids:
52
- executor.submit(download_image_by_image_id, image_id, progress)
53
-
54
- progress.close()
55
 
56
  # Zip the folder with downloaded images
57
  zip_filename = f"{batch_id}.zip"
@@ -60,12 +56,20 @@ def download_batch_images(batch_id: str, workers: int = 2):
60
  img_path = os.path.join(batch_id, f"{image_id}.jpg")
61
  zipf.write(img_path, arcname=os.path.basename(img_path))
62
 
 
 
 
63
  return zip_filename
64
 
65
- def gradio_interface(batch_id):
 
 
 
66
  try:
67
- zip_file = download_batch_images(batch_id)
68
- return zip_file # Return the zip file path for download
 
 
69
  except Exception as e:
70
  return str(e)
71
 
@@ -73,18 +77,18 @@ with gr.Blocks() as app:
73
  gr.Markdown("# Batch Image Downloader")
74
 
75
  with gr.Row():
76
- batch_id_input = gr.Textbox(label="Batch ID")
77
-
78
- output_file = gr.File(label="Download Zip File")
 
 
79
 
80
- download_button = gr.Button("Download Images")
81
 
82
  download_button.click(
83
  gradio_interface,
84
- inputs=[batch_id_input],
85
- outputs=[output_file]
86
  )
87
 
88
- app.queue(max_size=10)
89
-
90
  app.launch()
 
2
  import requests
3
  import shutil
4
  import gradio as gr
 
5
  from concurrent.futures import ThreadPoolExecutor
6
  from zipfile import ZipFile
7
 
8
  def get_image_ids(batch_id: str) -> list[str]:
9
  """A list of image IDs in the given batch"""
10
+ response = requests.get(f"https://iiifintern.ra.se/arkis!{batch_id}/manifest")
11
  response.raise_for_status()
12
  response = response.json()
13
  return [item["id"].split("!")[1][:14] for item in response["items"]]
 
25
  shutil.copyfileobj(response.raw, out_file)
26
  del response
27
 
28
+ def download_image_by_image_id(image_id: str):
29
  """
30
  Download the image with the given image ID
31
 
 
34
  """
35
  batch_id = image_id[:8]
36
  os.makedirs(batch_id, exist_ok=True)
37
+ url = f"https://iiifintern.ra.se/arkis!{image_id}/full/max/0/default.jpg"
38
  dest = os.path.join(batch_id, image_id + ".jpg")
39
  download_image(url, dest)
 
 
40
 
41
+ def download_batch_images(batch_id: str, workers: int = 2, progress=None):
42
  image_ids = get_image_ids(batch_id)
43
  total_images = len(image_ids)
44
 
45
+ if progress:
46
+ progress(0, desc=f"Starting download for {batch_id}...")
47
 
48
  with ThreadPoolExecutor(max_workers=workers) as executor:
49
  for image_id in image_ids:
50
+ executor.submit(download_image_by_image_id, image_id)
 
 
51
 
52
  # Zip the folder with downloaded images
53
  zip_filename = f"{batch_id}.zip"
 
56
  img_path = os.path.join(batch_id, f"{image_id}.jpg")
57
  zipf.write(img_path, arcname=os.path.basename(img_path))
58
 
59
+ if progress:
60
+ progress(1, desc=f"Completed {batch_id}")
61
+
62
  return zip_filename
63
 
64
def gradio_interface(batch_ids_input, progress=gr.Progress()):
    """Download every batch listed in the textbox and return the zip paths.

    Args:
        batch_ids_input: Text with one batch ID per line. Blank lines and
            surrounding whitespace are ignored; ``None`` is treated as empty.
        progress: Gradio progress tracker. The ``gr.Progress()`` default is
            how Gradio injects a tracker into an event handler, so it is kept
            as the default argument on purpose.

    Returns:
        A list with the zip file path produced for each batch, in input order.
        An empty input yields an empty list.

    Raises:
        gr.Error: If downloading any batch fails. The message names the batch
            and carries the underlying error text. The previous version
            returned the error text as the handler's result, which handed it
            to the file output as if it were a file value.
    """
    batch_ids = [
        line.strip()
        for line in (batch_ids_input or "").split("\n")
        if line.strip()
    ]

    zip_files = []
    for batch_id in progress.tqdm(batch_ids, desc="Processing batches"):
        try:
            zip_files.append(download_batch_images(batch_id, progress=progress))
        except Exception as e:
            # Raise a Gradio error rather than returning a string, so the
            # failure is reported as an error and not passed to the output.
            raise gr.Error(f"Failed to download batch {batch_id}: {e}") from e
    return zip_files
75
 
 
77
  gr.Markdown("# Batch Image Downloader")
78
 
79
  with gr.Row():
80
+ with gr.Column():
81
+ batch_ids_input = gr.Textbox(label="Batch IDs (one per line)", placeholder="Enter batch IDs, one per line.")
82
+ download_button = gr.Button("Download Images")
83
+ with gr.Column():
84
+ output_files = gr.File(label="Download Zip Files", file_count="multiple")
85
 
 
86
 
87
  download_button.click(
88
  gradio_interface,
89
+ inputs=[batch_ids_input],
90
+ outputs=[output_files]
91
  )
92
 
93
+ app.queue()
 
94
  app.launch()