Gabriel committed on
Commit
d1d1d97
1 Parent(s): dcc55fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -5
app.py CHANGED
@@ -4,24 +4,30 @@ import shutil
4
  import gradio as gr
5
  from concurrent.futures import ThreadPoolExecutor
6
  from zipfile import ZipFile
 
 
 
7
 
8
  IIIF_URL = "https://lbiiif.riksarkivet.se" #"https://iiifintern.ra.se"
9
 
10
  def get_image_ids(batch_id: str) -> list[str]:
11
  """A list of image IDs in the given batch"""
 
12
  response = requests.get(f"{IIIF_URL}/arkis!{batch_id}/manifest")
13
  response.raise_for_status()
14
  response = response.json()
15
- return [item["id"].split("!")[1][:14] for item in response["items"]]
 
 
16
 
17
  def download_image(url: str, dest: str) -> None:
18
  """
19
  Download an image
20
-
21
  Arguments:
22
  url: Image url
23
  dest: Destination file name
24
  """
 
25
  response = requests.get(url, stream=True)
26
  with open(dest, "wb") as out_file:
27
  shutil.copyfileobj(response.raw, out_file)
@@ -30,7 +36,6 @@ def download_image(url: str, dest: str) -> None:
30
  def download_image_by_image_id(image_id: str):
31
  """
32
  Download the image with the given image ID
33
-
34
  Creates a directory named after the batch ID and saves the image in
35
  that directory.
36
  """
@@ -41,6 +46,7 @@ def download_image_by_image_id(image_id: str):
41
  download_image(url, dest)
42
 
43
  def download_batch_images(batch_id: str, workers: int = 2, progress=None):
 
44
  image_ids = get_image_ids(batch_id)
45
  total_images = len(image_ids)
46
 
@@ -49,16 +55,17 @@ def download_batch_images(batch_id: str, workers: int = 2, progress=None):
49
 
50
  def track_download(image_id):
51
  download_image_by_image_id(image_id)
 
52
  if progress:
53
  # Update progress after each image
54
- current_progress = image_ids.index(image_id) / total_images
55
  progress(current_progress, desc=f"Downloading {image_id}...")
56
 
57
  with ThreadPoolExecutor(max_workers=workers) as executor:
58
  for image_id in image_ids:
59
  executor.submit(track_download, image_id)
60
 
61
- # Zip the folder with downloaded images
62
  zip_filename = f"{batch_id}.zip"
63
  with ZipFile(zip_filename, 'w') as zipf:
64
  for image_id in image_ids:
@@ -68,6 +75,7 @@ def download_batch_images(batch_id: str, workers: int = 2, progress=None):
68
  if progress:
69
  progress(1, desc=f"Completed {batch_id}")
70
 
 
71
  return zip_filename
72
 
73
 
@@ -77,10 +85,12 @@ def gradio_interface(batch_ids_input, progress=gr.Progress()):
77
  zip_files = []
78
  try:
79
  for batch_id in progress.tqdm(batch_ids, desc="Processing batches"):
 
80
  zip_file = download_batch_images(batch_id, progress=progress)
81
  zip_files.append(zip_file)
82
  return zip_files # Return the list of zip files for download
83
  except Exception as e:
 
84
  return str(e)
85
 
86
  with gr.Blocks() as app:
 
4
  import gradio as gr
5
  from concurrent.futures import ThreadPoolExecutor
6
  from zipfile import ZipFile
7
+ import logging
8
+
9
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
10
 
11
  IIIF_URL = "https://lbiiif.riksarkivet.se" #"https://iiifintern.ra.se"
12
 
13
  def get_image_ids(batch_id: str) -> list[str]:
14
  """A list of image IDs in the given batch"""
15
+ logging.info(f"Fetching image IDs for batch {batch_id}")
16
  response = requests.get(f"{IIIF_URL}/arkis!{batch_id}/manifest")
17
  response.raise_for_status()
18
  response = response.json()
19
+ image_ids = [item["id"].split("!")[1][:14] for item in response["items"]]
20
+ logging.info(f"Found {len(image_ids)} images in batch {batch_id}")
21
+ return image_ids
22
 
23
  def download_image(url: str, dest: str) -> None:
24
  """
25
  Download an image
 
26
  Arguments:
27
  url: Image url
28
  dest: Destination file name
29
  """
30
+ logging.info(f"Downloading image from {url} to {dest}")
31
  response = requests.get(url, stream=True)
32
  with open(dest, "wb") as out_file:
33
  shutil.copyfileobj(response.raw, out_file)
 
36
  def download_image_by_image_id(image_id: str):
37
  """
38
  Download the image with the given image ID
 
39
  Creates a directory named after the batch ID and saves the image in
40
  that directory.
41
  """
 
46
  download_image(url, dest)
47
 
48
  def download_batch_images(batch_id: str, workers: int = 2, progress=None):
49
+ logging.info(f"Starting download for batch {batch_id}")
50
  image_ids = get_image_ids(batch_id)
51
  total_images = len(image_ids)
52
 
 
55
 
56
  def track_download(image_id):
57
  download_image_by_image_id(image_id)
58
+ logging.info(f"Downloaded image {image_id}")
59
  if progress:
60
  # Update progress after each image
61
+ current_progress = (image_ids.index(image_id) + 1) / total_images
62
  progress(current_progress, desc=f"Downloading {image_id}...")
63
 
64
  with ThreadPoolExecutor(max_workers=workers) as executor:
65
  for image_id in image_ids:
66
  executor.submit(track_download, image_id)
67
 
68
+ logging.info(f"Zipping downloaded images for batch {batch_id}")
69
  zip_filename = f"{batch_id}.zip"
70
  with ZipFile(zip_filename, 'w') as zipf:
71
  for image_id in image_ids:
 
75
  if progress:
76
  progress(1, desc=f"Completed {batch_id}")
77
 
78
+ logging.info(f"Completed download and zip for batch {batch_id}")
79
  return zip_filename
80
 
81
 
 
85
  zip_files = []
86
  try:
87
  for batch_id in progress.tqdm(batch_ids, desc="Processing batches"):
88
+ logging.info(f"Processing batch {batch_id}")
89
  zip_file = download_batch_images(batch_id, progress=progress)
90
  zip_files.append(zip_file)
91
  return zip_files # Return the list of zip files for download
92
  except Exception as e:
93
+ logging.error(f"Error processing batches: {e}")
94
  return str(e)
95
 
96
  with gr.Blocks() as app: