Spaces:
Running
on
T4
Running
on
T4
File size: 1,595 Bytes
417b347 3b057c5 95803a5 3b057c5 417b347 95803a5 417b347 95803a5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import io
import os

import datasets
from PIL import Image
class DemoImages:
    """Singleton that loads the demo dataset and writes its images to disk.

    The first instantiation downloads (or reads from cache) the dataset,
    converts it to a pandas DataFrame and saves every image as a JPEG file
    under ``IMAGE_DIR``. Later instantiations return the same object and
    skip the loading step, so their arguments are ignored.

    Attributes set by ``__init__``:
        images_datasets: the ``train`` split returned by
            ``datasets.load_dataset``.
        example_df: ``images_datasets`` converted with ``to_pandas()``.
        examples_list: list of ``[description, path_to_image]`` pairs.
    """

    _instance = None
    # Directory the decoded example images are written to.
    IMAGE_DIR = "./helper/examples/images"

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use.

        The constructor arguments are accepted only so that they can reach
        ``__init__``. They are deliberately not forwarded to
        ``object.__new__``, which raises ``TypeError`` when it receives extra
        arguments from a class that overrides ``__new__``.
        """
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, url="Riksarkivet/test_images_demo", cache_dir="./helper/examples/.cache_images"):
        """Load the dataset and export its images, once per process.

        Args:
            url: dataset identifier passed to ``datasets.load_dataset``.
            cache_dir: directory passed to ``datasets.load_dataset`` as its
                cache location.
        """
        # ``examples_list`` is the last attribute assigned, so its presence
        # means a previous initialisation ran to completion. Checking it
        # (rather than the first attribute) lets a failed attempt be retried.
        if hasattr(self, "examples_list"):
            return
        self.images_datasets = datasets.load_dataset(url, cache_dir=cache_dir, split="train")
        self.example_df = self.images_datasets.to_pandas()
        self.examples_list = self.convert_bytes_to_images()

    def convert_bytes_to_images(self):
        """Write every image in ``example_df`` to disk and list the results.

        Each row is expected to hold the encoded image under
        ``row["image"]["bytes"]`` and its description under ``row["text"]``.

        Returns:
            A list of ``[description, path_to_image]`` pairs, one per row,
            where the path is ``IMAGE_DIR/image_<index>.jpg``.
        """
        # The target directory may not exist on a fresh checkout.
        os.makedirs(self.IMAGE_DIR, exist_ok=True)

        examples_list = []
        for index, row in self.example_df.iterrows():
            path_to_image = f"{self.IMAGE_DIR}/image_{index}.jpg"
            # The context manager releases the decoder once the file is saved.
            with Image.open(io.BytesIO(row["image"]["bytes"])) as image:
                image.save(path_to_image)
            examples_list.append([row["text"], path_to_image])
        return examples_list
if __name__ == "__main__":
    # Manual smoke test: load the demo dataset and print its first image.
    images_datasets = datasets.load_dataset("Riksarkivet/test_images_demo", cache_dir="./helper/examples/.cache_images")
    # Index the row before the column so only the first image is read,
    # instead of materialising the whole "image" column to take one element.
    print(images_datasets["train"][0]["image"])
|