Spaces:
Running
on
T4
Running
on
T4
File size: 1,160 Bytes
417b347 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import io
import os

import datasets
from PIL import Image
class DemoImages:
    """Load a demo image dataset and write its images to disk as JPEG files.

    On construction the dataset is loaded with ``datasets.load_dataset``, its
    ``"train"`` split is converted to a pandas DataFrame, and each row's image
    bytes are decoded and saved under ``images_dir``.

    Attributes:
        images_datasets: Object returned by ``datasets.load_dataset``.
        example_df: The ``"train"`` split as a pandas DataFrame.
        examples_list: One ``[description, path_to_image]`` pair per row.
    """

    # Directory the decoded images are saved to. Kept as a class attribute so
    # it can be overridden on a subclass or instance without touching the
    # constructor signature.
    images_dir = "./helper/examples/images"

    # Image modes Pillow's JPEG writer accepts as-is; any other mode
    # (e.g. RGBA, P, LA) is converted to RGB before saving.
    _JPEG_MODES = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    def __init__(self, url="Riksarkivet/test_images_demo", cache_dir="./helper/examples/.cache_images") -> None:
        """Load the dataset and export its images.

        Args:
            url: Dataset identifier passed to ``datasets.load_dataset``.
            cache_dir: Cache directory passed to ``datasets.load_dataset``.
        """
        self.images_datasets = datasets.load_dataset(url, cache_dir=cache_dir)
        self.example_df = self.images_datasets["train"].to_pandas()
        self.examples_list = self.convert_bytes_to_images()

    def convert_bytes_to_images(self) -> list:
        """Decode every row's image, save it as a JPEG and collect the examples.

        Each row of ``example_df`` is read through ``row["image"]["bytes"]``
        (the encoded image) and ``row["text"]`` (its description).

        Returns:
            A list of ``[description, path_to_image]`` pairs in row order.
        """
        # Saving fails with FileNotFoundError if the target directory is
        # missing, so create it up front (no-op when it already exists).
        os.makedirs(self.images_dir, exist_ok=True)

        examples_list = []
        for index, row in self.example_df.iterrows():
            path_to_image = f"{self.images_dir}/image_{index}.jpg"

            # The context manager releases the decoder's resources as soon as
            # the file has been written.
            with Image.open(io.BytesIO(row["image"]["bytes"])) as image:
                # JPEG cannot store alpha or palette data; convert such
                # images instead of letting save() raise OSError.
                if image.mode in self._JPEG_MODES:
                    writable = image
                else:
                    writable = image.convert("RGB")
                writable.save(path_to_image)

            examples_list.append([row["text"], path_to_image])
        return examples_list
if __name__ == "__main__":
    # Manual smoke run: build the demo examples using a cache directory
    # relative to the current working directory, then show the resulting
    # [description, path] pairs.
    demo_images = DemoImages(cache_dir=".cache_images")
    print(demo_images.examples_list)
|