# maken-clip-text / image_vectorizer_clip.py
# versae's picture
# Upload image_vectorizer_clip.py
# 0649211
import csv
import json
import os
import time
import urllib.request
import pandas as pd
import numpy as np
from pathlib import Path
from multiprocessing.dummy import Pool
from tqdm import tqdm
from transformers import CLIPProcessor, CLIPModel
from PIL import Image, ImageFile
# Let Pillow load image files whose data is truncated instead of raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True
# CLIP preprocessor and model, loaded once at import time and shared by
# compute_image_embeddings().
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
def compute_image_embeddings(list_of_images):
    """Return CLIP image features for a batch of images.

    Args:
        list_of_images: Images in a form accepted by the module-level
            ``processor`` (``main`` passes a list of RGB PIL images).

    Returns:
        The tensor produced by ``model.get_image_features`` for the
        preprocessed batch (presumably one row per input image; ``main``
        indexes row 0 of a single-image batch). It is computed under
        ``torch.no_grad()`` and therefore carries no autograd history.
    """
    # Imported locally because this module has no top-level torch import.
    import torch

    inputs = processor(images=list_of_images, return_tensors="pt", padding=True)
    # Inference only: do not record an autograd graph for the forward pass.
    with torch.no_grad():
        return model.get_image_features(**inputs)
def load_image(path, same_height=False):
    """Open an image, force RGB mode, and scale a reference side to 224 px.

    Args:
        path: Location of the image file; passed straight to ``Image.open``.
        same_height: When true, scale so that the height becomes 224.
            Otherwise scale so that the shorter side becomes 224.

    Returns:
        A new ``PIL.Image`` in RGB mode, resized with the aspect ratio kept.
        Each dimension is truncated to an integer but never drops below 1.
    """
    # The context manager guarantees the file handle is closed even if the
    # conversion or the resize raises.
    with Image.open(path) as source:
        rgb = source if source.mode == 'RGB' else source.convert('RGB')
        width, height = rgb.size
        reference = height if same_height else min(width, height)
        ratio = 224 / reference
        # Clamp to 1 so an extremely elongated image cannot truncate to a
        # zero-sized dimension.
        new_size = (max(1, int(width * ratio)), max(1, int(height * ratio)))
        # resize() returns a separate image, so the result stays usable after
        # the source file has been closed.
        return rgb.resize(new_size)
def _save_outputs(rows, embeddings, csv_path, npy_path):
    """Write the metadata rows to ``csv_path`` and the vectors to ``npy_path``."""
    # newline="" is what the csv module expects for files it writes to; the
    # explicit encoding keeps non-ASCII titles portable across platforms.
    with open(csv_path, "w", newline="", encoding="utf-8") as clip_file:
        writer = csv.writer(clip_file)
        writer.writerows(tqdm(rows, desc="Writing rows and embeddings"))
    if embeddings:
        # Always 2-D: one row per processed image, even for a single image.
        matrix = np.vstack(embeddings)
    else:
        # Nothing was processed: store an empty numeric array so the file
        # still loads as a plain array.
        matrix = np.empty((0, 0), dtype=np.float32)
    np.save(npy_path, matrix)


def main(
    images_dir="./vectors_20211011",
    records_dir="./records_20211011",
    csv_path="clip.csv",
    npy_path="clip.npy",
    checkpoint_every=1000,
):
    """Embed every ``.jpg`` under ``images_dir`` and save vectors plus metadata.

    For each image the CLIP embedding is computed and the JSON record at the
    same relative path under ``records_dir`` (with a ``.json`` suffix) is read
    for its title and large-thumbnail link. Results are written to
    ``csv_path`` (header ``id,label,thumbnail``) and ``npy_path``; row ``k``
    of the array corresponds to data row ``k`` of the CSV.

    Args:
        images_dir: Root directory searched recursively for ``*.jpg`` files.
        records_dir: Root directory holding the matching JSON records.
        csv_path: Output path for the metadata CSV.
        npy_path: Output path for the embedding matrix.
        checkpoint_every: Rewrite both output files each time the loop index
            is a non-zero multiple of this value; a falsy value disables the
            intermediate checkpoints.

    Raises:
        OSError: If an image or its record file cannot be opened.
        KeyError: If a record lacks the expected metadata or link fields.
    """
    images_root = Path(images_dir)
    records_root = Path(records_dir)
    # Walk the tree once; the listing supplies both the progress-bar total
    # and the iteration order.
    image_paths = list(images_root.rglob("*.jpg"))

    # Collect per-image vectors in a list and stack only when saving, rather
    # than re-copying the whole matrix on every iteration.
    embeddings = []
    rows = [["id", "label", "thumbnail"]]
    for i, image_path in enumerate(tqdm(image_paths)):
        embedding = compute_image_embeddings(
            [load_image(image_path)]
        ).detach().numpy()[0]
        embeddings.append(embedding)

        # Mirror the image's relative location under the records root; this
        # leaves any other "vectors" or ".jpg" substring in the path alone.
        record_path = (
            records_root / image_path.relative_to(images_root)
        ).with_suffix(".json")
        with open(record_path, encoding="utf-8") as record_file:
            record = json.load(record_file)
        rows.append([
            image_path.stem,
            record["metadata"]["title"],
            record["_links"]["thumbnail_large"]["href"],
        ])

        # Periodic checkpoint so an interrupted run keeps partial results.
        if checkpoint_every and i and i % checkpoint_every == 0:
            _save_outputs(rows, embeddings, csv_path, npy_path)

    _save_outputs(rows, embeddings, csv_path, npy_path)
# Run the embedding pipeline only when executed as a script, not on import.
if __name__ == "__main__":
    main()