File size: 1,354 Bytes
8e2b754 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
import jax
import torch
from torchvision.io import ImageReadMode, read_image
from transformers import AutoTokenizer
from modeling_hybrid_clip import FlaxHybridCLIP
from run_hybrid_clip import Transform
# Module-level singletons shared by the helper functions below.
# The model is loaded from the identifier "clip_spanish_1_percent".
# NOTE(review): presumably a local checkpoint directory or a hub id -- confirm.
model = FlaxHybridCLIP.from_pretrained("clip_spanish_1_percent")
# Tokenizer used by prepare_text().
# NOTE(review): presumably matches the text encoder the model was trained with -- confirm.
tokenizer = AutoTokenizer.from_pretrained("dccuchile/bert-base-spanish-wwm-cased")
def prepare_image(image_path):
    """Load an image file and turn it into a one-element NumPy batch.

    The file is decoded as RGB, passed through a TorchScript-compiled
    ``Transform`` sized from ``model.config.vision_config.image_size``,
    wrapped in a batch of one, and its axes are reordered with
    ``permute(0, 2, 3, 1)`` before conversion to NumPy.

    Args:
        image_path: Path of the image file handed to ``read_image``.

    Returns:
        The preprocessed image as a NumPy array with a leading batch axis
        of size 1.
        NOTE(review): presumably channels-last (N, H, W, C), assuming
        ``Transform`` returns a (C, H, W) tensor -- confirm.
    """
    # Decode first so a bad path fails before any transform is built.
    rgb = read_image(image_path, mode=ImageReadMode.RGB)

    # The transform is rebuilt and scripted on every call.
    transform = torch.jit.script(
        Transform(model.config.vision_config.image_size)
    )

    batch = torch.stack([transform(rgb)])
    return batch.permute(0, 2, 3, 1).numpy()
def prepare_text(text):
    """Tokenize ``text`` with the module-level ``tokenizer``.

    Args:
        text: Input forwarded unchanged to the tokenizer call.

    Returns:
        The tokenizer output, requested with ``return_tensors="np"``.
    """
    encoded = tokenizer(text, return_tensors="np")
    return encoded
def run_inference(image_path, text):
    """Score how well ``text`` matches the image stored at ``image_path``.

    The image and the text are preprocessed with ``prepare_image`` and
    ``prepare_text``, fed to the module-level ``model`` in inference mode
    (``train=False``), and the ``logits_per_image`` entry of the output is
    squashed with a sigmoid.

    Args:
        image_path: Path of the image file to score.
        text: Caption to compare against the image.

    Returns:
        ``jax.nn.sigmoid`` applied to the model's ``logits_per_image``.
        NOTE(review): presumably one row per image and one column per
        text -- confirm against the model's output definition.
    """
    image_batch = prepare_image(image_path)
    encoded = prepare_text(text)

    outputs = model(
        encoded["input_ids"],
        image_batch,
        attention_mask=encoded["attention_mask"],
        token_type_ids=encoded["token_type_ids"],
        train=False,
        return_dict=True,
    )

    return jax.nn.sigmoid(outputs["logits_per_image"])
# Demo invocation: score one hard-coded image against a Spanish caption and
# print the result. The image path is an absolute path on the original
# author's machine, so it must be adjusted before running elsewhere.
image_path = "/home/eduardogonzalezponferrada/data/wit/full_dataset/Casa_de_Cultura_%284%29.JPG"
text = "Patio interior de un edificio"
# A stray trailing "|" after this call left a dangling operator and made the
# statement a syntax error; it has been removed.
print(run_inference(image_path, text))