import torch
import gradio as gr
from transformers import CLIPProcessor, CLIPModel
import spaces
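# Load the CLIP ViT-B/16 checkpoint and its processor; the model is moved to the GPU at startup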
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16").to("cuda")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
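# ZeroGPU: request a GPU allocation of up to 120 seconds for each call to this function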
@spaces.GPU(duration=120)
def calculate_score(image, text):
    # Split the semicolon-separated descriptions into clean, non-empty labels
    labels = text.split(";")
    labels = [l.strip() for l in labels]
    labels = list(filter(None, labels))
    if len(labels) == 0:
        return dict()

    # Tokenize the labels, preprocess the image, and move all tensors to the GPU
    inputs = processor(text=labels, images=image, return_tensors="pt", padding=True)
    inputs = {k: v.to("cuda") for k, v in inputs.items()}

    outputs = model(**inputs)
    logits_per_image = (
        outputs.logits_per_image.detach().cpu().numpy()
    )  # Move results back to CPU for further processing

    # Map each label to its scaled image-text similarity score
    results_dict = {
        label: score / 100.0 for label, score in zip(labels, logits_per_image[0])
    }
    return results_dict
with gr.Blocks() as demo:
    gr.Markdown("# CLIP Score")
    gr.Markdown(
        "Calculate the [CLIP](https://openai.com/blog/clip/) score of a given image and text"
    )
    with gr.Row():
        image_input = gr.Image()
        output_label = gr.Label()
    text_input = gr.Textbox(label="Descriptions (separated by semicolons)")

    # Recompute the scores whenever the image changes or a new description is submitted
    image_input.change(
        fn=calculate_score, inputs=[image_input, text_input], outputs=output_label
    )
    text_input.submit(
        fn=calculate_score, inputs=[image_input, text_input], outputs=output_label
    )

    gr.Examples(
        examples=[
            [
                "cat.jpg",
                "a cat stuck in a door; a cat in the air; a cat sitting; a cat standing; a cat is entering the matrix; a cat is entering the void",
            ]
        ],
        fn=calculate_score,
        inputs=[image_input, text_input],
        outputs=output_label,
    )

demo.launch()