import gradio as gr
import torch
from transformers import CLIPModel, CLIPProcessor

# Load the pretrained CLIP model and its matching processor.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
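# Optional sketch: run inference on GPU when one is available (this assumes
# you also move the tokenized inputs to the same device inside predict()):
#   device = "cuda" if torch.cuda.is_available() else "cpu"
#   model.to(device)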

classes = ["Iron Man", "Captain America", "Thor", "Spider-Man", "Black Widow",
           "Black Panther", "Hulk", "Ant-Man", "Peggy Carter", "Daredevil",
           "Star-Lord", "Wong", "Doctor Strange", "Nick Fury", "Gamora",
           "Jessica Jones", "Nebula", "Falcon", "Winter Soldier", "Rocket",
           "Hawkeye"]
text = [f"a photo of {x}" for x in classes]
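# CLIP scores the image against each prompt above (e.g. "a photo of Iron Man"),
# so classification is zero-shot: no task-specific fine-tuning is needed.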
def predict(img):
    inputs = processor(text=text, images=img, return_tensors="pt", padding=True)
    with torch.no_grad():  # inference only; no gradients needed
        outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image  # image-text similarity scores
    probs = logits_per_image.softmax(dim=1).squeeze()  # softmax over prompts gives label probabilities
    return {classes[i]: float(probs[i]) for i in range(len(probs))}
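
# Quick local sanity check (illustrative; assumes an image file "test.jpg"
# sits next to this script):
#   from PIL import Image
#   print(predict(Image.open("test.jpg")))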

title = "Marvel Heroes Classification"
description = "Zero-shot Marvel heroes classification with CLIP"
examples = ["black_panter.jpg"]
# gr.inputs/gr.outputs were removed in Gradio 3+; use the top-level components
# instead (the CLIP processor handles image resizing itself).
gr.Interface(fn=predict, inputs=gr.Image(type="pil"), outputs=gr.Label(),
             examples=examples, title=title, description=description).launch(inline=False)