# Requires: pip install torch transformers gradio
import gradio as gr
import torch
from transformers import CLIPProcessor, CLIPModel

# Load the pre-trained CLIP model and its paired processor from the Hugging Face Hub
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Candidate labels for zero-shot classification
classes = ["Iron Man", "Captain America", "Thor", "Spider-Man", "Black Widow",
           "Black Panther", "Hulk", "Ant-Man", "Peggy Carter", "Daredevil",
           "Star-Lord", "Wong", "Doctor Strange", "Nick Fury", "Gamora",
           "Jessica Jones", "Nebula", "Falcon", "Winter Soldier", "Rocket",
           "Hawkeye"]

# CLIP was trained on image-caption pairs, so wrapping each label in a
# caption-style prompt tends to score better than the bare class name
text = [f"a photo of {x}" for x in classes]

def predict(img):
    # Tokenize the prompts and preprocess the image into a single batch
    inputs = processor(text=text, images=img, return_tensors="pt", padding=True)
    # No gradients are needed at inference time
    with torch.no_grad():
        outputs = model(**inputs)
    # logits_per_image holds the image-text similarity scores; softmax turns
    # them into a probability distribution over the candidate labels
    probs = outputs.logits_per_image.softmax(dim=1).squeeze()
    return {cls: float(p) for cls, p in zip(classes, probs)}
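
# Optional sanity check: call predict() directly on a local image instead of
# going through the UI (assumes Pillow is installed and the image file exists):
#   from PIL import Image
#   print(predict(Image.open("black_panter.jpg")))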

title = "Marvel Heroes Classification"
description = "Zero-shot classification with CLIP"
# The example image is expected to sit next to this script
examples = ["black_panter.jpg"]

# gr.inputs / gr.outputs are deprecated namespaces (removed in Gradio 4);
# use the components directly. A fixed input shape is unnecessary because
# the CLIP processor resizes images itself.
gr.Interface(fn=predict, inputs=gr.Image(type="pil"), outputs=gr.Label(),
             examples=examples, title=title, description=description).launch(inline=False)
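
# Because classification is zero-shot, supporting a new hero only requires
# appending a name to `classes`; no retraining or fine-tuning is needed.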