CLIP / app.py
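# Gradio demo: zero-shot scoring of an uploaded image against newline-separated
# text prompts with OpenAI CLIP (ViT-B/32).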
import gradio as gr
import torch
import clip
from PIL import Image


def process(device, image, prompt):
    """Score one image against newline-separated text prompts with CLIP."""
    print("Inferring...")
    # Preprocess the PIL image into a normalized tensor batch of size 1.
    image = preprocess(image).unsqueeze(0).to(device)
    print(image)
    prompts = prompt.split("\n")
    # Tokenize each prompt for the CLIP text encoder.
    text = clip.tokenize(prompts).to(device)
    print(text)
    with torch.no_grad():
        logits_per_image, logits_per_text = model(image, text)
        # Softmax over prompts turns image-text similarities into probabilities.
        probs = logits_per_image.softmax(dim=-1).cpu().numpy()
    print(probs)
    # gr.Label expects {label: confidence}; cast numpy floats to plain floats.
    return {p: float(prob) for p, prob in zip(prompts, probs[0])}


if __name__ == "__main__":
    print("Getting device...")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("Loading model...")
    # Load the ViT-B/32 checkpoint together with its image preprocessing pipeline.
    model, preprocess = clip.load("ViT-B/32", device=device)
    print("Loaded model.")
    iface = gr.Interface(
        fn=lambda i, p: process(device, i, p),
        inputs=[
            # type="pil" so CLIP's preprocess (which expects a PIL Image) accepts the upload.
            gr.Image(type="pil", label="Image"),
            gr.Textbox(lines=5, label="Prompts (newline-separated)"),
        ],
        outputs="label",
    )
    iface.launch()
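
# Run with `python app.py`; by default Gradio serves the interface at http://127.0.0.1:7860.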