"""Gradio demo that rates an image on a 0-100 "hot or not" scale using CLIP."""

import torch
import clip
from PIL import Image
import gradio as gr

# Pick the GPU when one is available; the CLIP weights are loaded once at
# import time and shared by every request.
device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)


def hotornot(image, gender: str) -> float:
    """Score how strongly CLIP associates *image* with flattering prompts.

    Three (positive, negative) prompt pairs are built around ``gender``.
    For each pair the image/text logits are turned into a two-way softmax,
    and the positive and negative probabilities are averaged over the pairs.

    Args:
        image: Array-like image exposing ``astype`` (as passed by the Gradio
            image input); it is cast to ``uint8`` and interpreted as RGB.
        gender: Noun inserted into every prompt, e.g. ``"person"``.

    Returns:
        ``((mean_positive - mean_negative) + 1) * 50`` rounded to two
        decimals. Because each pair's probabilities sum to 1, the value lies
        between 0 and 100.

    Raises:
        ValueError: If no image was supplied.
    """
    if image is None:
        # Submitting the form without an upload hands the callback ``None``;
        # fail with a readable message instead of an AttributeError.
        raise ValueError("No image was provided.")

    pil_image = Image.fromarray(image.astype("uint8"), "RGB")
    image_batch = preprocess(pil_image).unsqueeze(0).to(device)

    positive_terms = [f'a hot {gender}', f'a beautiful {gender}', f'an attractive {gender}']
    negative_terms = [f'a gross {gender}', f'an ugly {gender}', f'a hideous {gender}']

    # Interleave as [pos0, neg0, pos1, neg1, ...] so that all prompts can be
    # scored in one forward pass (the image is encoded once, not per pair)
    # and the logits can then be regrouped into one row per pair.
    prompts = [
        term
        for pair in zip(positive_terms, negative_terms)
        for term in pair
    ]
    text = clip.tokenize(prompts).to(device)

    with torch.no_grad():
        logits_per_image, _ = model(image_batch, text)

    # Shape (1, 2 * pairs) -> (pairs, 2); softmax is taken within each pair
    # only, exactly as if each pair had been evaluated on its own.
    pair_probs = logits_per_image.reshape(-1, 2).softmax(dim=-1).cpu().tolist()

    positive_probs = [pair[0] for pair in pair_probs]
    negative_probs = [pair[1] for pair in pair_probs]

    # Debug output for the server console.
    print("+:", positive_probs)
    print("-:", negative_probs)

    hot_score = sum(positive_probs) / len(positive_probs)
    ugly_score = sum(negative_probs) / len(negative_probs)
    print(hot_score, ugly_score)

    # Map the difference from [-1, 1] onto [0, 100].
    composite = ((hot_score - ugly_score) + 1) * 50
    return round(composite, 2)


# NOTE(review): ``gr.inputs`` is the legacy component namespace; confirm it is
# still available in the Gradio version this app is deployed with.
iface = gr.Interface(
    fn=hotornot,
    inputs=[
        gr.inputs.Image(label="Image"),
        gr.inputs.Dropdown(
            [
                'person',
                'man',
                'woman',
            ],
            default='person',
        ),
    ],
    outputs="number",
    title="Hot or Not",
    description="A simple hot or not app using OpenAI's CLIP model.",
)

iface.launch()