Update app.py
app.py CHANGED
@@ -1,28 +1,28 @@
 import gradio as gr
 import open_clip
 import torch
-from PIL import Image
 
 # Load model and tokenizer
-
+DEVICE='cpu'
+model, preprocess = open_clip.create_model_from_pretrained('hf-hub:woweenie/open-clip-vit-h-nsfw-finetune', device=DEVICE)
 tokenizer = open_clip.get_tokenizer('hf-hub:woweenie/open-clip-vit-h-nsfw-finetune')
 
 # Define labels
 type_labels = ['2.5d render', '3d render', 'photograph', 'anime drawing', 'drawing', 'illustration', 'painting', 'pre-raphaelite painting', 'concept artwork', 'screenshot']
 scene_labels = ['in an airport', 'in the bath', 'on a bed', 'in bed', 'in a bedroom', 'at the beach', 'on a boat', 'in a tent', 'in a car', 'on a chair', 'in the city', 'in a dressing room', 'on the floor', 'at the gym', 'in a hotel room', 'in a kitchen', 'in a living room', 'in an office', 'by a harbor', 'on a bench', 'in a park', 'by a piano', 'on a forest road', 'in a forest', 'in a garden', 'at a lake', 'on the grass', 'on the ground', 'on a paved surface', 'outdoors, on a rock', 'outdoors, on a rug', 'outdoors, on a towel', 'in a photo studio', 'at the pool', 'at a river', 'on a road', 'by the sea', 'showering', 'in the shower', 'on a stool', 'on a rug', 'on a rock', 'on a sofa', 'on a table', 'at a table', 'in a store', 'on snow', 'by a waterfall', 'with a water feature', 'on a windowsill']
 expression_labels = ['scared', 'annoyed', 'aroused', 'bored', 'confident', 'distracted', 'dominating', 'embarrassed', 'scared', 'laughing', 'shy', 'orgasm']
-clothing_labels = ['a bikini that is too small', 'bikini bottoms', 'a bikini top', 'a bikini', 'a bodysuit', 'a bra', 'a crop top', 'a dress', 'garters', 'glasses', 'goggles', 'gym shorts', 'a halter top', 'a hat', 'a handbra', 'a hoodie', 'a jacket', 'jeans', 'a jumper', 'a gown', 'a lace-up top', 'leggings', 'lingerie', 'a long sleeved top', 'a off-shoulder top', 'a nightgown', 'a coat', 'overalls', 'pink pajamas', 'pajamas', 'panties', 'pantyhose', 'a t-shirt', 'a robe', 'a bathrobe', 'a piece of fabric', 'a scarf', 'a shirt', 'shorts', 'a skirt', 'a sleeveless top', 'a slip', 'sneakers', 'tube socks', 'a sports bra', 'sunglasses', 'sweatpants', 'a one piece swimsuit', 'a t-shirt', 'a tank top', 'a tied shirt', 'a top', 'long pants', 'a wetsuit', 'a backpack', 'high hem', 'see-through', 'short', 'tight'
+clothing_labels = ['a bikini that is too small', 'bikini bottoms', 'a bikini top', 'a bikini', 'a bodysuit', 'a bra', 'a crop top', 'a dress', 'garters', 'glasses', 'goggles', 'gym shorts', 'a halter top', 'a hat', 'a handbra', 'a hoodie', 'a jacket', 'jeans', 'a jumper', 'a gown', 'a lace-up top', 'leggings', 'lingerie', 'a long sleeved top', 'a off-shoulder top', 'a nightgown', 'a coat', 'overalls', 'pink pajamas', 'pajamas', 'panties', 'pantyhose', 'a t-shirt', 'a robe', 'a bathrobe', 'a piece of fabric', 'a scarf', 'a shirt', 'shorts', 'a skirt', 'a sleeveless top', 'a slip', 'sneakers', 'tube socks', 'a sports bra', 'sunglasses', 'sweatpants', 'a one piece swimsuit', 'a t-shirt', 'a tank top', 'a tied shirt', 'a top', 'long pants', 'a wetsuit', 'a backpack', 'high hem', 'see-through', 'short', 'tight']
 clothing_labels = ['wearing ' + label for label in clothing_labels]
 
-def process_image(image):
+def process_image_separate_tags(image):
     # Preprocess image
-    image = preprocess(image).unsqueeze(0)
-
+    image = preprocess(image).unsqueeze(0).to(DEVICE)
+
     # Tokenize labels
-    type_text = tokenizer(type_labels)
-    scene_text = tokenizer(scene_labels)
-    expression_text = tokenizer(expression_labels)
-    clothing_text = tokenizer(clothing_labels)
+    type_text = tokenizer(type_labels).to(DEVICE)
+    scene_text = tokenizer(scene_labels).to(DEVICE)
+    expression_text = tokenizer(expression_labels).to(DEVICE)
+    clothing_text = tokenizer(clothing_labels).to(DEVICE)
 
     with torch.no_grad():
         # Encode image and text
@@ -31,7 +31,7 @@ def process_image(image):
         scene_text_features = model.encode_text(scene_text)
         expression_text_features = model.encode_text(expression_text)
         clothing_text_features = model.encode_text(clothing_text)
-
+
         # Normalize features
         image_features /= image_features.norm(dim=-1, keepdim=True)
         type_text_features /= type_text_features.norm(dim=-1, keepdim=True)
@@ -39,11 +39,13 @@ def process_image(image):
         expression_text_features /= expression_text_features.norm(dim=-1, keepdim=True)
         clothing_text_features /= clothing_text_features.norm(dim=-1, keepdim=True)
 
-        # Calculate
-
-
-
-
+        # Calculate cosine similarities and apply softmax
+        # Using temperature parameter to control the "sharpness" of the distribution
+        temperature = 0.1 # Lower values make the distribution more peaked
+        type_text_probs = torch.softmax(image_features @ type_text_features.T / temperature, dim=-1)
+        scene_text_probs = torch.softmax(image_features @ scene_text_features.T / temperature, dim=-1)
+        expression_text_probs = torch.softmax(image_features @ expression_text_features.T / temperature, dim=-1)
+        clothing_text_probs = torch.softmax(image_features @ clothing_text_features.T / temperature, dim=-1)
 
     # Convert to dictionaries
     type_results = {label: float(type_text_probs[0][i]) for i, label in enumerate(type_labels)}
@@ -53,19 +55,68 @@ def process_image(image):
 
     return type_results, scene_results, expression_results, clothing_results
 
+def process_image_combined_tags(image):
+    # Preprocess image
+    image = preprocess(image).unsqueeze(0).to(DEVICE)
+
+    # Tokenize labels
+    all_text = tokenizer(type_labels + scene_labels + expression_labels + clothing_labels).to(DEVICE)
+
+    with torch.no_grad():
+        # Encode image and text
+        image_features = model.encode_image(image)
+        all_text_features = model.encode_text(all_text)
+
+        # Normalize features
+        image_features /= image_features.norm(dim=-1, keepdim=True)
+        all_text_features /= all_text_features.norm(dim=-1, keepdim=True)
+
+        # Calculate cosine similarities and apply softmax
+        # Using temperature parameter to control the "sharpness" of the distribution
+        temperature = 0.1 # Lower values make the distribution more peaked
+        cosine_similarities = image_features @ all_text_features.T
+        all_text_probs = torch.softmax(cosine_similarities / temperature, dim=-1)
+
+    # Convert to dictionaries
+    all_results = {label: float(all_text_probs[0][i]) for i, label in enumerate(type_labels + scene_labels + expression_labels + clothing_labels)}
+
+    return all_results
+
 # Create Gradio interface
-iface = gr.
-
-
-
-
-
-
-
-
-
-
-)
+iface = gr.Blocks(title="NSFW Tagging with Finetuned CLIP")
+
+with iface:
+    gr.Markdown("# NSFW Tagging with Finetuned CLIP")
+    gr.Markdown("Upload an image to analyze its content across multiple NSFW categories.")
+    gr.Markdown("Uses [woweenie/open-clip-vit-h-nsfw-finetune](https://huggingface.co/woweenie/open-clip-vit-h-nsfw-finetune) finetuned on NSFW images.")
+    gr.Markdown("Disclaimer: This model is not perfect and may make mistakes. Use at your own risk.")
+
+    with gr.Tabs():
+        with gr.Tab("Combined Predictions"):
+            with gr.Row():
+                image_input1 = gr.Image(type="pil", label="Upload Image")
+                combined_output = gr.Label(label="Predicted Tags", num_top_classes=10)
+            predict_btn1 = gr.Button("Analyze")
+            predict_btn1.click(
+                fn=process_image_combined_tags,
+                inputs=image_input1,
+                outputs=combined_output
+            )
+
+        with gr.Tab("Categorical Predictions"):
+            with gr.Row():
+                image_input2 = gr.Image(type="pil", label="Upload Image")
+            with gr.Row():
+                type_output = gr.Label(label="Predicted Type", num_top_classes=3)
+                scene_output = gr.Label(label="Predicted Scene", num_top_classes=10)
+                expression_output = gr.Label(label="Predicted Expression", num_top_classes=3)
+                clothing_output = gr.Label(label="Predicted Clothing", num_top_classes=5)
+            predict_btn2 = gr.Button("Analyze")
+            predict_btn2.click(
+                fn=process_image_separate_tags,
+                inputs=image_input2,
+                outputs=[type_output, scene_output, expression_output, clothing_output]
+            )
 
 if __name__ == "__main__":
-iface.launch()
+    iface.launch()
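
The two new entry points can be sanity-checked outside the Gradio UI by importing them directly. The sketch below is illustrative only and not part of the commit: it assumes app.py is importable from the working directory, that a hypothetical test image sample.jpg exists, and that torch, open_clip_torch, gradio, and Pillow are installed. Note the commit drops app.py's own "from PIL import Image", which is fine for the Space itself because gr.Image(type="pil") hands the callbacks PIL images directly, but a local caller has to open the image on its own.

from PIL import Image  # the local caller supplies the PIL image
# Importing app builds the Blocks UI without launching it and loads the
# ViT-H checkpoint (a large download on first run).
from app import process_image_combined_tags, process_image_separate_tags

img = Image.open('sample.jpg').convert('RGB')  # hypothetical test image

# One softmax over the concatenated label list: a single distribution over all tags.
combined = process_image_combined_tags(img)
print(sorted(combined.items(), key=lambda kv: kv[1], reverse=True)[:5])

# One softmax per category: four independent distributions.
type_r, scene_r, expression_r, clothing_r = process_image_separate_tags(img)
for name, results in (('type', type_r), ('scene', scene_r), ('expression', expression_r), ('clothing', clothing_r)):
    print(name, max(results, key=results.get))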