fcakyon committed
Commit 66b9ebf
1 Parent(s): 49d6667

Update app.py

Files changed (1):
  app.py: +80 -29
app.py CHANGED
@@ -1,28 +1,28 @@
 import gradio as gr
 import open_clip
 import torch
-from PIL import Image
 
 # Load model and tokenizer
-model, preprocess = open_clip.create_model_from_pretrained('hf-hub:woweenie/open-clip-vit-h-nsfw-finetune', device='cpu')
+DEVICE='cpu'
+model, preprocess = open_clip.create_model_from_pretrained('hf-hub:woweenie/open-clip-vit-h-nsfw-finetune', device=DEVICE)
 tokenizer = open_clip.get_tokenizer('hf-hub:woweenie/open-clip-vit-h-nsfw-finetune')
 
 # Define labels
 type_labels = ['2.5d render', '3d render', 'photograph', 'anime drawing', 'drawing', 'illustration', 'painting', 'pre-raphaelite painting', 'concept artwork', 'screenshot']
 scene_labels = ['in an airport', 'in the bath', 'on a bed', 'in bed', 'in a bedroom', 'at the beach', 'on a boat', 'in a tent', 'in a car', 'on a chair', 'in the city', 'in a dressing room', 'on the floor', 'at the gym', 'in a hotel room', 'in a kitchen', 'in a living room', 'in an office', 'by a harbor', 'on a bench', 'in a park', 'by a piano', 'on a forest road', 'in a forest', 'in a garden', 'at a lake', 'on the grass', 'on the ground', 'on a paved surface', 'outdoors, on a rock', 'outdoors, on a rug', 'outdoors, on a towel', 'in a photo studio', 'at the pool', 'at a river', 'on a road', 'by the sea', 'showering', 'in the shower', 'on a stool', 'on a rug', 'on a rock', 'on a sofa', 'on a table', 'at a table', 'in a store', 'on snow', 'by a waterfall', 'with a water feature', 'on a windowsill']
 expression_labels = ['scared', 'annoyed', 'aroused', 'bored', 'confident', 'distracted', 'dominating', 'embarrassed', 'scared', 'laughing', 'shy', 'orgasm']
-clothing_labels = ['a bikini that is too small', 'bikini bottoms', 'a bikini top', 'a bikini', 'a bodysuit', 'a bra', 'a crop top', 'a dress', 'garters', 'glasses', 'goggles', 'gym shorts', 'a halter top', 'a hat', 'a handbra', 'a hoodie', 'a jacket', 'jeans', 'a jumper', 'a gown', 'a lace-up top', 'leggings', 'lingerie', 'a long sleeved top', 'a off-shoulder top', 'a nightgown', 'a coat', 'overalls', 'pink pajamas', 'pajamas', 'panties', 'pantyhose', 'a t-shirt', 'a robe', 'a bathrobe', 'a piece of fabric', 'a scarf', 'a shirt', 'shorts', 'a skirt', 'a sleeveless top', 'a slip', 'sneakers', 'tube socks', 'a sports bra', 'sunglasses', 'sweatpants', 'a one piece swimsuit', 'a t-shirt', 'a tank top', 'a tied shirt', 'a top', 'long pants', 'a wetsuit', 'a backpack', 'high hem', 'see-through', 'short', 'tight','visible nipples']
+clothing_labels = ['a bikini that is too small', 'bikini bottoms', 'a bikini top', 'a bikini', 'a bodysuit', 'a bra', 'a crop top', 'a dress', 'garters', 'glasses', 'goggles', 'gym shorts', 'a halter top', 'a hat', 'a handbra', 'a hoodie', 'a jacket', 'jeans', 'a jumper', 'a gown', 'a lace-up top', 'leggings', 'lingerie', 'a long sleeved top', 'a off-shoulder top', 'a nightgown', 'a coat', 'overalls', 'pink pajamas', 'pajamas', 'panties', 'pantyhose', 'a t-shirt', 'a robe', 'a bathrobe', 'a piece of fabric', 'a scarf', 'a shirt', 'shorts', 'a skirt', 'a sleeveless top', 'a slip', 'sneakers', 'tube socks', 'a sports bra', 'sunglasses', 'sweatpants', 'a one piece swimsuit', 'a t-shirt', 'a tank top', 'a tied shirt', 'a top', 'long pants', 'a wetsuit', 'a backpack', 'high hem', 'see-through', 'short', 'tight']
 clothing_labels = ['wearing ' + label for label in clothing_labels]
 
-def process_image(image):
+def process_image_separate_tags(image):
     # Preprocess image
-    image = preprocess(image).unsqueeze(0)
-
+    image = preprocess(image).unsqueeze(0).to(DEVICE)
+
     # Tokenize labels
-    type_text = tokenizer(type_labels)
-    scene_text = tokenizer(scene_labels)
-    expression_text = tokenizer(expression_labels)
-    clothing_text = tokenizer(clothing_labels)
+    type_text = tokenizer(type_labels).to(DEVICE)
+    scene_text = tokenizer(scene_labels).to(DEVICE)
+    expression_text = tokenizer(expression_labels).to(DEVICE)
+    clothing_text = tokenizer(clothing_labels).to(DEVICE)
 
     with torch.no_grad():
         # Encode image and text
@@ -31,7 +31,7 @@ def process_image(image):
         scene_text_features = model.encode_text(scene_text)
         expression_text_features = model.encode_text(expression_text)
         clothing_text_features = model.encode_text(clothing_text)
-
+
         # Normalize features
         image_features /= image_features.norm(dim=-1, keepdim=True)
         type_text_features /= type_text_features.norm(dim=-1, keepdim=True)
@@ -39,11 +39,13 @@ def process_image(image):
         expression_text_features /= expression_text_features.norm(dim=-1, keepdim=True)
         clothing_text_features /= clothing_text_features.norm(dim=-1, keepdim=True)
 
-        # Calculate probabilities
-        type_text_probs = (100.0 * image_features @ type_text_features.T).sigmoid(dim=-1)
-        scene_text_probs = (100.0 * image_features @ scene_text_features.T).sigmoid(dim=-1)
-        expression_text_probs = (100.0 * image_features @ expression_text_features.T).sigmoid(dim=-1)
-        clothing_text_probs = (100.0 * image_features @ clothing_text_features.T).sigmoid(dim=-1)
+        # Calculate cosine similarities and apply softmax
+        # Using temperature parameter to control the "sharpness" of the distribution
+        temperature = 0.1 # Lower values make the distribution more peaked
+        type_text_probs = torch.softmax(image_features @ type_text_features.T / temperature, dim=-1)
+        scene_text_probs = torch.softmax(image_features @ scene_text_features.T / temperature, dim=-1)
+        expression_text_probs = torch.softmax(image_features @ expression_text_features.T / temperature, dim=-1)
+        clothing_text_probs = torch.softmax(image_features @ clothing_text_features.T / temperature, dim=-1)
 
     # Convert to dictionaries
     type_results = {label: float(type_text_probs[0][i]) for i, label in enumerate(type_labels)}
@@ -53,19 +55,68 @@ def process_image(image):
 
     return type_results, scene_results, expression_results, clothing_results
 
+def process_image_combined_tags(image):
+    # Preprocess image
+    image = preprocess(image).unsqueeze(0).to(DEVICE)
+
+    # Tokenize labels
+    all_text = tokenizer(type_labels + scene_labels + expression_labels + clothing_labels).to(DEVICE)
+
+    with torch.no_grad():
+        # Encode image and text
+        image_features = model.encode_image(image)
+        all_text_features = model.encode_text(all_text)
+
+        # Normalize features
+        image_features /= image_features.norm(dim=-1, keepdim=True)
+        all_text_features /= all_text_features.norm(dim=-1, keepdim=True)
+
+        # Calculate cosine similarities and apply softmax
+        # Using temperature parameter to control the "sharpness" of the distribution
+        temperature = 0.1 # Lower values make the distribution more peaked
+        cosine_similarities = image_features @ all_text_features.T
+        all_text_probs = torch.softmax(cosine_similarities / temperature, dim=-1)
+
+    # Convert to dictionaries
+    all_results = {label: float(all_text_probs[0][i]) for i, label in enumerate(type_labels + scene_labels + expression_labels + clothing_labels)}
+
+    return all_results
+
 # Create Gradio interface
-iface = gr.Interface(
-    fn=process_image,
-    inputs=gr.Image(type="pil", label="Upload Image"),
-    outputs=[
-        gr.Label(label="Type Classification", num_top_classes=8),
-        gr.Label(label="Scene Classification", num_top_classes=8),
-        gr.Label(label="Expression Classification", num_top_classes=8),
-        gr.Label(label="Clothing Classification", num_top_classes=8)
-    ],
-    title="Image Content Moderation",
-    description="Upload an image to analyze its content across multiple categories."
-)
+iface = gr.Blocks(title="NSFW Tagging with Finetuned CLIP")
+
+with iface:
+    gr.Markdown("# NSFW Tagging with Finetuned CLIP")
+    gr.Markdown("Upload an image to analyze its content across multiple NSFW categories.")
+    gr.Markdown("Uses [woweenie/open-clip-vit-h-nsfw-finetune](https://huggingface.co/woweenie/open-clip-vit-h-nsfw-finetune) finetuned on NSFW images.")
+    gr.Markdown("Disclaimer: This model is not perfect and may make mistakes. Use at your own risk.")
+
+    with gr.Tabs():
+        with gr.Tab("Combined Predictions"):
+            with gr.Row():
+                image_input1 = gr.Image(type="pil", label="Upload Image")
+                combined_output = gr.Label(label="Predicted Tags", num_top_classes=10)
+            predict_btn1 = gr.Button("Analyze")
+            predict_btn1.click(
+                fn=process_image_combined_tags,
+                inputs=image_input1,
+                outputs=combined_output
+            )
+
+        with gr.Tab("Categorical Predictions"):
+            with gr.Row():
+                image_input2 = gr.Image(type="pil", label="Upload Image")
+            with gr.Row():
+                type_output = gr.Label(label="Predicted Type", num_top_classes=3)
+                scene_output = gr.Label(label="Predicted Scene", num_top_classes=10)
+                expression_output = gr.Label(label="Predicted Expression", num_top_classes=3)
+                clothing_output = gr.Label(label="Predicted Clothing", num_top_classes=5)
+            predict_btn2 = gr.Button("Analyze")
+            predict_btn2.click(
+                fn=process_image_separate_tags,
+                inputs=image_input2,
+                outputs=[type_output, scene_output, expression_output, clothing_output]
+            )
 
 if __name__ == "__main__":
-    iface.launch()
+    iface.launch()
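
Note on the scoring change: the removed lines scaled the cosine similarities by CLIP's usual logit factor of 100.0 and then called .sigmoid(dim=-1), which would raise a TypeError, since Tensor.sigmoid takes no dim argument. The commit replaces this with a temperature-scaled softmax. A minimal sketch of the new behaviour, using made-up similarity values rather than real CLIP features:

import torch

# Toy cosine similarities between one image and three labels (illustrative values)
cosine_similarities = torch.tensor([[0.31, 0.28, 0.12]])

# The commit's approach: divide by a temperature, then softmax over the labels
temperature = 0.1
print(torch.softmax(cosine_similarities / temperature, dim=-1))
# ~ tensor([[0.5290, 0.3919, 0.0791]]) -- peaked, sums to 1

# A higher temperature flattens the distribution toward uniform
print(torch.softmax(cosine_similarities / 1.0, dim=-1))
# ~ tensor([[0.3575, 0.3469, 0.2956]])

Dividing by temperature = 0.1 multiplies every similarity by 10 before the softmax, so the small gaps typical of CLIP cosine scores become clear probability gaps; that is what the "sharpness" comments in the diff refer to.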
 
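A quick way to exercise the two new entry points without starting the UI is a smoke test along these lines; it is a sketch, assuming the updated file is importable as app and that a local test.jpg exists (both names are placeholders), with the checkpoint downloading from the Hub on first use. Importing app does not start Gradio, because launch() sits behind the __main__ guard.

from PIL import Image
import app  # the updated app.py; import does not call iface.launch()

img = Image.open('test.jpg').convert('RGB')  # hypothetical local test image

# Combined head: one softmax over all label groups at once
all_results = app.process_image_combined_tags(img)
print(sorted(all_results.items(), key=lambda kv: kv[1], reverse=True)[:10])

# Separate heads: an independent softmax per label group
type_r, scene_r, expr_r, cloth_r = app.process_image_separate_tags(img)
print(max(type_r, key=type_r.get), max(scene_r, key=scene_r.get))

The two tabs answer different questions: in the combined tab every label competes in a single distribution, while the categorical tab normalizes each group separately, so probabilities sum to 1 within each group.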