Spaces:
Runtime error
Runtime error
geonmo.gu
committed on
Commit
•
fba8607
1
Parent(s):
6019f50
initial commit
Browse files- README.md +6 -4
- app.py +277 -0
- k21-1.jpg +0 -0
- prompts/categories_places365.txt +365 -0
- prompts/extract_text_features.py +154 -0
- prompts/openimage-classnames.csv +0 -0
- prompts/place365-classnames.txt +365 -0
- prompts/tencent-ml-classnames.txt +0 -0
- prompts/tencent-ml-images.txt +0 -0
- requirements.txt +8 -0
README.md
CHANGED
@@ -1,12 +1,14 @@
|
|
1 |
---
|
2 |
-
title: Socratic Models Image Captioning
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.1.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
|
|
|
|
10 |
---
|
11 |
|
12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: Socratic Models Image Captioning
|
3 |
+
emoji: π
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: blue
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.1.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
+
models:
|
11 |
+
- bigscience/bloom
|
12 |
---
|
13 |
|
14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
ADDED
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import torch
|
3 |
+
import gradio as gr
|
4 |
+
import time
|
5 |
+
import clip
|
6 |
+
#from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
7 |
+
#from flores200_codes import flores_codes
|
8 |
+
import requests
|
9 |
+
import csv
|
10 |
+
import json
|
11 |
+
import wget
|
12 |
+
|
13 |
+
# Precomputed CLIP ViT-L/14 zero-shot classifier weights: local file name
# (inside ./prompts) -> URL the file is fetched from.
url_dict = {
    'clip_ViTL14_openimage_classifier_weights.pt': 'https://raw.githubusercontent.com/geonm/socratic-models-demo/master/prompts/clip_ViTL14_openimage_classifier_weights.pt',
    'clip_ViTL14_place365_classifier_weights.pt': 'https://raw.githubusercontent.com/geonm/socratic-models-demo/master/prompts/clip_ViTL14_place365_classifier_weights.pt',
    'clip_ViTL14_tencentml_classifier_weights.pt': 'https://raw.githubusercontent.com/geonm/socratic-models-demo/master/prompts/clip_ViTL14_tencentml_classifier_weights.pt',
}

os.makedirs('./prompts', exist_ok=True)
for filename, url in url_dict.items():
    # Only fetch files that are missing: wget does not overwrite an existing
    # file but stores the new copy under a different name, so downloading on
    # every start would waste bandwidth and pile up duplicates.
    if not os.path.exists(os.path.join('./prompts', filename)):
        wget.download(url, out='./prompts')

# Hide all CUDA devices from this process; intended to force CPU inference.
os.environ['CUDA_VISIBLE_DEVICES'] = ''

# Hosted inference endpoint for the BLOOM language model.
API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"
# Raises KeyError at import time when the HF_TOKEN environment variable is unset.
HF_TOKEN = os.environ["HF_TOKEN"]
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
|
26 |
+
|
27 |
+
def load_openimage_classnames(csv_path):
    """Load class names from a CSV file.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        dict mapping the 0-based row index to the last column of that row.

    Raises:
        IndexError: If the file contains an empty row.
    """
    # The context manager closes the file deterministically; newline='' is the
    # mode the csv module expects so that quoted fields are parsed correctly.
    with open(csv_path, newline='') as csv_data:
        return {idx: row[-1] for idx, row in enumerate(csv.reader(csv_data))}
|
32 |
+
|
33 |
+
|
34 |
+
def load_tencentml_classnames(txt_path):
    """Load class names from a text file holding one name per line.

    Args:
        txt_path: Path of the text file to read.

    Returns:
        dict mapping the 0-based line index to that line with surrounding
        whitespace stripped.
    """
    # Iterate the file lazily inside a context manager so the handle is closed
    # as soon as the mapping is built.
    with open(txt_path) as txt_data:
        return {idx: line.strip() for idx, line in enumerate(txt_data)}
|
39 |
+
|
40 |
+
|
41 |
+
def build_simple_classifier(clip_model, text_list, template, device):
    """Encode a handful of text prompts into a zero-shot classifier.

    Args:
        clip_model: Model exposing ``encode_text``.
        text_list: Class texts; each one is passed through ``template``.
        template: Callable turning one class text into a full prompt string.
        device: Device the tokenized prompts are moved to.

    Returns:
        Tuple of (L2-normalized text features, dict mapping index -> class text).
    """
    prompts = list(map(template, text_list))
    with torch.no_grad():
        tokens = clip.tokenize(prompts).to(device)
        text_features = clip_model.encode_text(tokens)
        # Normalize in place along the feature axis.
        text_features /= text_features.norm(dim=-1, keepdim=True)

    return text_features, dict(enumerate(text_list))
|
49 |
+
|
50 |
+
|
51 |
+
def load_models():
    """Load CLIP plus every zero-shot classifier used by the demo.

    Returns:
        dict holding the CLIP model and preprocess callable, a
        ``<name>_classifier_weights`` / ``<name>_classnames`` pair for each of
        openimage, tencentml, place365, imgtype, ppl and ifppl, and the
        selected ``device`` string.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    print('\tLoading CLIP ViT-L/14')
    clip_model, clip_preprocess = clip.load("ViT-L/14", device=device)
    model_dict = {'clip_model': clip_model, 'clip_preprocess': clip_preprocess}

    print('\tLoading precomputed zeroshot classifier')
    # (key prefix, weights file, class-name loader, class-name file)
    precomputed = (
        ('openimage', './prompts/clip_ViTL14_openimage_classifier_weights.pt',
         load_openimage_classnames, './prompts/openimage-classnames.csv'),
        ('tencentml', './prompts/clip_ViTL14_tencentml_classifier_weights.pt',
         load_tencentml_classnames, './prompts/tencent-ml-classnames.txt'),
        ('place365', './prompts/clip_ViTL14_place365_classifier_weights.pt',
         load_tencentml_classnames, './prompts/place365-classnames.txt'),
    )
    for prefix, weights_path, load_names, names_path in precomputed:
        # NOTE(review): torch.FloatTensor is the CPU float32 tensor type, so
        # this looks like it would leave the weights on CPU even when device
        # is "cuda" -- confirm before enabling GPU inference.
        model_dict[f'{prefix}_classifier_weights'] = torch.load(weights_path, map_location=device).type(torch.FloatTensor)
        model_dict[f'{prefix}_classnames'] = load_names(names_path)

    print('\tBuilding simple zeroshot classifier')
    # (key prefix, class texts, prompt template)
    simple = (
        ('imgtype', ['photo', 'cartoon', 'sketch', 'painting'],
         lambda c: f'This is a {c}.'),
        ('ppl', ['no people', 'people'],
         lambda c: f'There are {c} in this photo.'),
        ('ifppl', ['is one person', 'are two people', 'are three people', 'are several people', 'are many people'],
         lambda c: f'There {c} in this photo.'),
    )
    for prefix, texts, template in simple:
        weights, classnames = build_simple_classifier(clip_model, texts, template, device)
        model_dict[f'{prefix}_classifier_weights'] = weights
        model_dict[f'{prefix}_classnames'] = classnames

    model_dict['device'] = device

    return model_dict
|
91 |
+
|
92 |
+
|
93 |
+
def drop_gpu(tensor):
    """Convert a torch tensor into a NumPy array.

    Args:
        tensor: Tensor on any device.

    Returns:
        numpy.ndarray with the tensor's values.
    """
    # .cpu() returns the tensor itself when it already lives on the CPU, so no
    # check of global CUDA availability is needed; .detach() additionally
    # allows tensors that require grad, which .numpy() would otherwise reject.
    return tensor.detach().cpu().numpy()
|
98 |
+
|
99 |
+
|
100 |
+
def zeroshot_classifier(image):
    """Run every zero-shot classifier in the global ``model_dict`` on one image.

    Args:
        image: Input accepted by ``model_dict['clip_preprocess']``.

    Returns:
        Tuple of the normalized image features followed by a
        (scores, class names) pair, best first, for openimage, tencentml,
        place365, imgtype, ppl and ifppl, in that order. The first three keep
        the top 10 classes; the last three rank all of their classes.
    """
    def rank(features, prefix, k=None):
        # Softmax over scaled similarities, then the k best classes.
        classnames = model_dict[f'{prefix}_classnames']
        weights = model_dict[f'{prefix}_classifier_weights']
        if k is None:
            k = len(classnames)
        sim = (100.0 * features @ weights.T).softmax(dim=-1)
        scores, indices = [drop_gpu(part) for part in sim[0].topk(k)]
        return scores, [classnames[idx] for idx in indices]

    image_input = model_dict['clip_preprocess'](image).unsqueeze(0).to(model_dict['device'])
    with torch.no_grad():
        image_features = model_dict['clip_model'].encode_image(image_input)
        image_features /= image_features.norm(dim=-1, keepdim=True)

        openimage_scores, openimage_classes = rank(image_features, 'openimage', 10)
        tencentml_scores, tencentml_classes = rank(image_features, 'tencentml', 10)
        place365_scores, place365_classes = rank(image_features, 'place365', 10)
        imgtype_scores, imgtype_classes = rank(image_features, 'imgtype')
        ppl_scores, ppl_classes = rank(image_features, 'ppl')
        ifppl_scores, ifppl_classes = rank(image_features, 'ifppl')

    return (image_features, openimage_scores, openimage_classes, tencentml_scores, tencentml_classes,
            place365_scores, place365_classes, imgtype_scores, imgtype_classes,
            ppl_scores, ppl_classes, ifppl_scores, ifppl_classes)
|
133 |
+
|
134 |
+
|
135 |
+
def generate_prompt(openimage_classes, tencentml_classes, place365_classes, imgtype_classes, ppl_classes, ifppl_classes):
    """Build the captioning prompt sent to the language model.

    Each argument is a sequence of class names ordered best first.

    Args:
        openimage_classes: Object names; only the first two are used.
        tencentml_classes: Object names; all of them are used.
        place365_classes: Place names; the first three are used.
        imgtype_classes: Image types; the first one is used.
        ppl_classes: 'people' / 'no people' ranking; the first one is used.
        ifppl_classes: People-count phrases; the first one is used when the
            top ``ppl_classes`` entry is 'people'.

    Returns:
        The prompt string, ending with the cue the model should continue.

    Raises:
        IndexError: If a sequence is shorter than the number of entries used.
    """
    img_type = imgtype_classes[0]
    ppl_result = ppl_classes[0]
    if ppl_result == 'people':
        # The count phrases already carry their verb ("is one person", ...).
        ppl_result = ifppl_classes[0]
    else:
        ppl_result = f'are {ppl_result}'

    object_list = ', '.join(
        f'{cls}' for cls in [*tencentml_classes, *openimage_classes[:2]]
    )

    # Explicit "\n" joins keep the prompt text independent of source indentation.
    prompt_caption = (
        'I am an intelligent image captioning bot.\n'
        f'This image is a {img_type}. There {ppl_result}.\n'
        f'I think this photo was taken at a {place365_classes[0]}, {place365_classes[1]}, or {place365_classes[2]}.\n'
        f'I think there might be a {object_list} in this {img_type}.\n'
        'A creative short caption I can generate to describe this image is:'
    )

    return prompt_caption
|
165 |
+
|
166 |
+
|
167 |
+
def generate_captions(prompt, num_captions=3):
    """Request caption continuations for ``prompt`` from the BLOOM endpoint.

    Args:
        prompt: Text the model should continue.
        num_captions: Number of requests to send; one caption per request.

    Returns:
        List of generated texts with the prompt removed.

    Raises:
        requests.RequestException: On network failure or timeout.
        RuntimeError: If the endpoint does not answer with a list whose first
            item carries ``generated_text`` (for example an error payload).
    """
    request_headers = {"Authorization": f"Bearer {HF_TOKEN}"}

    # Greedy decoding of at most 16 new tokens.
    parameters = {
        "max_new_tokens": 16,
        "do_sample": False,
        "seed": 42,
        "early_stopping": False,
        "length_penalty": 0.0,
        "eos_token_id": None,
    }
    payload = {"inputs": prompt, "parameters": parameters, "options": {"use_cache": False}}

    bloom_results = []
    for _ in range(num_captions):
        # Without a timeout a stalled connection would block the request forever.
        response = requests.post(API_URL, headers=request_headers, json=payload, timeout=120)
        try:
            output = response.json()
        except ValueError as err:
            raise RuntimeError(
                f'BLOOM request failed (HTTP {response.status_code}): response is not JSON'
            ) from err
        # Surface unexpected payloads with their content instead of failing
        # on output[0] with an opaque KeyError/TypeError.
        if not (isinstance(output, list) and output
                and isinstance(output[0], dict) and 'generated_text' in output[0]):
            raise RuntimeError(
                f'BLOOM request failed (HTTP {response.status_code}): {output!r}'
            )
        # Drop the prompt text from the generated text so only the
        # continuation remains.
        bloom_results.append(output[0]['generated_text'].replace(prompt, ''))
    return bloom_results
|
203 |
+
|
204 |
+
|
205 |
+
def sorting_texts(image_features, captions):
    """Rank captions by their similarity to the image.

    Args:
        image_features: Image features as returned by ``zeroshot_classifier``.
        captions: List of caption strings.

    Returns:
        Tuple of (scores, captions), both ordered best first; scores are the
        softmax over the scaled similarities to the image.
    """
    encoder = model_dict['clip_model']
    with torch.no_grad():
        tokens = clip.tokenize(captions).to(model_dict['device'])
        caption_features = encoder.encode_text(tokens)
        caption_features /= caption_features.norm(dim=-1, keepdim=True)

        probs = (100.0 * image_features @ caption_features.T).softmax(dim=-1)
        scores, order = [drop_gpu(part) for part in probs[0].topk(len(captions))]

    ranked = [captions[pos] for pos in order]
    return scores, ranked
|
216 |
+
|
217 |
+
|
218 |
+
def postprocess_results(scores, classes):
    """Pair scores with their outputs in a JSON-friendly form.

    Args:
        scores: Iterable of numeric scores.
        classes: Iterable of outputs aligned with ``scores``.

    Returns:
        List of ``{'score': <float rounded to 4 decimals>, 'output': <item>}``
        dicts; the length follows the shorter of the two inputs.
    """
    return [
        # Format-then-parse yields a plain Python float with 4 decimals.
        {'score': float(f'{float(val):.4f}'), 'output': cls}
        for val, cls in zip(scores, classes)
    ]
|
224 |
+
|
225 |
+
|
226 |
+
def image_captioning(image):
    """Caption one image: classify it, prompt BLOOM, then rank the captions.

    Args:
        image: Image passed on to ``zeroshot_classifier``.

    Returns:
        dict with ``inference_time`` (seconds spent in the CLIP classifiers
        and in the BLOOM request), ``generated_captions`` (scored captions)
        and ``reasoning`` (scored classes of every zero-shot classifier).
    """
    start_time = time.time()
    (image_features,
     openimage_scores, openimage_classes,
     tencentml_scores, tencentml_classes,
     place365_scores, place365_classes,
     imgtype_scores, imgtype_classes,
     ppl_scores, ppl_classes,
     ifppl_scores, ifppl_classes) = zeroshot_classifier(image)
    end_zeroshot = time.time()

    prompt_caption = generate_prompt(openimage_classes, tencentml_classes, place365_classes,
                                     imgtype_classes, ppl_classes, ifppl_classes)
    generated_captions = generate_captions(prompt_caption, num_captions=1)
    end_bloom = time.time()

    caption_scores, sorted_captions = sorting_texts(image_features, generated_captions)

    return {
        'inference_time': {
            'CLIP inference': end_zeroshot - start_time,
            'BLOOM request': end_bloom - end_zeroshot,
        },
        'generated_captions': postprocess_results(caption_scores, sorted_captions),
        'reasoning': {
            'openimage_results': postprocess_results(openimage_scores, openimage_classes),
            'tencentml_results': postprocess_results(tencentml_scores, tencentml_classes),
            'place365_results': postprocess_results(place365_scores, place365_classes),
            'imgtype_results': postprocess_results(imgtype_scores, imgtype_classes),
            'ppl_results': postprocess_results(ppl_scores, ppl_classes),
            'ifppl_results': postprocess_results(ifppl_scores, ifppl_classes),
        },
    }
|
247 |
+
|
248 |
+
|
249 |
+
if __name__ == '__main__':
    print('\tinit models')

    # Module-level name read by zeroshot_classifier and sorting_texts.
    model_dict = load_models()

    # define gradio demo
    demo_status = "Demo is running on CPU"
    demo = gr.Interface(
        image_captioning,
        [gr.inputs.Image(type="pil", label="Image")],
        gr.outputs.JSON(),
        title="Socratic models for image captioning with BLOOM",
        description=f"Details: https://github.com/geonm/socratic-models-demo. {demo_status}",
        article="<p style='text-align: center'><a href='https://arxiv.org/abs/2204.00598'>Socratic Models: Composing Zero-Shot Multimodal Reasoning with Language</a></p>",
        examples=['k21-1.jpg'],
    )
    demo.launch()
|
k21-1.jpg
ADDED
prompts/categories_places365.txt
ADDED
@@ -0,0 +1,365 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/a/airfield 0
|
2 |
+
/a/airplane_cabin 1
|
3 |
+
/a/airport_terminal 2
|
4 |
+
/a/alcove 3
|
5 |
+
/a/alley 4
|
6 |
+
/a/amphitheater 5
|
7 |
+
/a/amusement_arcade 6
|
8 |
+
/a/amusement_park 7
|
9 |
+
/a/apartment_building/outdoor 8
|
10 |
+
/a/aquarium 9
|
11 |
+
/a/aqueduct 10
|
12 |
+
/a/arcade 11
|
13 |
+
/a/arch 12
|
14 |
+
/a/archaelogical_excavation 13
|
15 |
+
/a/archive 14
|
16 |
+
/a/arena/hockey 15
|
17 |
+
/a/arena/performance 16
|
18 |
+
/a/arena/rodeo 17
|
19 |
+
/a/army_base 18
|
20 |
+
/a/art_gallery 19
|
21 |
+
/a/art_school 20
|
22 |
+
/a/art_studio 21
|
23 |
+
/a/artists_loft 22
|
24 |
+
/a/assembly_line 23
|
25 |
+
/a/athletic_field/outdoor 24
|
26 |
+
/a/atrium/public 25
|
27 |
+
/a/attic 26
|
28 |
+
/a/auditorium 27
|
29 |
+
/a/auto_factory 28
|
30 |
+
/a/auto_showroom 29
|
31 |
+
/b/badlands 30
|
32 |
+
/b/bakery/shop 31
|
33 |
+
/b/balcony/exterior 32
|
34 |
+
/b/balcony/interior 33
|
35 |
+
/b/ball_pit 34
|
36 |
+
/b/ballroom 35
|
37 |
+
/b/bamboo_forest 36
|
38 |
+
/b/bank_vault 37
|
39 |
+
/b/banquet_hall 38
|
40 |
+
/b/bar 39
|
41 |
+
/b/barn 40
|
42 |
+
/b/barndoor 41
|
43 |
+
/b/baseball_field 42
|
44 |
+
/b/basement 43
|
45 |
+
/b/basketball_court/indoor 44
|
46 |
+
/b/bathroom 45
|
47 |
+
/b/bazaar/indoor 46
|
48 |
+
/b/bazaar/outdoor 47
|
49 |
+
/b/beach 48
|
50 |
+
/b/beach_house 49
|
51 |
+
/b/beauty_salon 50
|
52 |
+
/b/bedchamber 51
|
53 |
+
/b/bedroom 52
|
54 |
+
/b/beer_garden 53
|
55 |
+
/b/beer_hall 54
|
56 |
+
/b/berth 55
|
57 |
+
/b/biology_laboratory 56
|
58 |
+
/b/boardwalk 57
|
59 |
+
/b/boat_deck 58
|
60 |
+
/b/boathouse 59
|
61 |
+
/b/bookstore 60
|
62 |
+
/b/booth/indoor 61
|
63 |
+
/b/botanical_garden 62
|
64 |
+
/b/bow_window/indoor 63
|
65 |
+
/b/bowling_alley 64
|
66 |
+
/b/boxing_ring 65
|
67 |
+
/b/bridge 66
|
68 |
+
/b/building_facade 67
|
69 |
+
/b/bullring 68
|
70 |
+
/b/burial_chamber 69
|
71 |
+
/b/bus_interior 70
|
72 |
+
/b/bus_station/indoor 71
|
73 |
+
/b/butchers_shop 72
|
74 |
+
/b/butte 73
|
75 |
+
/c/cabin/outdoor 74
|
76 |
+
/c/cafeteria 75
|
77 |
+
/c/campsite 76
|
78 |
+
/c/campus 77
|
79 |
+
/c/canal/natural 78
|
80 |
+
/c/canal/urban 79
|
81 |
+
/c/candy_store 80
|
82 |
+
/c/canyon 81
|
83 |
+
/c/car_interior 82
|
84 |
+
/c/carrousel 83
|
85 |
+
/c/castle 84
|
86 |
+
/c/catacomb 85
|
87 |
+
/c/cemetery 86
|
88 |
+
/c/chalet 87
|
89 |
+
/c/chemistry_lab 88
|
90 |
+
/c/childs_room 89
|
91 |
+
/c/church/indoor 90
|
92 |
+
/c/church/outdoor 91
|
93 |
+
/c/classroom 92
|
94 |
+
/c/clean_room 93
|
95 |
+
/c/cliff 94
|
96 |
+
/c/closet 95
|
97 |
+
/c/clothing_store 96
|
98 |
+
/c/coast 97
|
99 |
+
/c/cockpit 98
|
100 |
+
/c/coffee_shop 99
|
101 |
+
/c/computer_room 100
|
102 |
+
/c/conference_center 101
|
103 |
+
/c/conference_room 102
|
104 |
+
/c/construction_site 103
|
105 |
+
/c/corn_field 104
|
106 |
+
/c/corral 105
|
107 |
+
/c/corridor 106
|
108 |
+
/c/cottage 107
|
109 |
+
/c/courthouse 108
|
110 |
+
/c/courtyard 109
|
111 |
+
/c/creek 110
|
112 |
+
/c/crevasse 111
|
113 |
+
/c/crosswalk 112
|
114 |
+
/d/dam 113
|
115 |
+
/d/delicatessen 114
|
116 |
+
/d/department_store 115
|
117 |
+
/d/desert/sand 116
|
118 |
+
/d/desert/vegetation 117
|
119 |
+
/d/desert_road 118
|
120 |
+
/d/diner/outdoor 119
|
121 |
+
/d/dining_hall 120
|
122 |
+
/d/dining_room 121
|
123 |
+
/d/discotheque 122
|
124 |
+
/d/doorway/outdoor 123
|
125 |
+
/d/dorm_room 124
|
126 |
+
/d/downtown 125
|
127 |
+
/d/dressing_room 126
|
128 |
+
/d/driveway 127
|
129 |
+
/d/drugstore 128
|
130 |
+
/e/elevator/door 129
|
131 |
+
/e/elevator_lobby 130
|
132 |
+
/e/elevator_shaft 131
|
133 |
+
/e/embassy 132
|
134 |
+
/e/engine_room 133
|
135 |
+
/e/entrance_hall 134
|
136 |
+
/e/escalator/indoor 135
|
137 |
+
/e/excavation 136
|
138 |
+
/f/fabric_store 137
|
139 |
+
/f/farm 138
|
140 |
+
/f/fastfood_restaurant 139
|
141 |
+
/f/field/cultivated 140
|
142 |
+
/f/field/wild 141
|
143 |
+
/f/field_road 142
|
144 |
+
/f/fire_escape 143
|
145 |
+
/f/fire_station 144
|
146 |
+
/f/fishpond 145
|
147 |
+
/f/flea_market/indoor 146
|
148 |
+
/f/florist_shop/indoor 147
|
149 |
+
/f/food_court 148
|
150 |
+
/f/football_field 149
|
151 |
+
/f/forest/broadleaf 150
|
152 |
+
/f/forest_path 151
|
153 |
+
/f/forest_road 152
|
154 |
+
/f/formal_garden 153
|
155 |
+
/f/fountain 154
|
156 |
+
/g/galley 155
|
157 |
+
/g/garage/indoor 156
|
158 |
+
/g/garage/outdoor 157
|
159 |
+
/g/gas_station 158
|
160 |
+
/g/gazebo/exterior 159
|
161 |
+
/g/general_store/indoor 160
|
162 |
+
/g/general_store/outdoor 161
|
163 |
+
/g/gift_shop 162
|
164 |
+
/g/glacier 163
|
165 |
+
/g/golf_course 164
|
166 |
+
/g/greenhouse/indoor 165
|
167 |
+
/g/greenhouse/outdoor 166
|
168 |
+
/g/grotto 167
|
169 |
+
/g/gymnasium/indoor 168
|
170 |
+
/h/hangar/indoor 169
|
171 |
+
/h/hangar/outdoor 170
|
172 |
+
/h/harbor 171
|
173 |
+
/h/hardware_store 172
|
174 |
+
/h/hayfield 173
|
175 |
+
/h/heliport 174
|
176 |
+
/h/highway 175
|
177 |
+
/h/home_office 176
|
178 |
+
/h/home_theater 177
|
179 |
+
/h/hospital 178
|
180 |
+
/h/hospital_room 179
|
181 |
+
/h/hot_spring 180
|
182 |
+
/h/hotel/outdoor 181
|
183 |
+
/h/hotel_room 182
|
184 |
+
/h/house 183
|
185 |
+
/h/hunting_lodge/outdoor 184
|
186 |
+
/i/ice_cream_parlor 185
|
187 |
+
/i/ice_floe 186
|
188 |
+
/i/ice_shelf 187
|
189 |
+
/i/ice_skating_rink/indoor 188
|
190 |
+
/i/ice_skating_rink/outdoor 189
|
191 |
+
/i/iceberg 190
|
192 |
+
/i/igloo 191
|
193 |
+
/i/industrial_area 192
|
194 |
+
/i/inn/outdoor 193
|
195 |
+
/i/islet 194
|
196 |
+
/j/jacuzzi/indoor 195
|
197 |
+
/j/jail_cell 196
|
198 |
+
/j/japanese_garden 197
|
199 |
+
/j/jewelry_shop 198
|
200 |
+
/j/junkyard 199
|
201 |
+
/k/kasbah 200
|
202 |
+
/k/kennel/outdoor 201
|
203 |
+
/k/kindergarden_classroom 202
|
204 |
+
/k/kitchen 203
|
205 |
+
/l/lagoon 204
|
206 |
+
/l/lake/natural 205
|
207 |
+
/l/landfill 206
|
208 |
+
/l/landing_deck 207
|
209 |
+
/l/laundromat 208
|
210 |
+
/l/lawn 209
|
211 |
+
/l/lecture_room 210
|
212 |
+
/l/legislative_chamber 211
|
213 |
+
/l/library/indoor 212
|
214 |
+
/l/library/outdoor 213
|
215 |
+
/l/lighthouse 214
|
216 |
+
/l/living_room 215
|
217 |
+
/l/loading_dock 216
|
218 |
+
/l/lobby 217
|
219 |
+
/l/lock_chamber 218
|
220 |
+
/l/locker_room 219
|
221 |
+
/m/mansion 220
|
222 |
+
/m/manufactured_home 221
|
223 |
+
/m/market/indoor 222
|
224 |
+
/m/market/outdoor 223
|
225 |
+
/m/marsh 224
|
226 |
+
/m/martial_arts_gym 225
|
227 |
+
/m/mausoleum 226
|
228 |
+
/m/medina 227
|
229 |
+
/m/mezzanine 228
|
230 |
+
/m/moat/water 229
|
231 |
+
/m/mosque/outdoor 230
|
232 |
+
/m/motel 231
|
233 |
+
/m/mountain 232
|
234 |
+
/m/mountain_path 233
|
235 |
+
/m/mountain_snowy 234
|
236 |
+
/m/movie_theater/indoor 235
|
237 |
+
/m/museum/indoor 236
|
238 |
+
/m/museum/outdoor 237
|
239 |
+
/m/music_studio 238
|
240 |
+
/n/natural_history_museum 239
|
241 |
+
/n/nursery 240
|
242 |
+
/n/nursing_home 241
|
243 |
+
/o/oast_house 242
|
244 |
+
/o/ocean 243
|
245 |
+
/o/office 244
|
246 |
+
/o/office_building 245
|
247 |
+
/o/office_cubicles 246
|
248 |
+
/o/oilrig 247
|
249 |
+
/o/operating_room 248
|
250 |
+
/o/orchard 249
|
251 |
+
/o/orchestra_pit 250
|
252 |
+
/p/pagoda 251
|
253 |
+
/p/palace 252
|
254 |
+
/p/pantry 253
|
255 |
+
/p/park 254
|
256 |
+
/p/parking_garage/indoor 255
|
257 |
+
/p/parking_garage/outdoor 256
|
258 |
+
/p/parking_lot 257
|
259 |
+
/p/pasture 258
|
260 |
+
/p/patio 259
|
261 |
+
/p/pavilion 260
|
262 |
+
/p/pet_shop 261
|
263 |
+
/p/pharmacy 262
|
264 |
+
/p/phone_booth 263
|
265 |
+
/p/physics_laboratory 264
|
266 |
+
/p/picnic_area 265
|
267 |
+
/p/pier 266
|
268 |
+
/p/pizzeria 267
|
269 |
+
/p/playground 268
|
270 |
+
/p/playroom 269
|
271 |
+
/p/plaza 270
|
272 |
+
/p/pond 271
|
273 |
+
/p/porch 272
|
274 |
+
/p/promenade 273
|
275 |
+
/p/pub/indoor 274
|
276 |
+
/r/racecourse 275
|
277 |
+
/r/raceway 276
|
278 |
+
/r/raft 277
|
279 |
+
/r/railroad_track 278
|
280 |
+
/r/rainforest 279
|
281 |
+
/r/reception 280
|
282 |
+
/r/recreation_room 281
|
283 |
+
/r/repair_shop 282
|
284 |
+
/r/residential_neighborhood 283
|
285 |
+
/r/restaurant 284
|
286 |
+
/r/restaurant_kitchen 285
|
287 |
+
/r/restaurant_patio 286
|
288 |
+
/r/rice_paddy 287
|
289 |
+
/r/river 288
|
290 |
+
/r/rock_arch 289
|
291 |
+
/r/roof_garden 290
|
292 |
+
/r/rope_bridge 291
|
293 |
+
/r/ruin 292
|
294 |
+
/r/runway 293
|
295 |
+
/s/sandbox 294
|
296 |
+
/s/sauna 295
|
297 |
+
/s/schoolhouse 296
|
298 |
+
/s/science_museum 297
|
299 |
+
/s/server_room 298
|
300 |
+
/s/shed 299
|
301 |
+
/s/shoe_shop 300
|
302 |
+
/s/shopfront 301
|
303 |
+
/s/shopping_mall/indoor 302
|
304 |
+
/s/shower 303
|
305 |
+
/s/ski_resort 304
|
306 |
+
/s/ski_slope 305
|
307 |
+
/s/sky 306
|
308 |
+
/s/skyscraper 307
|
309 |
+
/s/slum 308
|
310 |
+
/s/snowfield 309
|
311 |
+
/s/soccer_field 310
|
312 |
+
/s/stable 311
|
313 |
+
/s/stadium/baseball 312
|
314 |
+
/s/stadium/football 313
|
315 |
+
/s/stadium/soccer 314
|
316 |
+
/s/stage/indoor 315
|
317 |
+
/s/stage/outdoor 316
|
318 |
+
/s/staircase 317
|
319 |
+
/s/storage_room 318
|
320 |
+
/s/street 319
|
321 |
+
/s/subway_station/platform 320
|
322 |
+
/s/supermarket 321
|
323 |
+
/s/sushi_bar 322
|
324 |
+
/s/swamp 323
|
325 |
+
/s/swimming_hole 324
|
326 |
+
/s/swimming_pool/indoor 325
|
327 |
+
/s/swimming_pool/outdoor 326
|
328 |
+
/s/synagogue/outdoor 327
|
329 |
+
/t/television_room 328
|
330 |
+
/t/television_studio 329
|
331 |
+
/t/temple/asia 330
|
332 |
+
/t/throne_room 331
|
333 |
+
/t/ticket_booth 332
|
334 |
+
/t/topiary_garden 333
|
335 |
+
/t/tower 334
|
336 |
+
/t/toyshop 335
|
337 |
+
/t/train_interior 336
|
338 |
+
/t/train_station/platform 337
|
339 |
+
/t/tree_farm 338
|
340 |
+
/t/tree_house 339
|
341 |
+
/t/trench 340
|
342 |
+
/t/tundra 341
|
343 |
+
/u/underwater/ocean_deep 342
|
344 |
+
/u/utility_room 343
|
345 |
+
/v/valley 344
|
346 |
+
/v/vegetable_garden 345
|
347 |
+
/v/veterinarians_office 346
|
348 |
+
/v/viaduct 347
|
349 |
+
/v/village 348
|
350 |
+
/v/vineyard 349
|
351 |
+
/v/volcano 350
|
352 |
+
/v/volleyball_court/outdoor 351
|
353 |
+
/w/waiting_room 352
|
354 |
+
/w/water_park 353
|
355 |
+
/w/water_tower 354
|
356 |
+
/w/waterfall 355
|
357 |
+
/w/watering_hole 356
|
358 |
+
/w/wave 357
|
359 |
+
/w/wet_bar 358
|
360 |
+
/w/wheat_field 359
|
361 |
+
/w/wind_farm 360
|
362 |
+
/w/windmill 361
|
363 |
+
/y/yard 362
|
364 |
+
/y/youth_hostel 363
|
365 |
+
/z/zen_garden 364
|
prompts/extract_text_features.py
ADDED
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import numpy as np
|
3 |
+
import torch
|
4 |
+
import clip
|
5 |
+
import csv
|
6 |
+
import tqdm
|
7 |
+
from profanity_filter import ProfanityFilter
|
8 |
+
|
9 |
+
|
10 |
+
# Prompt patterns used to turn a class name into many natural-language
# captions. ``{c}`` marks where the class name is substituted. Order is
# preserved from the original list; the wording (including "a origami" and
# "a embroidered") is reproduced unchanged because the resulting strings are
# what the text encoder sees.
_PROMPT_PATTERNS = (
    'a bad photo of a {c}.',
    'a photo of many {c}.',
    'a sculpture of a {c}.',
    'a photo of the hard to see {c}.',
    'a low resolution photo of the {c}.',
    'a rendering of a {c}.',
    'graffiti of a {c}.',
    'a bad photo of the {c}.',
    'a cropped photo of the {c}.',
    'a tattoo of a {c}.',
    'the embroidered {c}.',
    'a photo of a hard to see {c}.',
    'a bright photo of a {c}.',
    'a photo of a clean {c}.',
    'a photo of a dirty {c}.',
    'a dark photo of the {c}.',
    'a drawing of a {c}.',
    'a photo of my {c}.',
    'the plastic {c}.',
    'a photo of the cool {c}.',
    'a close-up photo of a {c}.',
    'a black and white photo of the {c}.',
    'a painting of the {c}.',
    'a painting of a {c}.',
    'a pixelated photo of the {c}.',
    'a sculpture of the {c}.',
    'a bright photo of the {c}.',
    'a cropped photo of a {c}.',
    'a plastic {c}.',
    'a photo of the dirty {c}.',
    'a jpeg corrupted photo of a {c}.',
    'a blurry photo of the {c}.',
    'a photo of the {c}.',
    'a good photo of the {c}.',
    'a rendering of the {c}.',
    'a {c} in a video game.',
    'a photo of one {c}.',
    'a doodle of a {c}.',
    'a close-up photo of the {c}.',
    'a photo of a {c}.',
    'the origami {c}.',
    'the {c} in a video game.',
    'a sketch of a {c}.',
    'a doodle of the {c}.',
    'a origami {c}.',
    'a low resolution photo of a {c}.',
    'the toy {c}.',
    'a rendition of the {c}.',
    'a photo of the clean {c}.',
    'a photo of a large {c}.',
    'a rendition of a {c}.',
    'a photo of a nice {c}.',
    'a photo of a weird {c}.',
    'a blurry photo of a {c}.',
    'a cartoon {c}.',
    'art of a {c}.',
    'a sketch of the {c}.',
    'a embroidered {c}.',
    'a pixelated photo of a {c}.',
    'itap of the {c}.',
    'a jpeg corrupted photo of the {c}.',
    'a good photo of a {c}.',
    'a plushie {c}.',
    'a photo of the nice {c}.',
    'a photo of the small {c}.',
    'a photo of the weird {c}.',
    'the cartoon {c}.',
    'art of the {c}.',
    'a drawing of the {c}.',
    'a photo of the large {c}.',
    'a black and white photo of a {c}.',
    'the plushie {c}.',
    'a dark photo of a {c}.',
    'itap of a {c}.',
    'graffiti of the {c}.',
    'a toy {c}.',
    'itap of my {c}.',
    'a photo of a cool {c}.',
    'a photo of a small {c}.',
    'a tattoo of the {c}.',
)


def _make_template(pattern):
    """Return a one-argument callable that fills ``{c}`` in *pattern*."""
    return lambda c: pattern.format(c=c)


# One callable per pattern; ``template(classname)`` yields the caption text.
templates = [_make_template(pattern) for pattern in _PROMPT_PATTERNS]
|
92 |
+
|
93 |
+
# Default to GPU 0, but respect a CUDA_VISIBLE_DEVICES value the caller has
# already exported instead of overwriting it unconditionally.
os.environ.setdefault('CUDA_VISIBLE_DEVICES', '0')
# Fall back to CPU when no CUDA device is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
# clip.load returns the model together with its image preprocessing
# transform; this script only encodes text, so clip_preprocess is unused in
# the code visible here.
clip_model, clip_preprocess = clip.load("ViT-L/14", device=device)
|
96 |
+
|
97 |
+
# ---------------------------------------------------------------------------
# Class-name sources. Exactly one source populates `class_names`; the two
# alternatives below are disabled and kept only as reference snippets.
#
# Alternative 1 (disabled): Open Images -- take the last column of each row
# of 'openimage-classnames.csv'.
#
#   csv_data = open('openimage-classnames.csv')
#   csv_reader = csv.reader(csv_data)
#   class_names = []
#   for row in csv_reader:
#       class_names.append(row[-1])
#
# Alternative 2 (disabled): Tencent ML-Images -- skip the first 4 lines of
# 'tencent-ml-images.txt', take the last tab-separated field of each line,
# drop every comma-separated synonym that ProfanityFilter flags, and write
# the surviving names to 'tencent-ml-classnames.txt'.
# NOTE(review): indentation of this snippet was reconstructed -- confirm
# before re-enabling.
#
#   txt_data = open('tencent-ml-images.txt')
#   pf = ProfanityFilter()
#   lines = txt_data.readlines()
#   class_names = []
#   for line in lines[4:]:
#       class_name_precook = line.strip().split('\t')[-1]
#       safe_list = ''
#       for class_name in class_name_precook.split(', '):
#           if pf.is_clean(class_name):
#               safe_list += '%s, ' % class_name
#       safe_list = safe_list[:-2]
#       if len(safe_list) > 0:
#           class_names.append(safe_list)
#   f_w = open('tencent-ml-classnames.txt', 'w')
#   for cln in class_names:
#       f_w.write('%s\n' % cln)
#   f_w.close()
# ---------------------------------------------------------------------------


def _place_label(category_path):
    """Turn a Places365 category path into a human-readable label.

    The first two '/'-separated pieces (the empty string before the leading
    slash and the one-letter index) are discarded. If two or more pieces
    remain, the second is placed in front of the first, so
    '/a/apartment_building/outdoor' becomes 'outdoor apartment building';
    any further pieces are ignored. Underscores become spaces.
    """
    parts = category_path.split('/')[2:]
    if len(parts) > 1:
        label = parts[1] + ' ' + parts[0]
    else:
        label = parts[0]
    return label.replace('_', ' ')


# Active source: Places365. Only column 0 (the category path) of the
# whitespace-separated file is used.
place_categories = np.loadtxt('categories_places365.txt', dtype=str)
place_texts = [_place_label(place) for place in place_categories[:, 0]]
class_names = place_texts

# Write one label per line; the context manager closes the file even if a
# write fails part-way through.
with open('place365-classnames.txt', 'w') as f_w:
    for cln in class_names:
        f_w.write('%s\n' % cln)
print(class_names)
|
139 |
+
|
140 |
+
def _class_embedding(classname):
    """Return one unit-length CLIP text embedding for *classname*.

    Every prompt template is filled with the class name, encoded with the
    CLIP text encoder, and L2-normalised; the per-prompt embeddings are then
    averaged and the mean is normalised again.
    """
    prompts = [make_prompt(classname) for make_prompt in templates]
    tokens = clip.tokenize(prompts).to(device)
    per_prompt = clip_model.encode_text(tokens)
    per_prompt /= per_prompt.norm(dim=-1, keepdim=True)
    mean_embedding = per_prompt.mean(dim=0)
    mean_embedding /= mean_embedding.norm()
    return mean_embedding


# Inference only: no gradients are needed while encoding the prompts.
with torch.no_grad():
    class_weights = torch.stack([
        _class_embedding(name)
        for name in tqdm.tqdm(class_names, desc='encoding text')
    ])
print(class_weights.shape)
# Output name previously used for the Open Images class list (disabled):
#torch.save(class_weights, 'clip_ViTL14_openimage_classifier_weights.pt')
torch.save(class_weights, 'clip_ViTL14_place365_classifier_weights.pt')
|
prompts/openimage-classnames.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
prompts/place365-classnames.txt
ADDED
@@ -0,0 +1,365 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
airfield
|
2 |
+
airplane cabin
|
3 |
+
airport terminal
|
4 |
+
alcove
|
5 |
+
alley
|
6 |
+
amphitheater
|
7 |
+
amusement arcade
|
8 |
+
amusement park
|
9 |
+
outdoor apartment building
|
10 |
+
aquarium
|
11 |
+
aqueduct
|
12 |
+
arcade
|
13 |
+
arch
|
14 |
+
archaelogical excavation
|
15 |
+
archive
|
16 |
+
hockey arena
|
17 |
+
performance arena
|
18 |
+
rodeo arena
|
19 |
+
army base
|
20 |
+
art gallery
|
21 |
+
art school
|
22 |
+
art studio
|
23 |
+
artists loft
|
24 |
+
assembly line
|
25 |
+
outdoor athletic field
|
26 |
+
public atrium
|
27 |
+
attic
|
28 |
+
auditorium
|
29 |
+
auto factory
|
30 |
+
auto showroom
|
31 |
+
badlands
|
32 |
+
shop bakery
|
33 |
+
exterior balcony
|
34 |
+
interior balcony
|
35 |
+
ball pit
|
36 |
+
ballroom
|
37 |
+
bamboo forest
|
38 |
+
bank vault
|
39 |
+
banquet hall
|
40 |
+
bar
|
41 |
+
barn
|
42 |
+
barndoor
|
43 |
+
baseball field
|
44 |
+
basement
|
45 |
+
indoor basketball court
|
46 |
+
bathroom
|
47 |
+
indoor bazaar
|
48 |
+
outdoor bazaar
|
49 |
+
beach
|
50 |
+
beach house
|
51 |
+
beauty salon
|
52 |
+
bedchamber
|
53 |
+
bedroom
|
54 |
+
beer garden
|
55 |
+
beer hall
|
56 |
+
berth
|
57 |
+
biology laboratory
|
58 |
+
boardwalk
|
59 |
+
boat deck
|
60 |
+
boathouse
|
61 |
+
bookstore
|
62 |
+
indoor booth
|
63 |
+
botanical garden
|
64 |
+
indoor bow window
|
65 |
+
bowling alley
|
66 |
+
boxing ring
|
67 |
+
bridge
|
68 |
+
building facade
|
69 |
+
bullring
|
70 |
+
burial chamber
|
71 |
+
bus interior
|
72 |
+
indoor bus station
|
73 |
+
butchers shop
|
74 |
+
butte
|
75 |
+
outdoor cabin
|
76 |
+
cafeteria
|
77 |
+
campsite
|
78 |
+
campus
|
79 |
+
natural canal
|
80 |
+
urban canal
|
81 |
+
candy store
|
82 |
+
canyon
|
83 |
+
car interior
|
84 |
+
carrousel
|
85 |
+
castle
|
86 |
+
catacomb
|
87 |
+
cemetery
|
88 |
+
chalet
|
89 |
+
chemistry lab
|
90 |
+
childs room
|
91 |
+
indoor church
|
92 |
+
outdoor church
|
93 |
+
classroom
|
94 |
+
clean room
|
95 |
+
cliff
|
96 |
+
closet
|
97 |
+
clothing store
|
98 |
+
coast
|
99 |
+
cockpit
|
100 |
+
coffee shop
|
101 |
+
computer room
|
102 |
+
conference center
|
103 |
+
conference room
|
104 |
+
construction site
|
105 |
+
corn field
|
106 |
+
corral
|
107 |
+
corridor
|
108 |
+
cottage
|
109 |
+
courthouse
|
110 |
+
courtyard
|
111 |
+
creek
|
112 |
+
crevasse
|
113 |
+
crosswalk
|
114 |
+
dam
|
115 |
+
delicatessen
|
116 |
+
department store
|
117 |
+
sand desert
|
118 |
+
vegetation desert
|
119 |
+
desert road
|
120 |
+
outdoor diner
|
121 |
+
dining hall
|
122 |
+
dining room
|
123 |
+
discotheque
|
124 |
+
outdoor doorway
|
125 |
+
dorm room
|
126 |
+
downtown
|
127 |
+
dressing room
|
128 |
+
driveway
|
129 |
+
drugstore
|
130 |
+
door elevator
|
131 |
+
elevator lobby
|
132 |
+
elevator shaft
|
133 |
+
embassy
|
134 |
+
engine room
|
135 |
+
entrance hall
|
136 |
+
indoor escalator
|
137 |
+
excavation
|
138 |
+
fabric store
|
139 |
+
farm
|
140 |
+
fastfood restaurant
|
141 |
+
cultivated field
|
142 |
+
wild field
|
143 |
+
field road
|
144 |
+
fire escape
|
145 |
+
fire station
|
146 |
+
fishpond
|
147 |
+
indoor flea market
|
148 |
+
indoor florist shop
|
149 |
+
food court
|
150 |
+
football field
|
151 |
+
broadleaf forest
|
152 |
+
forest path
|
153 |
+
forest road
|
154 |
+
formal garden
|
155 |
+
fountain
|
156 |
+
galley
|
157 |
+
indoor garage
|
158 |
+
outdoor garage
|
159 |
+
gas station
|
160 |
+
exterior gazebo
|
161 |
+
indoor general store
|
162 |
+
outdoor general store
|
163 |
+
gift shop
|
164 |
+
glacier
|
165 |
+
golf course
|
166 |
+
indoor greenhouse
|
167 |
+
outdoor greenhouse
|
168 |
+
grotto
|
169 |
+
indoor gymnasium
|
170 |
+
indoor hangar
|
171 |
+
outdoor hangar
|
172 |
+
harbor
|
173 |
+
hardware store
|
174 |
+
hayfield
|
175 |
+
heliport
|
176 |
+
highway
|
177 |
+
home office
|
178 |
+
home theater
|
179 |
+
hospital
|
180 |
+
hospital room
|
181 |
+
hot spring
|
182 |
+
outdoor hotel
|
183 |
+
hotel room
|
184 |
+
house
|
185 |
+
outdoor hunting lodge
|
186 |
+
ice cream parlor
|
187 |
+
ice floe
|
188 |
+
ice shelf
|
189 |
+
indoor ice skating rink
|
190 |
+
outdoor ice skating rink
|
191 |
+
iceberg
|
192 |
+
igloo
|
193 |
+
industrial area
|
194 |
+
outdoor inn
|
195 |
+
islet
|
196 |
+
indoor jacuzzi
|
197 |
+
jail cell
|
198 |
+
japanese garden
|
199 |
+
jewelry shop
|
200 |
+
junkyard
|
201 |
+
kasbah
|
202 |
+
outdoor kennel
|
203 |
+
kindergarden classroom
|
204 |
+
kitchen
|
205 |
+
lagoon
|
206 |
+
natural lake
|
207 |
+
landfill
|
208 |
+
landing deck
|
209 |
+
laundromat
|
210 |
+
lawn
|
211 |
+
lecture room
|
212 |
+
legislative chamber
|
213 |
+
indoor library
|
214 |
+
outdoor library
|
215 |
+
lighthouse
|
216 |
+
living room
|
217 |
+
loading dock
|
218 |
+
lobby
|
219 |
+
lock chamber
|
220 |
+
locker room
|
221 |
+
mansion
|
222 |
+
manufactured home
|
223 |
+
indoor market
|
224 |
+
outdoor market
|
225 |
+
marsh
|
226 |
+
martial arts gym
|
227 |
+
mausoleum
|
228 |
+
medina
|
229 |
+
mezzanine
|
230 |
+
water moat
|
231 |
+
outdoor mosque
|
232 |
+
motel
|
233 |
+
mountain
|
234 |
+
mountain path
|
235 |
+
mountain snowy
|
236 |
+
indoor movie theater
|
237 |
+
indoor museum
|
238 |
+
outdoor museum
|
239 |
+
music studio
|
240 |
+
natural history museum
|
241 |
+
nursery
|
242 |
+
nursing home
|
243 |
+
oast house
|
244 |
+
ocean
|
245 |
+
office
|
246 |
+
office building
|
247 |
+
office cubicles
|
248 |
+
oilrig
|
249 |
+
operating room
|
250 |
+
orchard
|
251 |
+
orchestra pit
|
252 |
+
pagoda
|
253 |
+
palace
|
254 |
+
pantry
|
255 |
+
park
|
256 |
+
indoor parking garage
|
257 |
+
outdoor parking garage
|
258 |
+
parking lot
|
259 |
+
pasture
|
260 |
+
patio
|
261 |
+
pavilion
|
262 |
+
pet shop
|
263 |
+
pharmacy
|
264 |
+
phone booth
|
265 |
+
physics laboratory
|
266 |
+
picnic area
|
267 |
+
pier
|
268 |
+
pizzeria
|
269 |
+
playground
|
270 |
+
playroom
|
271 |
+
plaza
|
272 |
+
pond
|
273 |
+
porch
|
274 |
+
promenade
|
275 |
+
indoor pub
|
276 |
+
racecourse
|
277 |
+
raceway
|
278 |
+
raft
|
279 |
+
railroad track
|
280 |
+
rainforest
|
281 |
+
reception
|
282 |
+
recreation room
|
283 |
+
repair shop
|
284 |
+
residential neighborhood
|
285 |
+
restaurant
|
286 |
+
restaurant kitchen
|
287 |
+
restaurant patio
|
288 |
+
rice paddy
|
289 |
+
river
|
290 |
+
rock arch
|
291 |
+
roof garden
|
292 |
+
rope bridge
|
293 |
+
ruin
|
294 |
+
runway
|
295 |
+
sandbox
|
296 |
+
sauna
|
297 |
+
schoolhouse
|
298 |
+
science museum
|
299 |
+
server room
|
300 |
+
shed
|
301 |
+
shoe shop
|
302 |
+
shopfront
|
303 |
+
indoor shopping mall
|
304 |
+
shower
|
305 |
+
ski resort
|
306 |
+
ski slope
|
307 |
+
sky
|
308 |
+
skyscraper
|
309 |
+
slum
|
310 |
+
snowfield
|
311 |
+
soccer field
|
312 |
+
stable
|
313 |
+
baseball stadium
|
314 |
+
football stadium
|
315 |
+
soccer stadium
|
316 |
+
indoor stage
|
317 |
+
outdoor stage
|
318 |
+
staircase
|
319 |
+
storage room
|
320 |
+
street
|
321 |
+
platform subway station
|
322 |
+
supermarket
|
323 |
+
sushi bar
|
324 |
+
swamp
|
325 |
+
swimming hole
|
326 |
+
indoor swimming pool
|
327 |
+
outdoor swimming pool
|
328 |
+
outdoor synagogue
|
329 |
+
television room
|
330 |
+
television studio
|
331 |
+
asia temple
|
332 |
+
throne room
|
333 |
+
ticket booth
|
334 |
+
topiary garden
|
335 |
+
tower
|
336 |
+
toyshop
|
337 |
+
train interior
|
338 |
+
platform train station
|
339 |
+
tree farm
|
340 |
+
tree house
|
341 |
+
trench
|
342 |
+
tundra
|
343 |
+
ocean deep underwater
|
344 |
+
utility room
|
345 |
+
valley
|
346 |
+
vegetable garden
|
347 |
+
veterinarians office
|
348 |
+
viaduct
|
349 |
+
village
|
350 |
+
vineyard
|
351 |
+
volcano
|
352 |
+
outdoor volleyball court
|
353 |
+
waiting room
|
354 |
+
water park
|
355 |
+
water tower
|
356 |
+
waterfall
|
357 |
+
watering hole
|
358 |
+
wave
|
359 |
+
wet bar
|
360 |
+
wheat field
|
361 |
+
wind farm
|
362 |
+
windmill
|
363 |
+
yard
|
364 |
+
youth hostel
|
365 |
+
zen garden
|
prompts/tencent-ml-classnames.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
prompts/tencent-ml-images.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
git+https://github.com/huggingface/transformers
|
2 |
+
ftfy
|
3 |
+
regex
|
4 |
+
tqdm
|
5 |
+
git+https://github.com/openai/CLIP.git
|
6 |
+
gradio
|
7 |
+
torch
|
8 |
+
wget
|