geonmo.gu committed on
Commit
fba8607
•
1 Parent(s): 6019f50

initial commit

Browse files
README.md CHANGED
@@ -1,12 +1,14 @@
1
  ---
2
- title: Socratic Models Image Captioning With BLOOM
3
- emoji: 🔥
4
- colorFrom: green
5
- colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 3.1.1
8
  app_file: app.py
9
  pinned: false
 
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Socratic Models Image Captioning
3
+ emoji: 👀
4
+ colorFrom: blue
5
+ colorTo: blue
6
  sdk: gradio
7
  sdk_version: 3.1.1
8
  app_file: app.py
9
  pinned: false
10
+ models:
11
+ - bigscience/bloom
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ import gradio as gr
4
+ import time
5
+ import clip
6
+ #from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
7
+ #from flores200_codes import flores_codes
8
+ import requests
9
+ import csv
10
+ import json
11
+ import wget
12
+
13
# Precomputed CLIP ViT-L/14 zero-shot classifier weights: local file name
# (as later read from ./prompts by load_models) -> download URL.
url_dict = {
    'clip_ViTL14_openimage_classifier_weights.pt': 'https://raw.githubusercontent.com/geonm/socratic-models-demo/master/prompts/clip_ViTL14_openimage_classifier_weights.pt',
    'clip_ViTL14_place365_classifier_weights.pt': 'https://raw.githubusercontent.com/geonm/socratic-models-demo/master/prompts/clip_ViTL14_place365_classifier_weights.pt',
    'clip_ViTL14_tencentml_classifier_weights.pt': 'https://raw.githubusercontent.com/geonm/socratic-models-demo/master/prompts/clip_ViTL14_tencentml_classifier_weights.pt',
}

os.makedirs('./prompts', exist_ok=True)
for filename, url in url_dict.items():
    # Only fetch weights that are not on disk yet, so restarting the app does
    # not download the same files again.
    if not os.path.exists(os.path.join('./prompts', filename)):
        wget.download(url, out='./prompts')

# Hide all CUDA devices from this process.
os.environ['CUDA_VISIBLE_DEVICES'] = ''

API_URL = "https://api-inference.huggingface.co/models/bigscience/bloom"
# Raises KeyError at import time when the HF_TOKEN environment variable is unset.
HF_TOKEN = os.environ["HF_TOKEN"]
headers = {"Authorization": f"Bearer {HF_TOKEN}"}
26
+
27
def load_openimage_classnames(csv_path):
    """Read class names from a CSV file.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        dict mapping the zero-based row index to the value of the last
        column of that row.
    """
    # newline='' is what the csv module expects; the context manager closes
    # the handle, which the previous implementation left open.
    with open(csv_path, encoding='utf-8', newline='') as csv_data:
        return {idx: row[-1] for idx, row in enumerate(csv.reader(csv_data))}
32
+
33
+
34
def load_tencentml_classnames(txt_path):
    """Read one class name per line from a text file.

    Args:
        txt_path: Path of the text file to read.

    Returns:
        dict mapping the zero-based line index to that line with leading and
        trailing whitespace stripped.
    """
    # The context manager closes the handle, which the previous
    # implementation left open.
    with open(txt_path, encoding='utf-8') as txt_data:
        return {idx: line.strip() for idx, line in enumerate(txt_data)}
39
+
40
+
41
def build_simple_classifier(clip_model, text_list, template, device):
    """Encode a short list of labels into normalized CLIP text features.

    Args:
        clip_model: Model exposing ``encode_text``.
        text_list: Labels to encode.
        template: Callable turning one label into the sentence to encode.
        device: Device the tokenized text is moved to before encoding.

    Returns:
        Tuple ``(text_features, classnames)``: the text features scaled to
        unit norm along the last dimension, and a dict mapping each
        zero-based index to the corresponding label of ``text_list``.
    """
    with torch.no_grad():
        texts = [template(text) for text in text_list]
        text_inputs = clip.tokenize(texts).to(device)
        text_features = clip_model.encode_text(text_inputs)
        text_features /= text_features.norm(dim=-1, keepdim=True)

    return text_features, dict(enumerate(text_list))
49
+
50
+
51
def load_models():
    """Load CLIP plus every zero-shot classifier used by the demo.

    Returns:
        dict holding the CLIP model and preprocessing callable, the
        ``*_classifier_weights`` / ``*_classnames`` pairs for the openimage,
        tencentml, place365, imgtype, ppl and ifppl classifiers, and the
        selected ``device`` string.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    def _load_weights(path):
        # NOTE(review): torch.load unpickles the file. These files are
        # downloaded at startup, so they must come from a trusted source.
        return torch.load(path, map_location=device).type(torch.FloatTensor)

    print('\tLoading CLIP ViT-L/14')
    clip_model, clip_preprocess = clip.load("ViT-L/14", device=device)

    print('\tLoading precomputed zeroshot classifier')
    openimage_classifier_weights = _load_weights('./prompts/clip_ViTL14_openimage_classifier_weights.pt')
    openimage_classnames = load_openimage_classnames('./prompts/openimage-classnames.csv')
    tencentml_classifier_weights = _load_weights('./prompts/clip_ViTL14_tencentml_classifier_weights.pt')
    tencentml_classnames = load_tencentml_classnames('./prompts/tencent-ml-classnames.txt')
    place365_classifier_weights = _load_weights('./prompts/clip_ViTL14_place365_classifier_weights.pt')
    # The place365 names file is also one name per line, so the same reader is used.
    place365_classnames = load_tencentml_classnames('./prompts/place365-classnames.txt')

    print('\tBuilding simple zeroshot classifier')
    img_types = ['photo', 'cartoon', 'sketch', 'painting']
    ppl_texts = ['no people', 'people']
    ifppl_texts = ['is one person', 'are two people', 'are three people', 'are several people', 'are many people']
    imgtype_classifier_weights, imgtype_classnames = build_simple_classifier(
        clip_model, img_types, lambda c: f'This is a {c}.', device)
    ppl_classifier_weights, ppl_classnames = build_simple_classifier(
        clip_model, ppl_texts, lambda c: f'There are {c} in this photo.', device)
    ifppl_classifier_weights, ifppl_classnames = build_simple_classifier(
        clip_model, ifppl_texts, lambda c: f'There {c} in this photo.', device)

    return {
        'clip_model': clip_model,
        'clip_preprocess': clip_preprocess,
        'openimage_classifier_weights': openimage_classifier_weights,
        'openimage_classnames': openimage_classnames,
        'tencentml_classifier_weights': tencentml_classifier_weights,
        'tencentml_classnames': tencentml_classnames,
        'place365_classifier_weights': place365_classifier_weights,
        'place365_classnames': place365_classnames,
        'imgtype_classifier_weights': imgtype_classifier_weights,
        'imgtype_classnames': imgtype_classnames,
        'ppl_classifier_weights': ppl_classifier_weights,
        'ppl_classnames': ppl_classnames,
        'ifppl_classifier_weights': ifppl_classifier_weights,
        'ifppl_classnames': ifppl_classnames,
        'device': device,
    }
91
+
92
+
93
def drop_gpu(tensor):
    """Return ``tensor`` as a NumPy array held in host memory.

    ``Tensor.cpu()`` is a no-op for tensors that already live on the CPU, so
    no CUDA availability check is needed. ``detach()`` lets tensors that
    track gradients be converted as well.
    """
    return tensor.detach().cpu().numpy()
98
+
99
+
100
def zeroshot_classifier(image):
    """Score ``image`` against every zero-shot classifier in ``model_dict``.

    Args:
        image: Input accepted by ``model_dict['clip_preprocess']``.

    Returns:
        A 13-tuple: the unit-normalized image features followed by a
        ``(scores, classes)`` pair for each of openimage, tencentml,
        place365, imgtype, ppl and ifppl, in that order. The first three
        pairs hold the top 10 entries; the last three hold every class of
        the classifier.
    """
    image_input = model_dict['clip_preprocess'](image).unsqueeze(0).to(model_dict['device'])
    with torch.no_grad():
        image_features = model_dict['clip_model'].encode_image(image_input)
        image_features /= image_features.norm(dim=-1, keepdim=True)

    def _rank(name, k=None):
        # Softmax over the scaled similarities to one classifier, then keep
        # the k best classes (all of them when k is None).
        weights = model_dict[f'{name}_classifier_weights']
        classnames = model_dict[f'{name}_classnames']
        if k is None:
            k = len(classnames)
        # Never ask topk for more entries than the classifier has rows.
        k = min(k, weights.shape[0])
        sim = (100.0 * image_features @ weights.T).softmax(dim=-1)
        scores, indices = [drop_gpu(tensor) for tensor in sim[0].topk(k)]
        return scores, [classnames[idx] for idx in indices]

    openimage_scores, openimage_classes = _rank('openimage', 10)
    tencentml_scores, tencentml_classes = _rank('tencentml', 10)
    place365_scores, place365_classes = _rank('place365', 10)
    imgtype_scores, imgtype_classes = _rank('imgtype')
    ppl_scores, ppl_classes = _rank('ppl')
    ifppl_scores, ifppl_classes = _rank('ifppl')

    return (image_features,
            openimage_scores, openimage_classes,
            tencentml_scores, tencentml_classes,
            place365_scores, place365_classes,
            imgtype_scores, imgtype_classes,
            ppl_scores, ppl_classes,
            ifppl_scores, ifppl_classes)
133
+
134
+
135
def generate_prompt(openimage_classes, tencentml_classes, place365_classes, imgtype_classes, ppl_classes, ifppl_classes):
    """Build the captioning prompt from the zero-shot classification results.

    Each argument is a sequence of class names ordered best match first.
    All ``tencentml_classes`` and the first two ``openimage_classes`` are
    listed as candidate objects; the first three ``place365_classes`` are
    listed as candidate places, so at least three are required.

    Returns:
        The prompt text as a single string.
    """
    img_type = imgtype_classes[0]

    # "people" means the people-count classifier decides the wording;
    # otherwise the sentence reads "There are no people."
    if ppl_classes[0] == 'people':
        ppl_result = ifppl_classes[0]
    else:
        ppl_result = 'are %s' % ppl_classes[0]

    places = place365_classes
    object_list = ', '.join([*tencentml_classes, *openimage_classes[:2]])

    # Lines are joined with explicit newlines so that the prompt text does
    # not depend on how this source file is indented.
    prompt_caption = (
        'I am an intelligent image captioning bot.\n'
        f'This image is a {img_type}. There {ppl_result}.\n'
        f'I think this photo was taken at a {places[0]}, {places[1]}, or {places[2]}.\n'
        f'I think there might be a {object_list} in this {img_type}.\n'
        'A creative short caption I can generate to describe this image is:'
    )

    return prompt_caption
165
+
166
+
167
def generate_captions(prompt, num_captions=3):
    """Request caption continuations of ``prompt`` from the BLOOM API.

    Args:
        prompt: Text sent as the model input.
        num_captions: Number of requests to issue; one caption is collected
            per request.

    Returns:
        List of generated texts with every occurrence of ``prompt`` removed.

    Raises:
        RuntimeError: If a response is not JSON or does not contain a
            generated text (for example an error payload).
    """
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}

    max_length = 16
    seed = 42
    sample_or_greedy = 'Greedy'

    parameters = {
        "max_new_tokens": max_length,
        "do_sample": False,
        "seed": seed,
        "early_stopping": False,
        "length_penalty": 0.0,
        "eos_token_id": None,
    }
    if sample_or_greedy == "Sample":
        parameters.update({"top_p": 0.7, "do_sample": True})

    payload = {"inputs": prompt, "parameters": parameters, "options": {"use_cache": False}}

    bloom_results = []
    for _ in range(num_captions):
        # A timeout keeps a stalled request from blocking the demo forever.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
        try:
            output = response.json()
        except ValueError as err:
            raise RuntimeError(
                f'BLOOM API returned a non-JSON response (HTTP {response.status_code})'
            ) from err
        # Anything other than a non-empty list of dicts carrying
        # 'generated_text' is reported verbatim instead of failing on an index.
        if not (isinstance(output, list) and output
                and isinstance(output[0], dict)
                and 'generated_text' in output[0]):
            raise RuntimeError(f'Unexpected BLOOM API response: {output!r}')
        bloom_results.append(output[0]['generated_text'].replace(prompt, ''))
    return bloom_results
203
+
204
+
205
def sorting_texts(image_features, captions):
    """Rank ``captions`` by their CLIP similarity to ``image_features``.

    Args:
        image_features: Image features as returned by ``zeroshot_classifier``.
        captions: List of caption strings.

    Returns:
        Tuple ``(scores, sorted_captions)``: the softmax scores in the order
        produced by ``topk`` and the captions reordered to match.
    """
    with torch.no_grad():
        text_inputs = clip.tokenize(captions).to(model_dict['device'])
        text_features = model_dict['clip_model'].encode_text(text_inputs)
        text_features /= text_features.norm(dim=-1, keepdim=True)

    sim = (100.0 * image_features @ text_features.T).softmax(dim=-1)
    scores, indices = [drop_gpu(tensor) for tensor in sim[0].topk(len(captions))]
    sorted_captions = [captions[idx] for idx in indices]

    return scores, sorted_captions
216
+
217
+
218
def postprocess_results(scores, classes):
    """Pair each score with its class as JSON-friendly dicts.

    Args:
        scores: Iterable of values convertible to ``float``.
        classes: Iterable of outputs, aligned with ``scores``.

    Returns:
        List of ``{'score': ..., 'output': ...}`` dicts, scores rounded to
        four decimal places. Pairing stops at the shorter input.
    """
    return [
        {'score': float('%.4f' % float(val)), 'output': cls}
        for val, cls in zip(scores, classes)
    ]
224
+
225
+
226
def image_captioning(image):
    """Caption ``image`` and report the intermediate classification results.

    Runs the zero-shot classifiers, builds a prompt from their results,
    requests one caption, and scores it against the image.

    Returns:
        dict with ``inference_time`` (seconds spent in the classification
        step and in the caption request), ``generated_captions`` and
        ``reasoning`` (the results of each classifier).
    """
    start_time = time.time()
    (image_features,
     openimage_scores, openimage_classes,
     tencentml_scores, tencentml_classes,
     place365_scores, place365_classes,
     imgtype_scores, imgtype_classes,
     ppl_scores, ppl_classes,
     ifppl_scores, ifppl_classes) = zeroshot_classifier(image)
    end_zeroshot = time.time()

    prompt_caption = generate_prompt(openimage_classes, tencentml_classes, place365_classes,
                                     imgtype_classes, ppl_classes, ifppl_classes)
    generated_captions = generate_captions(prompt_caption, num_captions=1)
    end_bloom = time.time()

    caption_scores, sorted_captions = sorting_texts(image_features, generated_captions)

    return {
        'inference_time': {'CLIP inference': end_zeroshot - start_time,
                           'BLOOM request': end_bloom - end_zeroshot},
        'generated_captions': postprocess_results(caption_scores, sorted_captions),
        'reasoning': {'openimage_results': postprocess_results(openimage_scores, openimage_classes),
                      'tencentml_results': postprocess_results(tencentml_scores, tencentml_classes),
                      'place365_results': postprocess_results(place365_scores, place365_classes),
                      'imgtype_results': postprocess_results(imgtype_scores, imgtype_classes),
                      'ppl_results': postprocess_results(ppl_scores, ppl_classes),
                      'ifppl_results': postprocess_results(ifppl_scores, ifppl_classes)},
    }
247
+
248
+
249
if __name__ == '__main__':
    print('\tinit models')

    # Assigned at module level, so the functions above see it as a global;
    # a `global` statement is not needed here.
    model_dict = load_models()

    # define gradio demo
    # gr.Image / gr.JSON replace the deprecated gr.inputs / gr.outputs namespaces.
    inputs = [gr.Image(type="pil", label="Image")]
    outputs = gr.JSON()

    title = "Socratic models for image captioning with BLOOM"

    demo_status = "Demo is running on CPU"
    description = f"Details: https://github.com/geonm/socratic-models-demo. {demo_status}"
    article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2204.00598'>Socratic Models: Composing Zero-Shot Multimodal Reasoning with Language</a></p>"
    examples = ['k21-1.jpg']

    gr.Interface(
        image_captioning,
        inputs,
        outputs,
        title=title,
        description=description,
        article=article,
        examples=examples,
    ).launch()
k21-1.jpg ADDED
prompts/categories_places365.txt ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /a/airfield 0
2
+ /a/airplane_cabin 1
3
+ /a/airport_terminal 2
4
+ /a/alcove 3
5
+ /a/alley 4
6
+ /a/amphitheater 5
7
+ /a/amusement_arcade 6
8
+ /a/amusement_park 7
9
+ /a/apartment_building/outdoor 8
10
+ /a/aquarium 9
11
+ /a/aqueduct 10
12
+ /a/arcade 11
13
+ /a/arch 12
14
+ /a/archaelogical_excavation 13
15
+ /a/archive 14
16
+ /a/arena/hockey 15
17
+ /a/arena/performance 16
18
+ /a/arena/rodeo 17
19
+ /a/army_base 18
20
+ /a/art_gallery 19
21
+ /a/art_school 20
22
+ /a/art_studio 21
23
+ /a/artists_loft 22
24
+ /a/assembly_line 23
25
+ /a/athletic_field/outdoor 24
26
+ /a/atrium/public 25
27
+ /a/attic 26
28
+ /a/auditorium 27
29
+ /a/auto_factory 28
30
+ /a/auto_showroom 29
31
+ /b/badlands 30
32
+ /b/bakery/shop 31
33
+ /b/balcony/exterior 32
34
+ /b/balcony/interior 33
35
+ /b/ball_pit 34
36
+ /b/ballroom 35
37
+ /b/bamboo_forest 36
38
+ /b/bank_vault 37
39
+ /b/banquet_hall 38
40
+ /b/bar 39
41
+ /b/barn 40
42
+ /b/barndoor 41
43
+ /b/baseball_field 42
44
+ /b/basement 43
45
+ /b/basketball_court/indoor 44
46
+ /b/bathroom 45
47
+ /b/bazaar/indoor 46
48
+ /b/bazaar/outdoor 47
49
+ /b/beach 48
50
+ /b/beach_house 49
51
+ /b/beauty_salon 50
52
+ /b/bedchamber 51
53
+ /b/bedroom 52
54
+ /b/beer_garden 53
55
+ /b/beer_hall 54
56
+ /b/berth 55
57
+ /b/biology_laboratory 56
58
+ /b/boardwalk 57
59
+ /b/boat_deck 58
60
+ /b/boathouse 59
61
+ /b/bookstore 60
62
+ /b/booth/indoor 61
63
+ /b/botanical_garden 62
64
+ /b/bow_window/indoor 63
65
+ /b/bowling_alley 64
66
+ /b/boxing_ring 65
67
+ /b/bridge 66
68
+ /b/building_facade 67
69
+ /b/bullring 68
70
+ /b/burial_chamber 69
71
+ /b/bus_interior 70
72
+ /b/bus_station/indoor 71
73
+ /b/butchers_shop 72
74
+ /b/butte 73
75
+ /c/cabin/outdoor 74
76
+ /c/cafeteria 75
77
+ /c/campsite 76
78
+ /c/campus 77
79
+ /c/canal/natural 78
80
+ /c/canal/urban 79
81
+ /c/candy_store 80
82
+ /c/canyon 81
83
+ /c/car_interior 82
84
+ /c/carrousel 83
85
+ /c/castle 84
86
+ /c/catacomb 85
87
+ /c/cemetery 86
88
+ /c/chalet 87
89
+ /c/chemistry_lab 88
90
+ /c/childs_room 89
91
+ /c/church/indoor 90
92
+ /c/church/outdoor 91
93
+ /c/classroom 92
94
+ /c/clean_room 93
95
+ /c/cliff 94
96
+ /c/closet 95
97
+ /c/clothing_store 96
98
+ /c/coast 97
99
+ /c/cockpit 98
100
+ /c/coffee_shop 99
101
+ /c/computer_room 100
102
+ /c/conference_center 101
103
+ /c/conference_room 102
104
+ /c/construction_site 103
105
+ /c/corn_field 104
106
+ /c/corral 105
107
+ /c/corridor 106
108
+ /c/cottage 107
109
+ /c/courthouse 108
110
+ /c/courtyard 109
111
+ /c/creek 110
112
+ /c/crevasse 111
113
+ /c/crosswalk 112
114
+ /d/dam 113
115
+ /d/delicatessen 114
116
+ /d/department_store 115
117
+ /d/desert/sand 116
118
+ /d/desert/vegetation 117
119
+ /d/desert_road 118
120
+ /d/diner/outdoor 119
121
+ /d/dining_hall 120
122
+ /d/dining_room 121
123
+ /d/discotheque 122
124
+ /d/doorway/outdoor 123
125
+ /d/dorm_room 124
126
+ /d/downtown 125
127
+ /d/dressing_room 126
128
+ /d/driveway 127
129
+ /d/drugstore 128
130
+ /e/elevator/door 129
131
+ /e/elevator_lobby 130
132
+ /e/elevator_shaft 131
133
+ /e/embassy 132
134
+ /e/engine_room 133
135
+ /e/entrance_hall 134
136
+ /e/escalator/indoor 135
137
+ /e/excavation 136
138
+ /f/fabric_store 137
139
+ /f/farm 138
140
+ /f/fastfood_restaurant 139
141
+ /f/field/cultivated 140
142
+ /f/field/wild 141
143
+ /f/field_road 142
144
+ /f/fire_escape 143
145
+ /f/fire_station 144
146
+ /f/fishpond 145
147
+ /f/flea_market/indoor 146
148
+ /f/florist_shop/indoor 147
149
+ /f/food_court 148
150
+ /f/football_field 149
151
+ /f/forest/broadleaf 150
152
+ /f/forest_path 151
153
+ /f/forest_road 152
154
+ /f/formal_garden 153
155
+ /f/fountain 154
156
+ /g/galley 155
157
+ /g/garage/indoor 156
158
+ /g/garage/outdoor 157
159
+ /g/gas_station 158
160
+ /g/gazebo/exterior 159
161
+ /g/general_store/indoor 160
162
+ /g/general_store/outdoor 161
163
+ /g/gift_shop 162
164
+ /g/glacier 163
165
+ /g/golf_course 164
166
+ /g/greenhouse/indoor 165
167
+ /g/greenhouse/outdoor 166
168
+ /g/grotto 167
169
+ /g/gymnasium/indoor 168
170
+ /h/hangar/indoor 169
171
+ /h/hangar/outdoor 170
172
+ /h/harbor 171
173
+ /h/hardware_store 172
174
+ /h/hayfield 173
175
+ /h/heliport 174
176
+ /h/highway 175
177
+ /h/home_office 176
178
+ /h/home_theater 177
179
+ /h/hospital 178
180
+ /h/hospital_room 179
181
+ /h/hot_spring 180
182
+ /h/hotel/outdoor 181
183
+ /h/hotel_room 182
184
+ /h/house 183
185
+ /h/hunting_lodge/outdoor 184
186
+ /i/ice_cream_parlor 185
187
+ /i/ice_floe 186
188
+ /i/ice_shelf 187
189
+ /i/ice_skating_rink/indoor 188
190
+ /i/ice_skating_rink/outdoor 189
191
+ /i/iceberg 190
192
+ /i/igloo 191
193
+ /i/industrial_area 192
194
+ /i/inn/outdoor 193
195
+ /i/islet 194
196
+ /j/jacuzzi/indoor 195
197
+ /j/jail_cell 196
198
+ /j/japanese_garden 197
199
+ /j/jewelry_shop 198
200
+ /j/junkyard 199
201
+ /k/kasbah 200
202
+ /k/kennel/outdoor 201
203
+ /k/kindergarden_classroom 202
204
+ /k/kitchen 203
205
+ /l/lagoon 204
206
+ /l/lake/natural 205
207
+ /l/landfill 206
208
+ /l/landing_deck 207
209
+ /l/laundromat 208
210
+ /l/lawn 209
211
+ /l/lecture_room 210
212
+ /l/legislative_chamber 211
213
+ /l/library/indoor 212
214
+ /l/library/outdoor 213
215
+ /l/lighthouse 214
216
+ /l/living_room 215
217
+ /l/loading_dock 216
218
+ /l/lobby 217
219
+ /l/lock_chamber 218
220
+ /l/locker_room 219
221
+ /m/mansion 220
222
+ /m/manufactured_home 221
223
+ /m/market/indoor 222
224
+ /m/market/outdoor 223
225
+ /m/marsh 224
226
+ /m/martial_arts_gym 225
227
+ /m/mausoleum 226
228
+ /m/medina 227
229
+ /m/mezzanine 228
230
+ /m/moat/water 229
231
+ /m/mosque/outdoor 230
232
+ /m/motel 231
233
+ /m/mountain 232
234
+ /m/mountain_path 233
235
+ /m/mountain_snowy 234
236
+ /m/movie_theater/indoor 235
237
+ /m/museum/indoor 236
238
+ /m/museum/outdoor 237
239
+ /m/music_studio 238
240
+ /n/natural_history_museum 239
241
+ /n/nursery 240
242
+ /n/nursing_home 241
243
+ /o/oast_house 242
244
+ /o/ocean 243
245
+ /o/office 244
246
+ /o/office_building 245
247
+ /o/office_cubicles 246
248
+ /o/oilrig 247
249
+ /o/operating_room 248
250
+ /o/orchard 249
251
+ /o/orchestra_pit 250
252
+ /p/pagoda 251
253
+ /p/palace 252
254
+ /p/pantry 253
255
+ /p/park 254
256
+ /p/parking_garage/indoor 255
257
+ /p/parking_garage/outdoor 256
258
+ /p/parking_lot 257
259
+ /p/pasture 258
260
+ /p/patio 259
261
+ /p/pavilion 260
262
+ /p/pet_shop 261
263
+ /p/pharmacy 262
264
+ /p/phone_booth 263
265
+ /p/physics_laboratory 264
266
+ /p/picnic_area 265
267
+ /p/pier 266
268
+ /p/pizzeria 267
269
+ /p/playground 268
270
+ /p/playroom 269
271
+ /p/plaza 270
272
+ /p/pond 271
273
+ /p/porch 272
274
+ /p/promenade 273
275
+ /p/pub/indoor 274
276
+ /r/racecourse 275
277
+ /r/raceway 276
278
+ /r/raft 277
279
+ /r/railroad_track 278
280
+ /r/rainforest 279
281
+ /r/reception 280
282
+ /r/recreation_room 281
283
+ /r/repair_shop 282
284
+ /r/residential_neighborhood 283
285
+ /r/restaurant 284
286
+ /r/restaurant_kitchen 285
287
+ /r/restaurant_patio 286
288
+ /r/rice_paddy 287
289
+ /r/river 288
290
+ /r/rock_arch 289
291
+ /r/roof_garden 290
292
+ /r/rope_bridge 291
293
+ /r/ruin 292
294
+ /r/runway 293
295
+ /s/sandbox 294
296
+ /s/sauna 295
297
+ /s/schoolhouse 296
298
+ /s/science_museum 297
299
+ /s/server_room 298
300
+ /s/shed 299
301
+ /s/shoe_shop 300
302
+ /s/shopfront 301
303
+ /s/shopping_mall/indoor 302
304
+ /s/shower 303
305
+ /s/ski_resort 304
306
+ /s/ski_slope 305
307
+ /s/sky 306
308
+ /s/skyscraper 307
309
+ /s/slum 308
310
+ /s/snowfield 309
311
+ /s/soccer_field 310
312
+ /s/stable 311
313
+ /s/stadium/baseball 312
314
+ /s/stadium/football 313
315
+ /s/stadium/soccer 314
316
+ /s/stage/indoor 315
317
+ /s/stage/outdoor 316
318
+ /s/staircase 317
319
+ /s/storage_room 318
320
+ /s/street 319
321
+ /s/subway_station/platform 320
322
+ /s/supermarket 321
323
+ /s/sushi_bar 322
324
+ /s/swamp 323
325
+ /s/swimming_hole 324
326
+ /s/swimming_pool/indoor 325
327
+ /s/swimming_pool/outdoor 326
328
+ /s/synagogue/outdoor 327
329
+ /t/television_room 328
330
+ /t/television_studio 329
331
+ /t/temple/asia 330
332
+ /t/throne_room 331
333
+ /t/ticket_booth 332
334
+ /t/topiary_garden 333
335
+ /t/tower 334
336
+ /t/toyshop 335
337
+ /t/train_interior 336
338
+ /t/train_station/platform 337
339
+ /t/tree_farm 338
340
+ /t/tree_house 339
341
+ /t/trench 340
342
+ /t/tundra 341
343
+ /u/underwater/ocean_deep 342
344
+ /u/utility_room 343
345
+ /v/valley 344
346
+ /v/vegetable_garden 345
347
+ /v/veterinarians_office 346
348
+ /v/viaduct 347
349
+ /v/village 348
350
+ /v/vineyard 349
351
+ /v/volcano 350
352
+ /v/volleyball_court/outdoor 351
353
+ /w/waiting_room 352
354
+ /w/water_park 353
355
+ /w/water_tower 354
356
+ /w/waterfall 355
357
+ /w/watering_hole 356
358
+ /w/wave 357
359
+ /w/wet_bar 358
360
+ /w/wheat_field 359
361
+ /w/wind_farm 360
362
+ /w/windmill 361
363
+ /y/yard 362
364
+ /y/youth_hostel 363
365
+ /z/zen_garden 364
prompts/extract_text_features.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import torch
4
+ import clip
5
+ import csv
6
+ import tqdm
7
+ from profanity_filter import ProfanityFilter
8
+
9
+
10
# Prompt templates; each callable turns a class name into one sentence.
# The sentences are kept verbatim (including "a origami" / "a embroidered"),
# because changing them would change the text features computed from them.
templates = [
    lambda c: f'a bad photo of a {c}.',
    lambda c: f'a photo of many {c}.',
    lambda c: f'a sculpture of a {c}.',
    lambda c: f'a photo of the hard to see {c}.',
    lambda c: f'a low resolution photo of the {c}.',
    lambda c: f'a rendering of a {c}.',
    lambda c: f'graffiti of a {c}.',
    lambda c: f'a bad photo of the {c}.',
    lambda c: f'a cropped photo of the {c}.',
    lambda c: f'a tattoo of a {c}.',
    lambda c: f'the embroidered {c}.',
    lambda c: f'a photo of a hard to see {c}.',
    lambda c: f'a bright photo of a {c}.',
    lambda c: f'a photo of a clean {c}.',
    lambda c: f'a photo of a dirty {c}.',
    lambda c: f'a dark photo of the {c}.',
    lambda c: f'a drawing of a {c}.',
    lambda c: f'a photo of my {c}.',
    lambda c: f'the plastic {c}.',
    lambda c: f'a photo of the cool {c}.',
    lambda c: f'a close-up photo of a {c}.',
    lambda c: f'a black and white photo of the {c}.',
    lambda c: f'a painting of the {c}.',
    lambda c: f'a painting of a {c}.',
    lambda c: f'a pixelated photo of the {c}.',
    lambda c: f'a sculpture of the {c}.',
    lambda c: f'a bright photo of the {c}.',
    lambda c: f'a cropped photo of a {c}.',
    lambda c: f'a plastic {c}.',
    lambda c: f'a photo of the dirty {c}.',
    lambda c: f'a jpeg corrupted photo of a {c}.',
    lambda c: f'a blurry photo of the {c}.',
    lambda c: f'a photo of the {c}.',
    lambda c: f'a good photo of the {c}.',
    lambda c: f'a rendering of the {c}.',
    lambda c: f'a {c} in a video game.',
    lambda c: f'a photo of one {c}.',
    lambda c: f'a doodle of a {c}.',
    lambda c: f'a close-up photo of the {c}.',
    lambda c: f'a photo of a {c}.',
    lambda c: f'the origami {c}.',
    lambda c: f'the {c} in a video game.',
    lambda c: f'a sketch of a {c}.',
    lambda c: f'a doodle of the {c}.',
    lambda c: f'a origami {c}.',
    lambda c: f'a low resolution photo of a {c}.',
    lambda c: f'the toy {c}.',
    lambda c: f'a rendition of the {c}.',
    lambda c: f'a photo of the clean {c}.',
    lambda c: f'a photo of a large {c}.',
    lambda c: f'a rendition of a {c}.',
    lambda c: f'a photo of a nice {c}.',
    lambda c: f'a photo of a weird {c}.',
    lambda c: f'a blurry photo of a {c}.',
    lambda c: f'a cartoon {c}.',
    lambda c: f'art of a {c}.',
    lambda c: f'a sketch of the {c}.',
    lambda c: f'a embroidered {c}.',
    lambda c: f'a pixelated photo of a {c}.',
    lambda c: f'itap of the {c}.',
    lambda c: f'a jpeg corrupted photo of the {c}.',
    lambda c: f'a good photo of a {c}.',
    lambda c: f'a plushie {c}.',
    lambda c: f'a photo of the nice {c}.',
    lambda c: f'a photo of the small {c}.',
    lambda c: f'a photo of the weird {c}.',
    lambda c: f'the cartoon {c}.',
    lambda c: f'art of the {c}.',
    lambda c: f'a drawing of the {c}.',
    lambda c: f'a photo of the large {c}.',
    lambda c: f'a black and white photo of a {c}.',
    lambda c: f'the plushie {c}.',
    lambda c: f'a dark photo of a {c}.',
    lambda c: f'itap of a {c}.',
    lambda c: f'graffiti of the {c}.',
    lambda c: f'a toy {c}.',
    lambda c: f'itap of my {c}.',
    lambda c: f'a photo of a cool {c}.',
    lambda c: f'a photo of a small {c}.',
    lambda c: f'a tattoo of the {c}.',
]
92
+
93
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
device = "cuda" if torch.cuda.is_available() else "cpu"
clip_model, clip_preprocess = clip.load("ViT-L/14", device=device)

# --- Disabled: class names from the OpenImage CSV --------------------------
# (previously kept inside a bare string literal; indentation reconstructed)
# csv_data = open('openimage-classnames.csv')
# csv_reader = csv.reader(csv_data)
# class_names = []
# for row in csv_reader:
#     class_names.append(row[-1])

# --- Disabled: class names from Tencent ML-Images, profanity-filtered -------
# (previously kept inside a bare string literal; indentation reconstructed)
# txt_data = open('tencent-ml-images.txt')
# pf = ProfanityFilter()
# lines = txt_data.readlines()
# class_names = []
# for line in lines[4:]:
#     class_name_precook = line.strip().split('\t')[-1]
#     safe_list = ''
#     for class_name in class_name_precook.split(', '):
#         if pf.is_clean(class_name):
#             safe_list += '%s, ' % class_name
#     safe_list = safe_list[:-2]
#     if len(safe_list) > 0:
#         class_names.append(safe_list)
# f_w = open('tencent-ml-classnames.txt', 'w')
# for cln in class_names:
#     f_w.write('%s\n' % cln)
# f_w.close()

# --- Active: class names from the Places365 category list -------------------
place_categories = np.loadtxt('categories_places365.txt', dtype=str)
place_texts = []
for place in place_categories[:, 0]:
    # '/a/apartment_building/outdoor' -> ['apartment_building', 'outdoor']
    place = place.split('/')[2:]
    if len(place) > 1:
        # Put the qualifier first: 'outdoor apartment_building'.
        place = place[1] + ' ' + place[0]
    else:
        place = place[0]
    place = place.replace('_', ' ')
    place_texts.append(place)
class_names = place_texts

# The context manager closes the file even if a write fails.
with open('place365-classnames.txt', 'w') as f_w:
    f_w.writelines('%s\n' % cln for cln in class_names)
print(class_names)

# One weight vector per class: the normalized mean of the normalized text
# features of all templates filled with that class name.
class_weights = []
with torch.no_grad():
    for classname in tqdm.tqdm(class_names, desc='encoding text'):
        texts = [template(classname) for template in templates]
        text_inputs = clip.tokenize(texts).to(device)
        text_features = clip_model.encode_text(text_inputs)
        text_features /= text_features.norm(dim=-1, keepdim=True)
        text_features = text_features.mean(dim=0)
        text_features /= text_features.norm()
        class_weights.append(text_features)

class_weights = torch.stack(class_weights)
print(class_weights.shape)
#torch.save(class_weights, 'clip_ViTL14_openimage_classifier_weights.pt')
torch.save(class_weights, 'clip_ViTL14_place365_classifier_weights.pt')
prompts/openimage-classnames.csv ADDED
The diff for this file is too large to render. See raw diff
 
prompts/place365-classnames.txt ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ airfield
2
+ airplane cabin
3
+ airport terminal
4
+ alcove
5
+ alley
6
+ amphitheater
7
+ amusement arcade
8
+ amusement park
9
+ outdoor apartment building
10
+ aquarium
11
+ aqueduct
12
+ arcade
13
+ arch
14
+ archaelogical excavation
15
+ archive
16
+ hockey arena
17
+ performance arena
18
+ rodeo arena
19
+ army base
20
+ art gallery
21
+ art school
22
+ art studio
23
+ artists loft
24
+ assembly line
25
+ outdoor athletic field
26
+ public atrium
27
+ attic
28
+ auditorium
29
+ auto factory
30
+ auto showroom
31
+ badlands
32
+ shop bakery
33
+ exterior balcony
34
+ interior balcony
35
+ ball pit
36
+ ballroom
37
+ bamboo forest
38
+ bank vault
39
+ banquet hall
40
+ bar
41
+ barn
42
+ barndoor
43
+ baseball field
44
+ basement
45
+ indoor basketball court
46
+ bathroom
47
+ indoor bazaar
48
+ outdoor bazaar
49
+ beach
50
+ beach house
51
+ beauty salon
52
+ bedchamber
53
+ bedroom
54
+ beer garden
55
+ beer hall
56
+ berth
57
+ biology laboratory
58
+ boardwalk
59
+ boat deck
60
+ boathouse
61
+ bookstore
62
+ indoor booth
63
+ botanical garden
64
+ indoor bow window
65
+ bowling alley
66
+ boxing ring
67
+ bridge
68
+ building facade
69
+ bullring
70
+ burial chamber
71
+ bus interior
72
+ indoor bus station
73
+ butchers shop
74
+ butte
75
+ outdoor cabin
76
+ cafeteria
77
+ campsite
78
+ campus
79
+ natural canal
80
+ urban canal
81
+ candy store
82
+ canyon
83
+ car interior
84
+ carrousel
85
+ castle
86
+ catacomb
87
+ cemetery
88
+ chalet
89
+ chemistry lab
90
+ childs room
91
+ indoor church
92
+ outdoor church
93
+ classroom
94
+ clean room
95
+ cliff
96
+ closet
97
+ clothing store
98
+ coast
99
+ cockpit
100
+ coffee shop
101
+ computer room
102
+ conference center
103
+ conference room
104
+ construction site
105
+ corn field
106
+ corral
107
+ corridor
108
+ cottage
109
+ courthouse
110
+ courtyard
111
+ creek
112
+ crevasse
113
+ crosswalk
114
+ dam
115
+ delicatessen
116
+ department store
117
+ sand desert
118
+ vegetation desert
119
+ desert road
120
+ outdoor diner
121
+ dining hall
122
+ dining room
123
+ discotheque
124
+ outdoor doorway
125
+ dorm room
126
+ downtown
127
+ dressing room
128
+ driveway
129
+ drugstore
130
+ door elevator
131
+ elevator lobby
132
+ elevator shaft
133
+ embassy
134
+ engine room
135
+ entrance hall
136
+ indoor escalator
137
+ excavation
138
+ fabric store
139
+ farm
140
+ fastfood restaurant
141
+ cultivated field
142
+ wild field
143
+ field road
144
+ fire escape
145
+ fire station
146
+ fishpond
147
+ indoor flea market
148
+ indoor florist shop
149
+ food court
150
+ football field
151
+ broadleaf forest
152
+ forest path
153
+ forest road
154
+ formal garden
155
+ fountain
156
+ galley
157
+ indoor garage
158
+ outdoor garage
159
+ gas station
160
+ exterior gazebo
161
+ indoor general store
162
+ outdoor general store
163
+ gift shop
164
+ glacier
165
+ golf course
166
+ indoor greenhouse
167
+ outdoor greenhouse
168
+ grotto
169
+ indoor gymnasium
170
+ indoor hangar
171
+ outdoor hangar
172
+ harbor
173
+ hardware store
174
+ hayfield
175
+ heliport
176
+ highway
177
+ home office
178
+ home theater
179
+ hospital
180
+ hospital room
181
+ hot spring
182
+ outdoor hotel
183
+ hotel room
184
+ house
185
+ outdoor hunting lodge
186
+ ice cream parlor
187
+ ice floe
188
+ ice shelf
189
+ indoor ice skating rink
190
+ outdoor ice skating rink
191
+ iceberg
192
+ igloo
193
+ industrial area
194
+ outdoor inn
195
+ islet
196
+ indoor jacuzzi
197
+ jail cell
198
+ japanese garden
199
+ jewelry shop
200
+ junkyard
201
+ kasbah
202
+ outdoor kennel
203
+ kindergarden classroom
204
+ kitchen
205
+ lagoon
206
+ natural lake
207
+ landfill
208
+ landing deck
209
+ laundromat
210
+ lawn
211
+ lecture room
212
+ legislative chamber
213
+ indoor library
214
+ outdoor library
215
+ lighthouse
216
+ living room
217
+ loading dock
218
+ lobby
219
+ lock chamber
220
+ locker room
221
+ mansion
222
+ manufactured home
223
+ indoor market
224
+ outdoor market
225
+ marsh
226
+ martial arts gym
227
+ mausoleum
228
+ medina
229
+ mezzanine
230
+ water moat
231
+ outdoor mosque
232
+ motel
233
+ mountain
234
+ mountain path
235
+ mountain snowy
236
+ indoor movie theater
237
+ indoor museum
238
+ outdoor museum
239
+ music studio
240
+ natural history museum
241
+ nursery
242
+ nursing home
243
+ oast house
244
+ ocean
245
+ office
246
+ office building
247
+ office cubicles
248
+ oilrig
249
+ operating room
250
+ orchard
251
+ orchestra pit
252
+ pagoda
253
+ palace
254
+ pantry
255
+ park
256
+ indoor parking garage
257
+ outdoor parking garage
258
+ parking lot
259
+ pasture
260
+ patio
261
+ pavilion
262
+ pet shop
263
+ pharmacy
264
+ phone booth
265
+ physics laboratory
266
+ picnic area
267
+ pier
268
+ pizzeria
269
+ playground
270
+ playroom
271
+ plaza
272
+ pond
273
+ porch
274
+ promenade
275
+ indoor pub
276
+ racecourse
277
+ raceway
278
+ raft
279
+ railroad track
280
+ rainforest
281
+ reception
282
+ recreation room
283
+ repair shop
284
+ residential neighborhood
285
+ restaurant
286
+ restaurant kitchen
287
+ restaurant patio
288
+ rice paddy
289
+ river
290
+ rock arch
291
+ roof garden
292
+ rope bridge
293
+ ruin
294
+ runway
295
+ sandbox
296
+ sauna
297
+ schoolhouse
298
+ science museum
299
+ server room
300
+ shed
301
+ shoe shop
302
+ shopfront
303
+ indoor shopping mall
304
+ shower
305
+ ski resort
306
+ ski slope
307
+ sky
308
+ skyscraper
309
+ slum
310
+ snowfield
311
+ soccer field
312
+ stable
313
+ baseball stadium
314
+ football stadium
315
+ soccer stadium
316
+ indoor stage
317
+ outdoor stage
318
+ staircase
319
+ storage room
320
+ street
321
+ platform subway station
322
+ supermarket
323
+ sushi bar
324
+ swamp
325
+ swimming hole
326
+ indoor swimming pool
327
+ outdoor swimming pool
328
+ outdoor synagogue
329
+ television room
330
+ television studio
331
+ asia temple
332
+ throne room
333
+ ticket booth
334
+ topiary garden
335
+ tower
336
+ toyshop
337
+ train interior
338
+ platform train station
339
+ tree farm
340
+ tree house
341
+ trench
342
+ tundra
343
+ ocean deep underwater
344
+ utility room
345
+ valley
346
+ vegetable garden
347
+ veterinarians office
348
+ viaduct
349
+ village
350
+ vineyard
351
+ volcano
352
+ outdoor volleyball court
353
+ waiting room
354
+ water park
355
+ water tower
356
+ waterfall
357
+ watering hole
358
+ wave
359
+ wet bar
360
+ wheat field
361
+ wind farm
362
+ windmill
363
+ yard
364
+ youth hostel
365
+ zen garden
prompts/tencent-ml-classnames.txt ADDED
The diff for this file is too large to render. See raw diff
 
prompts/tencent-ml-images.txt ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ git+https://github.com/huggingface/transformers
2
+ ftfy
3
+ regex
4
+ tqdm
5
+ git+https://github.com/openai/CLIP.git
6
+ gradio
7
+ torch
8
+ wget