{
  "_name_or_path": "openai/clip-vit-large-patch14-336",
  "architectures": [
    "CLIPForImageClassification"
  ],
  "id2label": {
    "0": "A",
    "1": "B",
    "2": "C",
    "3": "D",
    "4": "E",
    "5": "F",
    "6": "G",
    "7": "H",
    "8": "I",
    "9": "J",
    "10": "K",
    "11": "L",
    "12": "M",
    "13": "N",
    "14": "O",
    "15": "P",
    "16": "Q",
    "17": "R",
    "18": "S",
    "19": "T",
    "20": "U",
    "21": "V",
    "22": "W",
    "23": "X",
    "24": "Y",
    "25": "Z"
  },
  "initializer_factor": 1.0,
  "label2id": {
    "A": 0,
    "B": 1,
    "C": 2,
    "D": 3,
    "E": 4,
    "F": 5,
    "G": 6,
    "H": 7,
    "I": 8,
    "J": 9,
    "K": 10,
    "L": 11,
    "M": 12,
    "N": 13,
    "O": 14,
    "P": 15,
    "Q": 16,
    "R": 17,
    "S": 18,
    "T": 19,
    "U": 20,
    "V": 21,
    "W": 22,
    "X": 23,
    "Y": 24,
    "Z": 25
  },
  "logit_scale_init_value": 2.6592,
  "model_type": "clip",
  "problem_type": "single_label_classification",
  "projection_dim": 768,
  "text_config": {
    "dropout": 0.0,
    "hidden_size": 768,
    "intermediate_size": 3072,
    "model_type": "clip_text_model",
    "num_attention_heads": 12,
    "projection_dim": 768
  },
  "torch_dtype": "float32",
  "transformers_version": "4.44.2",
  "vision_config": {
    "dropout": 0.0,
    "hidden_size": 1024,
    "image_size": 336,
    "intermediate_size": 4096,
    "model_type": "clip_vision_model",
    "num_attention_heads": 16,
    "num_hidden_layers": 24,
    "patch_size": 14,
    "projection_dim": 768
  }
}