{
  "_name_or_path": "MIT/ast-finetuned-speech-commands-v2",
  "architectures": [
    "ASTForAudioClassification"
  ],
  "attention_probs_dropout_prob": 0.0,
  "frequency_stride": 10,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.0,
  "hidden_size": 768,
  "id2label": {
    "0": "backward",
    "1": "follow",
    "2": "five",
    "3": "bed",
    "4": "zero",
    "5": "on",
    "6": "learn",
    "7": "two",
    "8": "house",
    "9": "tree",
    "10": "dog",
    "11": "stop",
    "12": "seven",
    "13": "eight",
    "14": "down",
    "15": "six",
    "16": "forward",
    "17": "cat",
    "18": "right",
    "19": "visual",
    "20": "four",
    "21": "wow",
    "22": "no",
    "23": "nine",
    "24": "off",
    "25": "three",
    "26": "left",
    "27": "marvin",
    "28": "yes",
    "29": "up",
    "30": "sheila",
    "31": "happy",
    "32": "bird",
    "33": "go",
    "34": "one"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "backward": 0,
    "bed": 3,
    "bird": 32,
    "cat": 17,
    "dog": 10,
    "down": 14,
    "eight": 13,
    "five": 2,
    "follow": 1,
    "forward": 16,
    "four": 20,
    "go": 33,
    "happy": 31,
    "house": 8,
    "learn": 6,
    "left": 26,
    "marvin": 27,
    "nine": 23,
    "no": 22,
    "off": 24,
    "on": 5,
    "one": 34,
    "right": 18,
    "seven": 12,
    "sheila": 30,
    "six": 15,
    "stop": 11,
    "three": 25,
    "tree": 9,
    "two": 7,
    "up": 29,
    "visual": 19,
    "wow": 21,
    "yes": 28,
    "zero": 4
  },
  "layer_norm_eps": 1e-12,
  "max_length": 128,
  "model_type": "audio-spectrogram-transformer",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "num_mel_bins": 128,
  "patch_size": 16,
  "problem_type": "single_label_classification",
  "qkv_bias": true,
  "time_stride": 10,
  "torch_dtype": "float32",
  "transformers_version": "4.37.0"
}