{
  "_name_or_path": "MIT/ast-finetuned-audioset-10-10-0.4593",
  "architectures": [
    "ASTForAudioClassification"
  ],
  "attention_probs_dropout_prob": 0.2,
  "frequency_stride": 10,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.2,
  "hidden_size": 768,
  "id2label": {
    "0": "Afrikaans",
    "1": "Amharic",
    "2": "Arabic",
    "3": "Assamese",
    "4": "Asturian",
    "5": "Azerbaijani",
    "6": "Belarusian",
    "7": "Bulgarian",
    "8": "Bengali",
    "9": "Bosnian",
    "10": "Catalan",
    "11": "Cebuano",
    "12": "Sorani-Kurdish",
    "13": "Mandarin Chinese",
    "14": "Czech",
    "15": "Welsh",
    "16": "Danish",
    "17": "German",
    "18": "Greek",
    "19": "English",
    "20": "Spanish",
    "21": "Estonian",
    "22": "Persian",
    "23": "Fula",
    "24": "Finnish",
    "25": "Filipino",
    "26": "French",
    "27": "Irish",
    "28": "Galician",
    "29": "Gujarati",
    "30": "Hausa",
    "31": "Hebrew",
    "32": "Hindi",
    "33": "Croatian",
    "34": "Hungarian",
    "35": "Armenian",
    "36": "Indonesian",
    "37": "Igbo",
    "38": "Icelandic",
    "39": "Italian",
    "40": "Japanese",
    "41": "Javanese",
    "42": "Georgian",
    "43": "Kamba",
    "44": "Kabuverdianu",
    "45": "Kazakh",
    "46": "Khmer",
    "47": "Kannada",
    "48": "Korean",
    "49": "Kyrgyz",
    "50": "Luxembourgish",
    "51": "Ganda",
    "52": "Lingala",
    "53": "Lao",
    "54": "Lithuanian",
    "55": "Luo",
    "56": "Latvian",
    "57": "Maori",
    "58": "Macedonian",
    "59": "Malayalam",
    "60": "Mongolian",
    "61": "Marathi",
    "62": "Malay",
    "63": "Maltese",
    "64": "Burmese",
    "65": "Norwegian",
    "66": "Nepali",
    "67": "Dutch",
    "68": "Northern-Sotho",
    "69": "Nyanja",
    "70": "Occitan",
    "71": "Oromo",
    "72": "Oriya",
    "73": "Punjabi",
    "74": "Polish",
    "75": "Pashto",
    "76": "Portuguese",
    "77": "Romanian",
    "78": "Russian",
    "79": "Sindhi",
    "80": "Slovak",
    "81": "Slovenian",
    "82": "Shona",
    "83": "Somali",
    "84": "Serbian",
    "85": "Swedish",
    "86": "Swahili",
    "87": "Tamil",
    "88": "Telugu",
    "89": "Tajik",
    "90": "Thai",
    "91": "Turkish",
    "92": "Ukrainian",
    "93": "Umbundu",
    "94": "Urdu",
    "95": "Uzbek",
    "96": "Vietnamese",
    "97": "Wolof",
    "98": "Xhosa",
    "99": "Yoruba",
    "100": "Cantonese Chinese",
    "101": "Zulu"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "Afrikaans": 0,
    "Amharic": 1,
    "Arabic": 2,
    "Armenian": 35,
    "Assamese": 3,
    "Asturian": 4,
    "Azerbaijani": 5,
    "Belarusian": 6,
    "Bengali": 8,
    "Bosnian": 9,
    "Bulgarian": 7,
    "Burmese": 64,
    "Cantonese Chinese": 100,
    "Catalan": 10,
    "Cebuano": 11,
    "Croatian": 33,
    "Czech": 14,
    "Danish": 16,
    "Dutch": 67,
    "English": 19,
    "Estonian": 21,
    "Filipino": 25,
    "Finnish": 24,
    "French": 26,
    "Fula": 23,
    "Galician": 28,
    "Ganda": 51,
    "Georgian": 42,
    "German": 17,
    "Greek": 18,
    "Gujarati": 29,
    "Hausa": 30,
    "Hebrew": 31,
    "Hindi": 32,
    "Hungarian": 34,
    "Icelandic": 38,
    "Igbo": 37,
    "Indonesian": 36,
    "Irish": 27,
    "Italian": 39,
    "Japanese": 40,
    "Javanese": 41,
    "Kabuverdianu": 44,
    "Kamba": 43,
    "Kannada": 47,
    "Kazakh": 45,
    "Khmer": 46,
    "Korean": 48,
    "Kyrgyz": 49,
    "Lao": 53,
    "Latvian": 56,
    "Lingala": 52,
    "Lithuanian": 54,
    "Luo": 55,
    "Luxembourgish": 50,
    "Macedonian": 58,
    "Malay": 62,
    "Malayalam": 59,
    "Maltese": 63,
    "Mandarin Chinese": 13,
    "Maori": 57,
    "Marathi": 61,
    "Mongolian": 60,
    "Nepali": 66,
    "Northern-Sotho": 68,
    "Norwegian": 65,
    "Nyanja": 69,
    "Occitan": 70,
    "Oriya": 72,
    "Oromo": 71,
    "Pashto": 75,
    "Persian": 22,
    "Polish": 74,
    "Portuguese": 76,
    "Punjabi": 73,
    "Romanian": 77,
    "Russian": 78,
    "Serbian": 84,
    "Shona": 82,
    "Sindhi": 79,
    "Slovak": 80,
    "Slovenian": 81,
    "Somali": 83,
    "Sorani-Kurdish": 12,
    "Spanish": 20,
    "Swahili": 86,
    "Swedish": 85,
    "Tajik": 89,
    "Tamil": 87,
    "Telugu": 88,
    "Thai": 90,
    "Turkish": 91,
    "Ukrainian": 92,
    "Umbundu": 93,
    "Urdu": 94,
    "Uzbek": 95,
    "Vietnamese": 96,
    "Welsh": 15,
    "Wolof": 97,
    "Xhosa": 98,
    "Yoruba": 99,
    "Zulu": 101
  },
  "layer_norm_eps": 1e-12,
  "max_length": 1024,
  "model_type": "audio-spectrogram-transformer",
  "num_attention_heads": 12,
  "num_hidden_layers": 6,
  "num_mel_bins": 128,
  "patch_size": 16,
  "problem_type": "single_label_classification",
  "qkv_bias": true,
  "time_stride": 10,
  "torch_dtype": "float32",
  "transformers_version": "4.27.0.dev0"
}