{
  "_name_or_path": "facebook/wav2vec2-base",
  "activation_dropout": 0.0,
  "adapter_attn_dim": null,
  "adapter_kernel_size": 3,
  "adapter_stride": 2,
  "add_adapter": false,
  "apply_spec_augment": true,
  "architectures": [
    "Wav2Vec2ForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "bos_token_id": 1,
  "classifier_proj_size": 256,
  "codevector_dim": 256,
  "contrastive_logits_temperature": 0.1,
  "conv_bias": false,
  "conv_dim": [
    512,
    512,
    512,
    512,
    512,
    512,
    512
  ],
  "conv_kernel": [
    10,
    3,
    3,
    3,
    3,
    2,
    2
  ],
  "conv_stride": [
    5,
    2,
    2,
    2,
    2,
    2,
    2
  ],
  "ctc_loss_reduction": "sum",
  "ctc_zero_infinity": false,
  "diversity_loss_weight": 0.1,
  "do_stable_layer_norm": false,
  "eos_token_id": 2,
  "feat_extract_activation": "gelu",
  "feat_extract_norm": "group",
  "feat_proj_dropout": 0.1,
  "feat_quantizer_dropout": 0.0,
  "final_dropout": 0.0,
  "finetuning_task": "audio-classification",
  "freeze_feat_extract_train": true,
  "hidden_act": "gelu",
  "hidden_dropout": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "af_za",
    "1": "am_et",
    "10": "ca_es",
    "100": "yue_hant_hk",
    "101": "zu_za",
    "102": "all",
    "11": "ceb_ph",
    "12": "ckb_iq",
    "13": "cmn_hans_cn",
    "14": "cs_cz",
    "15": "cy_gb",
    "16": "da_dk",
    "17": "de_de",
    "18": "el_gr",
    "19": "en_us",
    "2": "ar_eg",
    "20": "es_419",
    "21": "et_ee",
    "22": "fa_ir",
    "23": "ff_sn",
    "24": "fi_fi",
    "25": "fil_ph",
    "26": "fr_fr",
    "27": "ga_ie",
    "28": "gl_es",
    "29": "gu_in",
    "3": "as_in",
    "30": "ha_ng",
    "31": "he_il",
    "32": "hi_in",
    "33": "hr_hr",
    "34": "hu_hu",
    "35": "hy_am",
    "36": "id_id",
    "37": "ig_ng",
    "38": "is_is",
    "39": "it_it",
    "4": "ast_es",
    "40": "ja_jp",
    "41": "jv_id",
    "42": "ka_ge",
    "43": "kam_ke",
    "44": "kea_cv",
    "45": "kk_kz",
    "46": "km_kh",
    "47": "kn_in",
    "48": "ko_kr",
    "49": "ky_kg",
    "5": "az_az",
    "50": "lb_lu",
    "51": "lg_ug",
    "52": "ln_cd",
    "53": "lo_la",
    "54": "lt_lt",
    "55": "luo_ke",
    "56": "lv_lv",
    "57": "mi_nz",
    "58": "mk_mk",
    "59": "ml_in",
    "6": "be_by",
    "60": "mn_mn",
    "61": "mr_in",
    "62": "ms_my",
    "63": "mt_mt",
    "64": "my_mm",
    "65": "nb_no",
    "66": "ne_np",
    "67": "nl_nl",
    "68": "nso_za",
    "69": "ny_mw",
    "7": "bg_bg",
    "70": "oc_fr",
    "71": "om_et",
    "72": "or_in",
    "73": "pa_in",
    "74": "pl_pl",
    "75": "ps_af",
    "76": "pt_br",
    "77": "ro_ro",
    "78": "ru_ru",
    "79": "sd_in",
    "8": "bn_in",
    "80": "sk_sk",
    "81": "sl_si",
    "82": "sn_zw",
    "83": "so_so",
    "84": "sr_rs",
    "85": "sv_se",
    "86": "sw_ke",
    "87": "ta_in",
    "88": "te_in",
    "89": "tg_tj",
    "9": "bs_ba",
    "90": "th_th",
    "91": "tr_tr",
    "92": "uk_ua",
    "93": "umb_ao",
    "94": "ur_pk",
    "95": "uz_uz",
    "96": "vi_vn",
    "97": "wo_sn",
    "98": "xh_za",
    "99": "yo_ng"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "af_za": 0,
    "all": 102,
    "am_et": 1,
    "ar_eg": 2,
    "as_in": 3,
    "ast_es": 4,
    "az_az": 5,
    "be_by": 6,
    "bg_bg": 7,
    "bn_in": 8,
    "bs_ba": 9,
    "ca_es": 10,
    "ceb_ph": 11,
    "ckb_iq": 12,
    "cmn_hans_cn": 13,
    "cs_cz": 14,
    "cy_gb": 15,
    "da_dk": 16,
    "de_de": 17,
    "el_gr": 18,
    "en_us": 19,
    "es_419": 20,
    "et_ee": 21,
    "fa_ir": 22,
    "ff_sn": 23,
    "fi_fi": 24,
    "fil_ph": 25,
    "fr_fr": 26,
    "ga_ie": 27,
    "gl_es": 28,
    "gu_in": 29,
    "ha_ng": 30,
    "he_il": 31,
    "hi_in": 32,
    "hr_hr": 33,
    "hu_hu": 34,
    "hy_am": 35,
    "id_id": 36,
    "ig_ng": 37,
    "is_is": 38,
    "it_it": 39,
    "ja_jp": 40,
    "jv_id": 41,
    "ka_ge": 42,
    "kam_ke": 43,
    "kea_cv": 44,
    "kk_kz": 45,
    "km_kh": 46,
    "kn_in": 47,
    "ko_kr": 48,
    "ky_kg": 49,
    "lb_lu": 50,
    "lg_ug": 51,
    "ln_cd": 52,
    "lo_la": 53,
    "lt_lt": 54,
    "luo_ke": 55,
    "lv_lv": 56,
    "mi_nz": 57,
    "mk_mk": 58,
    "ml_in": 59,
    "mn_mn": 60,
    "mr_in": 61,
    "ms_my": 62,
    "mt_mt": 63,
    "my_mm": 64,
    "nb_no": 65,
    "ne_np": 66,
    "nl_nl": 67,
    "nso_za": 68,
    "ny_mw": 69,
    "oc_fr": 70,
    "om_et": 71,
    "or_in": 72,
    "pa_in": 73,
    "pl_pl": 74,
    "ps_af": 75,
    "pt_br": 76,
    "ro_ro": 77,
    "ru_ru": 78,
    "sd_in": 79,
    "sk_sk": 80,
    "sl_si": 81,
    "sn_zw": 82,
    "so_so": 83,
    "sr_rs": 84,
    "sv_se": 85,
    "sw_ke": 86,
    "ta_in": 87,
    "te_in": 88,
    "tg_tj": 89,
    "th_th": 90,
    "tr_tr": 91,
    "uk_ua": 92,
    "umb_ao": 93,
    "ur_pk": 94,
    "uz_uz": 95,
    "vi_vn": 96,
    "wo_sn": 97,
    "xh_za": 98,
    "yo_ng": 99,
    "yue_hant_hk": 100,
    "zu_za": 101
  },
  "layer_norm_eps": 1e-05,
  "layerdrop": 0.0,
  "mask_channel_length": 10,
  "mask_channel_min_space": 1,
  "mask_channel_other": 0.0,
  "mask_channel_prob": 0.0,
  "mask_channel_selection": "static",
  "mask_feature_length": 10,
  "mask_feature_min_masks": 0,
  "mask_feature_prob": 0.0,
  "mask_time_length": 10,
  "mask_time_min_masks": 2,
  "mask_time_min_space": 1,
  "mask_time_other": 0.0,
  "mask_time_prob": 0.05,
  "mask_time_selection": "static",
  "model_type": "wav2vec2",
  "no_mask_channel_overlap": false,
  "no_mask_time_overlap": false,
  "num_adapter_layers": 3,
  "num_attention_heads": 12,
  "num_codevector_groups": 2,
  "num_codevectors_per_group": 320,
  "num_conv_pos_embedding_groups": 16,
  "num_conv_pos_embeddings": 128,
  "num_feat_extract_layers": 7,
  "num_hidden_layers": 12,
  "num_negatives": 100,
  "output_hidden_size": 768,
  "pad_token_id": 0,
  "proj_codevector_dim": 256,
  "tdnn_dilation": [
    1,
    2,
    3,
    1,
    1
  ],
  "tdnn_dim": [
    512,
    512,
    512,
    512,
    1500
  ],
  "tdnn_kernel": [
    5,
    3,
    3,
    1,
    1
  ],
  "torch_dtype": "float32",
  "transformers_version": "4.44.2",
  "use_weighted_layer_sum": false,
  "vocab_size": 32,
  "xvector_output_dim": 512
}