{ "_name_or_path": "facebook/wav2vec2-base", "activation_dropout": 0.0, "adapter_attn_dim": null, "adapter_kernel_size": 3, "adapter_stride": 2, "add_adapter": false, "apply_spec_augment": true, "architectures": [ "Wav2Vec2ForSequenceClassification" ], "attention_dropout": 0.1, "bos_token_id": 1, "classifier_proj_size": 256, "codevector_dim": 256, "contrastive_logits_temperature": 0.1, "conv_bias": false, "conv_dim": [ 512, 512, 512, 512, 512, 512, 512 ], "conv_kernel": [ 10, 3, 3, 3, 3, 2, 2 ], "conv_stride": [ 5, 2, 2, 2, 2, 2, 2 ], "ctc_loss_reduction": "sum", "ctc_zero_infinity": false, "diversity_loss_weight": 0.1, "do_stable_layer_norm": false, "eos_token_id": 2, "feat_extract_activation": "gelu", "feat_extract_norm": "group", "feat_proj_dropout": 0.1, "feat_quantizer_dropout": 0.0, "final_dropout": 0.0, "finetuning_task": "audio-classification", "freeze_feat_extract_train": true, "hidden_act": "gelu", "hidden_dropout": 0.1, "hidden_size": 768, "id2label": { "0": 1, "1": 1, "10": 1, "100": 0, "101": 0, "102": 1, "103": 0, "104": 0, "105": 1, "106": 1, "107": 0, "108": 0, "109": 0, "11": 0, "110": 0, "111": 0, "112": 1, "113": 1, "114": 0, "115": 0, "116": 1, "117": 1, "118": 0, "119": 0, "12": 1, "120": 1, "121": 0, "122": 0, "123": 0, "124": 0, "125": 0, "126": 1, "127": 1, "128": 1, "129": 0, "13": 0, "130": 1, "131": 0, "132": 0, "133": 0, "134": 1, "135": 1, "136": 1, "137": 1, "138": 1, "139": 0, "14": 0, "140": 1, "141": 0, "142": 1, "143": 0, "144": 1, "145": 1, "146": 0, "147": 0, "148": 0, "149": 1, "15": 1, "150": 0, "151": 1, "152": 1, "153": 1, "154": 1, "155": 0, "156": 0, "157": 1, "158": 0, "159": 1, "16": 1, "160": 1, "161": 1, "162": 1, "163": 0, "164": 0, "165": 0, "166": 0, "167": 1, "168": 0, "169": 0, "17": 0, "170": 0, "171": 0, "172": 0, "173": 1, "174": 1, "175": 0, "176": 0, "177": 0, "178": 1, "179": 0, "18": 0, "180": 0, "181": 1, "182": 0, "183": 1, "184": 0, "185": 1, "186": 1, "187": 0, "188": 0, "189": 0, "19": 1, "190": 1, "191": 0, "192": 0, "193": 1, "194": 0, "195": 0, "196": 0, "197": 0, "198": 1, "199": 1, "2": 0, "20": 1, "200": 0, "201": 1, "202": 0, "203": 0, "204": 1, "205": 0, "206": 0, "207": 0, "208": 1, "209": 0, "21": 1, "210": 1, "211": 1, "212": 0, "213": 0, "214": 1, "215": 0, "216": 1, "217": 0, "218": 0, "219": 1, "22": 1, "220": 1, "221": 1, "222": 1, "223": 1, "224": 0, "225": 0, "226": 0, "227": 1, "228": 1, "229": 0, "23": 1, "230": 1, "231": 1, "232": 1, "233": 1, "234": 1, "235": 0, "236": 1, "237": 1, "238": 0, "239": 0, "24": 1, "240": 0, "241": 0, "242": 0, "243": 1, "244": 1, "245": 1, "246": 0, "247": 1, "248": 1, "249": 0, "25": 0, "250": 0, "251": 1, "252": 0, "253": 1, "254": 1, "255": 1, "256": 1, "257": 1, "258": 0, "259": 1, "26": 1, "260": 1, "261": 0, "262": 0, "263": 1, "264": 1, "265": 1, "266": 1, "267": 1, "268": 1, "269": 1, "27": 0, "270": 0, "271": 0, "272": 1, "273": 1, "274": 1, "275": 0, "276": 1, "277": 1, "278": 1, "279": 0, "28": 1, "280": 0, "281": 1, "282": 1, "283": 1, "284": 0, "285": 1, "286": 1, "287": 0, "288": 0, "289": 0, "29": 1, "290": 0, "291": 0, "292": 1, "293": 1, "294": 0, "295": 1, "296": 0, "297": 0, "298": 0, "299": 0, "3": 1, "30": 1, "300": 1, "301": 1, "302": 1, "303": 1, "304": 1, "305": 1, "306": 0, "307": 1, "308": 0, "309": 1, "31": 0, "310": 0, "311": 1, "312": 1, "313": 1, "314": 1, "315": 1, "316": 0, "317": 1, "318": 0, "319": 1, "32": 1, "320": 1, "321": 1, "322": 1, "323": 1, "324": 0, "325": 0, "326": 1, "327": 0, "328": 0, "329": 1, "33": 1, "330": 0, "331": 0, "332": 1, "333": 0, "334": 0, "335": 0, "336": 0, "337": 0, "338": 0, "339": 1, "34": 0, "340": 0, "341": 0, "342": 0, "343": 1, "344": 1, "345": 0, "346": 1, "347": 0, "348": 1, "349": 1, "35": 1, "350": 1, "351": 1, "352": 1, "353": 1, "354": 1, "355": 0, "356": 0, "357": 0, "358": 1, "359": 1, "36": 1, "360": 1, "361": 0, "362": 1, "363": 0, "364": 0, "365": 0, "366": 0, "367": 0, "368": 0, "369": 1, "37": 1, "370": 0, "371": 1, "372": 1, "373": 1, "374": 0, "375": 1, "376": 1, "377": 0, "378": 1, "379": 1, "38": 0, "380": 1, "381": 0, "382": 1, "383": 1, "384": 0, "385": 1, "386": 1, "387": 0, "388": 0, "389": 1, "39": 1, "390": 1, "391": 0, "392": 1, "393": 1, "394": 0, "395": 0, "396": 1, "397": 0, "398": 1, "399": 1, "4": 0, "40": 0, "400": 1, "401": 0, "402": 1, "403": 1, "404": 1, "405": 0, "406": 1, "407": 1, "408": 0, "409": 1, "41": 0, "410": 0, "411": 1, "412": 0, "413": 0, "414": 1, "415": 1, "416": 1, "417": 0, "418": 0, "419": 0, "42": 0, "420": 0, "421": 1, "422": 0, "423": 0, "424": 1, "425": 0, "426": 0, "427": 0, "428": 1, "429": 1, "43": 0, "430": 0, "431": 0, "432": 0, "433": 1, "434": 0, "435": 1, "436": 1, "437": 0, "438": 1, "439": 0, "44": 1, "440": 1, "441": 0, "442": 0, "443": 0, "444": 0, "445": 1, "446": 1, "447": 0, "448": 1, "449": 0, "45": 1, "450": 0, "451": 0, "452": 1, "453": 0, "454": 1, "455": 1, "456": 1, "457": 0, "458": 1, "459": 0, "46": 0, "460": 1, "461": 0, "462": 1, "463": 0, "464": 0, "465": 0, "466": 0, "467": 0, "468": 0, "469": 1, "47": 0, "470": 0, "471": 1, "472": 0, "473": 0, "474": 1, "475": 1, "476": 0, "477": 0, "478": 1, "479": 0, "48": 1, "480": 0, "481": 0, "482": 0, "483": 0, "484": 0, "485": 1, "486": 0, "487": 0, "488": 1, "489": 0, "49": 0, "490": 1, "491": 0, "492": 1, "493": 1, "494": 0, "495": 1, "496": 0, "497": 1, "498": 0, "499": 0, "5": 0, "50": 0, "500": 0, "501": 0, "502": 1, "51": 1, "52": 1, "53": 1, "54": 1, "55": 0, "56": 0, "57": 1, "58": 1, "59": 1, "6": 1, "60": 1, "61": 0, "62": 1, "63": 1, "64": 0, "65": 0, "66": 1, "67": 0, "68": 1, "69": 1, "7": 1, "70": 0, "71": 1, "72": 0, "73": 0, "74": 0, "75": 1, "76": 1, "77": 0, "78": 0, "79": 1, "8": 0, "80": 0, "81": 0, "82": 1, "83": 1, "84": 0, "85": 0, "86": 1, "87": 0, "88": 1, "89": 1, "9": 1, "90": 0, "91": 1, "92": 1, "93": 0, "94": 1, "95": 0, "96": 0, "97": 0, "98": 0, "99": 1 }, "initializer_range": 0.02, "intermediate_size": 3072, "label2id": { "0": "501", "1": "502" }, "layer_norm_eps": 1e-05, "layerdrop": 0.0, "mask_channel_length": 10, "mask_channel_min_space": 1, "mask_channel_other": 0.0, "mask_channel_prob": 0.0, "mask_channel_selection": "static", "mask_feature_length": 10, "mask_feature_min_masks": 0, "mask_feature_prob": 0.0, "mask_time_length": 10, "mask_time_min_masks": 2, "mask_time_min_space": 1, "mask_time_other": 0.0, "mask_time_prob": 0.05, "mask_time_selection": "static", "model_type": "wav2vec2", "no_mask_channel_overlap": false, "no_mask_time_overlap": false, "num_adapter_layers": 3, "num_attention_heads": 12, "num_codevector_groups": 2, "num_codevectors_per_group": 320, "num_conv_pos_embedding_groups": 16, "num_conv_pos_embeddings": 128, "num_feat_extract_layers": 7, "num_hidden_layers": 12, "num_negatives": 100, "output_hidden_size": 768, "pad_token_id": 0, "proj_codevector_dim": 256, "tdnn_dilation": [ 1, 2, 3, 1, 1 ], "tdnn_dim": [ 512, 512, 512, 512, 1500 ], "tdnn_kernel": [ 5, 3, 3, 1, 1 ], "torch_dtype": "float32", "transformers_version": "4.44.2", "use_weighted_layer_sum": false, "vocab_size": 32, "xvector_output_dim": 512 }