{
  "_name_or_path": "facebook/hubert-base-ls960",
  "activation_dropout": 0.1,
  "apply_spec_augment": true,
  "architectures": [
    "HubertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "bos_token_id": 1,
  "classifier_proj_size": 256,
  "conv_bias": false,
  "conv_dim": [
    512,
    512,
    512,
    512,
    512,
    512,
    512
  ],
  "conv_kernel": [
    10,
    3,
    3,
    3,
    3,
    2,
    2
  ],
  "conv_stride": [
    5,
    2,
    2,
    2,
    2,
    2,
    2
  ],
  "ctc_loss_reduction": "sum",
  "ctc_zero_infinity": false,
  "do_stable_layer_norm": false,
  "eos_token_id": 2,
  "feat_extract_activation": "gelu",
  "feat_extract_dropout": 0.0,
  "feat_extract_norm": "group",
  "feat_proj_dropout": 0.1,
  "feat_proj_layer_norm": true,
  "final_dropout": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout": 0.1,
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "0000101",
    "1": "0000110",
    "10": "0010010",
    "11": "0010011",
    "12": "0010101",
    "13": "0010110",
    "14": "0010111",
    "15": "0011001",
    "16": "0011010",
    "17": "0011011",
    "18": "0011101",
    "19": "0011110",
    "2": "0000111",
    "20": "0011111",
    "21": "0100001",
    "22": "0100010",
    "23": "0100011",
    "24": "0100101",
    "25": "0100110",
    "26": "0100111",
    "27": "0101001",
    "28": "0101010",
    "29": "0101011",
    "3": "0001001",
    "30": "0101101",
    "31": "0101110",
    "32": "0101111",
    "33": "0110001",
    "34": "0110010",
    "35": "0110011",
    "36": "0110101",
    "37": "0110110",
    "38": "0110111",
    "39": "0111001",
    "4": "0001010",
    "40": "0111010",
    "41": "0111011",
    "42": "0111101",
    "43": "0111110",
    "44": "0111111",
    "45": "1000001",
    "46": "1000010",
    "47": "1000011",
    "48": "1000101",
    "49": "1000110",
    "5": "0001011",
    "50": "1000111",
    "51": "1001001",
    "52": "1001010",
    "53": "1001011",
    "54": "1001101",
    "55": "1001110",
    "56": "1001111",
    "57": "1010001",
    "58": "1010010",
    "59": "1010011",
    "6": "0001101",
    "60": "1010101",
    "61": "1010110",
    "62": "1010111",
    "63": "1011001",
    "64": "1011010",
    "65": "1011011",
    "66": "1011101",
    "67": "1011110",
    "68": "1011111",
    "69": "1100001",
    "7": "0001110",
    "70": "1100010",
    "71": "1100011",
    "72": "1100101",
    "73": "1100110",
    "74": "1100111",
    "75": "1101001",
    "76": "1101010",
    "77": "1101011",
    "78": "1101101",
    "79": "1101110",
    "8": "0001111",
    "80": "1101111",
    "81": "1110001",
    "82": "1110010",
    "83": "1110011",
    "9": "0010001"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "0000101": 0,
    "0000110": 1,
    "0000111": 2,
    "0001001": 3,
    "0001010": 4,
    "0001011": 5,
    "0001101": 6,
    "0001110": 7,
    "0001111": 8,
    "0010001": 9,
    "0010010": 10,
    "0010011": 11,
    "0010101": 12,
    "0010110": 13,
    "0010111": 14,
    "0011001": 15,
    "0011010": 16,
    "0011011": 17,
    "0011101": 18,
    "0011110": 19,
    "0011111": 20,
    "0100001": 21,
    "0100010": 22,
    "0100011": 23,
    "0100101": 24,
    "0100110": 25,
    "0100111": 26,
    "0101001": 27,
    "0101010": 28,
    "0101011": 29,
    "0101101": 30,
    "0101110": 31,
    "0101111": 32,
    "0110001": 33,
    "0110010": 34,
    "0110011": 35,
    "0110101": 36,
    "0110110": 37,
    "0110111": 38,
    "0111001": 39,
    "0111010": 40,
    "0111011": 41,
    "0111101": 42,
    "0111110": 43,
    "0111111": 44,
    "1000001": 45,
    "1000010": 46,
    "1000011": 47,
    "1000101": 48,
    "1000110": 49,
    "1000111": 50,
    "1001001": 51,
    "1001010": 52,
    "1001011": 53,
    "1001101": 54,
    "1001110": 55,
    "1001111": 56,
    "1010001": 57,
    "1010010": 58,
    "1010011": 59,
    "1010101": 60,
    "1010110": 61,
    "1010111": 62,
    "1011001": 63,
    "1011010": 64,
    "1011011": 65,
    "1011101": 66,
    "1011110": 67,
    "1011111": 68,
    "1100001": 69,
    "1100010": 70,
    "1100011": 71,
    "1100101": 72,
    "1100110": 73,
    "1100111": 74,
    "1101001": 75,
    "1101010": 76,
    "1101011": 77,
    "1101101": 78,
    "1101110": 79,
    "1101111": 80,
    "1110001": 81,
    "1110010": 82,
    "1110011": 83
  },
  "layer_norm_eps": 1e-05,
  "layerdrop": 0.1,
  "mask_feature_length": 10,
  "mask_feature_min_masks": 0,
  "mask_feature_prob": 0.0,
  "mask_time_length": 10,
  "mask_time_min_masks": 2,
  "mask_time_prob": 0.05,
  "model_type": "hubert",
  "num_attention_heads": 12,
  "num_conv_pos_embedding_groups": 16,
  "num_conv_pos_embeddings": 128,
  "num_feat_extract_layers": 7,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
  "torch_dtype": "float32",
  "transformers_version": "4.38.2",
  "use_weighted_layer_sum": false,
  "vocab_size": 32
}