{ "architectures": [ "MCTCTModel" ], "attention_head_dim": 128, "attention_probs_dropout_prob": 0.3, "bos_token_id": 0, "conv_channels": null, "conv_dropout": 0.3, "conv_glu_dim": 1, "conv_kernel": [ 7 ], "conv_stride": [ 3 ], "ctc_loss_reduction": "sum", "ctc_zero_infinity": false, "eos_token_id": 2, "hidden_act": "relu", "hidden_dropout_prob": 0.3, "hidden_size": 512, "initializer_range": 0.02, "input_channels": 1, "input_feat_per_channel": 80, "intermediate_size": 20, "layer_norm_eps": 1e-05, "layerdrop": 0.3, "max_position_embeddings": 920, "model_type": "mctct", "num_attention_heads": 4, "num_conv_layers": 1, "num_hidden_layers": 4, "pad_token_id": 1, "torch_dtype": "float32", "transformers_version": "4.28.0.dev0", "vocab_size": 32 }