---
# DistilBERT backbone model configuration.
#
# NOTE(review): each line previously ended in a stray " | |" (apparently
# table-extraction residue). YAML folded it into the plain scalar, so every
# number and boolean loaded as a string such as "768 | |". The residue is
# removed; the values themselves are unchanged.

# Entry identification.
# NOTE(review): `name`, `config_type` and `task` look like project-level
# registry fields rather than model hyperparameters - confirm with the loader.
name: distilbert
config_type: model
task: backbone

# Model hyperparameters.
# NOTE(review): key names appear to follow Hugging Face's DistilBertConfig;
# verify against whatever actually consumes this file.
activation: gelu
attention_dropout: 0.1
dim: 768
dropout: 0.1
hidden_dim: 3072
initializer_range: 0.02
max_position_embeddings: 512
model_type: distilbert
n_heads: 12
n_layers: 6
output_past: true
pad_token_id: 0
qa_dropout: 0.1
tie_weights_: true
# NOTE(review): presumably must match the tokenizer's vocabulary size - confirm.
vocab_size: 42000