{
  "block_size": "4_4_4",
  "d_embed": 768,
  "d_head": 64,
  "d_inner": 3072,
  "d_model": 768,
  "dropact": 0.0,
  "dropatt": 0.1,
  "dropout": 0.1,
  "n_head": 12,
  "pool_q_only": true,
  "pooling_size": 2,
  "pooling_type": "mean",
  "separate_cls": true,
  "vocab_size": 50000
}