# Model hyperparameters.
# NOTE(review): every line of this section previously ended in a stray " | |"
# (table/diff extraction residue) and had no indentation. The residue has been
# removed and the keys re-nested under `model:`; confirm the nesting against
# whatever code loads this file.
model:
  d_model: 256
  n_strokes: 100
  n_params: 8
  n_layer: 6
  rms_norm: false
  residual_in_fp32: true
  fused_add_norm: true
  cross_attn_first: true
  identical_stroke_token: false
  ssm_layer: Mamba2
  ssm_d_state: 128
  encoder_patch_size: 4
  encoder_image_size: 128
  # encoder_depths and encoder_dims both list three entries; presumably one
  # entry per encoder stage, so keep the two lists the same length (TODO confirm).
  encoder_depths:
    - 2
    - 2
    - 2
  # The last entry (256) equals d_model above; presumably this must stay in
  # sync if either value is changed (TODO confirm).
  encoder_dims:
    - 64
    - 128
    - 256