32K partial checkpoint

Files changed (6) hide show

README.md CHANGED Viewed

@@ -1,3 +1,16 @@
 ---
 license: apache-2.0
 ---

 ---
 license: apache-2.0
+language:
+- en
+pipeline_tag: text-classification
 ---
+# Monarch Mixer-BERT
+The 80M checkpoint for M2-BERT-base from the paper [Monarch Mixer: A Simple Sub-Quadratic GEMM-Based Architecture](https://arxiv.org/abs/2310.12109).
+This model has been pretrained with a sequence length of 32K.
+Note (11/3 evening): this is a partial checkpoint; training had not finished when it was uploaded.
+This model was trained by Dan Fu, Jon Saad-Falcon, and Simran Arora.
+Check out our [GitHub](https://github.com/HazyResearch/m2/tree/main) for instructions on how to download and fine-tune it!

config.json ADDED Viewed

+{
+    "model_type": "m2_bert"
+}

config.yaml ADDED Viewed

+# Note that some of the fields in this template haven't been filled in yet.
+# Please resolve any `null` fields before launching!
+# NOTE(review): no `null` fields are visible in this section of the config.
+precision: amp_bf16
+# Sequence length. The same value (32768) also appears below as
+# max_position_embeddings and long_conv_l_max; presumably the three must
+# stay in sync -- confirm before changing one of them.
+max_seq_len: 32768
+# Tokenizer for dataset creation
+tokenizer_name: bert-base-uncased
+# Base model config
+model:
+  name: bert
+  # Both entries reuse the top-level tokenizer_name through ${...}
+  # interpolation (presumably resolved by the config loader -- confirm).
+  pretrained_model_name: ${tokenizer_name}
+  tokenizer_name: ${tokenizer_name}
+  model_config:
+    num_attention_heads: 12
+    num_hidden_layers: 12
+    attention_probs_dropout_prob: 0.0
+    max_position_embeddings: 32768
+    monarch_mixer_sequence_mixing: true
+    long_conv_l_max: 32768
+    # Exponent-form floats carry an explicit decimal point: YAML 1.1 loaders
+    # such as PyYAML read a bare `1e-3` as a string rather than a float.
+    long_conv_kernel_learning_rate: 1.0e-3
+    hyena_lr_pos_emb: 1.0e-5
+    hyena_w: 10
+    hyena_wd: 0.1
+    hyena_emb_dim: 5
+    hyena_filter_order: 128
+    hyena_training_additions: false
+    bidirectional: true
+    residual_long_conv: true
+    use_glu_mlp: true
+    use_monarch_mlp: true
+    monarch_mlp_nblocks: 4
+    use_positional_encodings: true

model.bin ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5f6da4ea57ab1b407363530efba622552c121dc29b439cc9b202f042108d7d2
+size 440736801

model.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:069a59ecf30222fa1e67f68b76f7155966a875ac2ab060f1cb2d1213015e3596
+size 1315397236

version.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ 1