Training in progress, step 300000

Files changed (5) hide show

babyslm/syntactic.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff

blimp_results.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:786c4ea1c8aee1d056e0da411594cb9fd8f38d1baa2aee83a9f5a8545e8a4d00
-size 80218105

 version https://git-lfs.github.com/spec/v1
+oid sha256:0b41adc6025719ae782ea15d490e37c19d6269406a341471ab2b5369af18cd9c
+size 80274155

config.json CHANGED Viewed

@@ -3,20 +3,20 @@
   "architectures": [
     "GPT2LMHeadModel"
   ],
-  "attn_pdrop": 0.3,
   "bos_token_id": 0,
-  "embd_pdrop": 0.3,
   "eos_token_id": 0,
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",
-  "n_embd": 512,
-  "n_head": 8,
-  "n_inner": 2048,
-  "n_layer": 6,
   "n_positions": 256,
   "reorder_and_upcast_attn": false,
-  "resid_pdrop": 0.3,
   "scale_attn_by_inverse_layer_idx": false,
   "scale_attn_weights": true,
   "summary_activation": null,

   "architectures": [
     "GPT2LMHeadModel"
   ],
+  "attn_pdrop": 0.1,
   "bos_token_id": 0,
+  "embd_pdrop": 0.1,
   "eos_token_id": 0,
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": 3072,
+  "n_layer": 12,
   "n_positions": 256,
   "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.1,
   "scale_attn_by_inverse_layer_idx": false,
   "scale_attn_weights": true,
   "summary_activation": null,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:114a339d56e7961c6cae9800c50f675fe0f738602fa7e4cf307c1695c6030aff
-size 108961160

 version https://git-lfs.github.com/spec/v1
+oid sha256:6c232df34b9049274b26b3394f51eddaf3ea6570465a27cb027a057d3d0b5127
+size 390177408

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e0b5a8b6f1890201775a9dff957294eb0b004f3daffec4d13e5aebb84bcac43e
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:5ae0dde72c52f7e1e0ff9bbc8aac95f1da19222bc12c580df46c08d305ad1f34
 size 5368