iamnguyen committed on
Commit
f4fda39
1 Parent(s): 1308cd3

Training in progress, step 2408, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "k_proj",
24
  "down_proj",
25
- "v_proj",
26
- "up_proj",
27
- "o_proj",
28
  "q_proj",
29
- "gate_proj"
 
 
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "down_proj",
 
 
 
24
  "q_proj",
25
+ "up_proj",
26
+ "v_proj",
27
+ "k_proj",
28
+ "gate_proj",
29
+ "o_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5e9f0e5cacd39add29676029c1b56d2faf079ccd31bde8511c1febd454f5a9f
3
  size 147770496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d72b4e2e87323c107d49210059f17ed429c454a732752da6c5865e25a205ff0
3
  size 147770496
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:376458bdc1ce44f1b5535f1f0d029f16825f2845553d0946a862dfe22caf77d0
3
  size 75455810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49ffb8ca622c9fd52fdb5cbea86d262d3918af11162d338978c48cb7a4c35ced
3
  size 75455810
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ba69556735679742033757906d03bee67367a53da70c19be3cbf4b9c125c815
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0032afed3d1763dc3a76db5bc3dd838c54e15ccb9ba9d135fc44f226bea6c5d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48f1f2f00d6cbbb8c1507d772e16322e2d45dfb1a89defdca62f9b6a650ede70
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd695d7bacd57e69ba2766eb0aca6257cbed1471361dfa5ddb27276deb1bf1c3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9314881888452812,
5
  "eval_steps": 500,
6
- "global_step": 2404,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -16835,6 +16835,34 @@
16835
  "learning_rate": 1.1911842790474637e-07,
16836
  "loss": 1.3321,
16837
  "step": 2404
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16838
  }
16839
  ],
16840
  "logging_steps": 1.0,
@@ -16854,7 +16882,7 @@
16854
  "attributes": {}
16855
  }
16856
  },
16857
- "total_flos": 2.378188494151456e+18,
16858
  "train_batch_size": 1,
16859
  "trial_name": null,
16860
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.933038085998102,
5
  "eval_steps": 500,
6
+ "global_step": 2408,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
16835
  "learning_rate": 1.1911842790474637e-07,
16836
  "loss": 1.3321,
16837
  "step": 2404
16838
+ },
16839
+ {
16840
+ "epoch": 0.9318756631334864,
16841
+ "grad_norm": 0.19932667911052704,
16842
+ "learning_rate": 1.1777397912764388e-07,
16843
+ "loss": 1.3773,
16844
+ "step": 2405
16845
+ },
16846
+ {
16847
+ "epoch": 0.9322631374216916,
16848
+ "grad_norm": 0.17850585281848907,
16849
+ "learning_rate": 1.1643707021313455e-07,
16850
+ "loss": 1.3967,
16851
+ "step": 2406
16852
+ },
16853
+ {
16854
+ "epoch": 0.9326506117098968,
16855
+ "grad_norm": 0.18462300300598145,
16856
+ "learning_rate": 1.151077032258724e-07,
16857
+ "loss": 1.3623,
16858
+ "step": 2407
16859
+ },
16860
+ {
16861
+ "epoch": 0.933038085998102,
16862
+ "grad_norm": 0.19249114394187927,
16863
+ "learning_rate": 1.137858802188646e-07,
16864
+ "loss": 1.3932,
16865
+ "step": 2408
16866
  }
16867
  ],
16868
  "logging_steps": 1.0,
 
16882
  "attributes": {}
16883
  }
16884
  },
16885
+ "total_flos": 2.382088049023491e+18,
16886
  "train_batch_size": 1,
16887
  "trial_name": null,
16888
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16fb774fc7374c972ea93aaa72bc1893a3284121b69ae795a08735c78b3f7dc5
3
  size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76c56280507313f331f5ebcc5db348eedc6cc0a045fc5f4c2b79d99b9c533ba6
3
  size 5560