alhosseini
committed on
Commit
•
771ac47
1
Parent(s):
c0eae28
Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +34 -0
- checkpoint-250/optimizer_0/.metadata +3 -0
- checkpoint-250/optimizer_0/__0_0.distcp +3 -0
- checkpoint-250/optimizer_0/__1_0.distcp +3 -0
- checkpoint-250/optimizer_0/__2_0.distcp +3 -0
- checkpoint-250/optimizer_0/__3_0.distcp +3 -0
- checkpoint-250/optimizer_0/__4_0.distcp +3 -0
- checkpoint-250/optimizer_0/__5_0.distcp +3 -0
- checkpoint-250/optimizer_0/__6_0.distcp +3 -0
- checkpoint-250/optimizer_0/__7_0.distcp +3 -0
- checkpoint-250/pytorch_model_fsdp_0/.metadata +0 -0
- checkpoint-250/pytorch_model_fsdp_0/__0_0.distcp +3 -0
- checkpoint-250/pytorch_model_fsdp_0/__1_0.distcp +3 -0
- checkpoint-250/pytorch_model_fsdp_0/__2_0.distcp +3 -0
- checkpoint-250/pytorch_model_fsdp_0/__3_0.distcp +3 -0
- checkpoint-250/pytorch_model_fsdp_0/__4_0.distcp +3 -0
- checkpoint-250/pytorch_model_fsdp_0/__5_0.distcp +3 -0
- checkpoint-250/pytorch_model_fsdp_0/__6_0.distcp +3 -0
- checkpoint-250/pytorch_model_fsdp_0/__7_0.distcp +3 -0
- checkpoint-250/rng_state_0.pth +3 -0
- checkpoint-250/rng_state_1.pth +3 -0
- checkpoint-250/rng_state_2.pth +3 -0
- checkpoint-250/rng_state_3.pth +3 -0
- checkpoint-250/rng_state_4.pth +3 -0
- checkpoint-250/rng_state_5.pth +3 -0
- checkpoint-250/rng_state_6.pth +3 -0
- checkpoint-250/rng_state_7.pth +3 -0
- checkpoint-250/scheduler.pt +3 -0
- checkpoint-250/trainer_state.json +107 -0
- checkpoint-500/optimizer_0/.metadata +3 -0
- checkpoint-500/optimizer_0/__0_0.distcp +3 -0
- checkpoint-500/optimizer_0/__1_0.distcp +3 -0
- checkpoint-500/optimizer_0/__2_0.distcp +3 -0
- checkpoint-500/optimizer_0/__3_0.distcp +3 -0
- checkpoint-500/optimizer_0/__4_0.distcp +3 -0
- checkpoint-500/optimizer_0/__5_0.distcp +3 -0
- checkpoint-500/optimizer_0/__6_0.distcp +3 -0
- checkpoint-500/optimizer_0/__7_0.distcp +3 -0
- checkpoint-500/pytorch_model_fsdp_0/.metadata +0 -0
- checkpoint-500/pytorch_model_fsdp_0/__0_0.distcp +3 -0
- checkpoint-500/pytorch_model_fsdp_0/__1_0.distcp +3 -0
- checkpoint-500/pytorch_model_fsdp_0/__2_0.distcp +3 -0
- checkpoint-500/pytorch_model_fsdp_0/__3_0.distcp +3 -0
- checkpoint-500/pytorch_model_fsdp_0/__4_0.distcp +3 -0
- checkpoint-500/pytorch_model_fsdp_0/__5_0.distcp +3 -0
- checkpoint-500/pytorch_model_fsdp_0/__6_0.distcp +3 -0
- checkpoint-500/pytorch_model_fsdp_0/__7_0.distcp +3 -0
- checkpoint-500/rng_state_0.pth +3 -0
- checkpoint-500/rng_state_1.pth +3 -0
- checkpoint-500/rng_state_2.pth +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,37 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
checkpoint-250/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
|
37 |
+
checkpoint-250/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
38 |
+
checkpoint-250/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
39 |
+
checkpoint-250/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
40 |
+
checkpoint-250/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
41 |
+
checkpoint-250/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
42 |
+
checkpoint-250/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
43 |
+
checkpoint-250/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
44 |
+
checkpoint-250/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
45 |
+
checkpoint-250/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
46 |
+
checkpoint-250/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
47 |
+
checkpoint-250/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
48 |
+
checkpoint-250/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
49 |
+
checkpoint-250/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
50 |
+
checkpoint-250/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
51 |
+
checkpoint-250/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
52 |
+
checkpoint-250/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
53 |
+
checkpoint-500/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
|
54 |
+
checkpoint-500/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
55 |
+
checkpoint-500/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
56 |
+
checkpoint-500/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
57 |
+
checkpoint-500/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
58 |
+
checkpoint-500/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
59 |
+
checkpoint-500/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
60 |
+
checkpoint-500/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
61 |
+
checkpoint-500/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
62 |
+
checkpoint-500/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
63 |
+
checkpoint-500/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
64 |
+
checkpoint-500/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
65 |
+
checkpoint-500/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
66 |
+
checkpoint-500/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
67 |
+
checkpoint-500/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
68 |
+
checkpoint-500/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
69 |
+
checkpoint-500/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
checkpoint-250/optimizer_0/.metadata
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42a07921faeb91b5e7dc24c8f800f35e960259f183f1fb74189978fce8238fa6
|
3 |
+
size 1090439
|
checkpoint-250/optimizer_0/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8af49041e050a7d5e580bb54e28bc20936646cd348483dcc88d7e1fd47253e86
|
3 |
+
size 8031213736
|
checkpoint-250/optimizer_0/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed53196190ffc98dab2522ea61ba1073a6e1e4ad32005418bc13ab80fb270b00
|
3 |
+
size 8030948008
|
checkpoint-250/optimizer_0/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c1b21dac2bbfff910b1b2b6cfe23528ac4c481fcd51cc8458dc4b0bea9230cb
|
3 |
+
size 8030948008
|
checkpoint-250/optimizer_0/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdc01777e24ace60420f28b2ec64aac9eeda5cdc0bb77ed50c055a4b7f1cfc18
|
3 |
+
size 8030948008
|
checkpoint-250/optimizer_0/__4_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bafbebb1e5c2763c541156316a3f74ec868419339c73ead45c3a6dee82110009
|
3 |
+
size 8030948008
|
checkpoint-250/optimizer_0/__5_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c14a82fbfc77d3089e046999391d5a5b17cdc4a493885d17e3aa46ce55f8d233
|
3 |
+
size 8030948008
|
checkpoint-250/optimizer_0/__6_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca813a08bf1db7911691a81d975487b8b7a7c321d4e99700527da3db4d4590a2
|
3 |
+
size 8030948008
|
checkpoint-250/optimizer_0/__7_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e18664004e0d6f86b675a2eb84255b5eb4872902db3006c46646ecfca4ce1804
|
3 |
+
size 8030948008
|
checkpoint-250/pytorch_model_fsdp_0/.metadata
ADDED
Binary file (456 kB). View file
|
|
checkpoint-250/pytorch_model_fsdp_0/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4832cc88f54a83972c901ab6d7ed1c0b3e8cd2f49ed1db6b778ebbd1dfc3c4b
|
3 |
+
size 4015474004
|
checkpoint-250/pytorch_model_fsdp_0/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb4ae50c6a9200e16d668fb542e962e7a01ab8cb3f60a2417917d515c6b30e5e
|
3 |
+
size 4015474004
|
checkpoint-250/pytorch_model_fsdp_0/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d9fb7b8f6ef841834d8fa6fc103a3ec13980a2dd03021bba6ac8e64bf0bbeea
|
3 |
+
size 4015474004
|
checkpoint-250/pytorch_model_fsdp_0/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:151177670bffb930999cb70b4317a13243b301e54f2ddbc8580cafb0d8dd1a2c
|
3 |
+
size 4015474004
|
checkpoint-250/pytorch_model_fsdp_0/__4_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d110936ea7c10d4c5a63f41dc57b4bee9ce3a1958fea27efae371275c06f421
|
3 |
+
size 4015474004
|
checkpoint-250/pytorch_model_fsdp_0/__5_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce8954e461ae1778a6afc474eeb4be1321fbd9708ccc804d4e4d4b2f0e5850a9
|
3 |
+
size 4015474004
|
checkpoint-250/pytorch_model_fsdp_0/__6_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98bfba48163fccfdbb8dec2badf8461b2509a59bf8e8b5b66c2060e4fc10e3e1
|
3 |
+
size 4015474004
|
checkpoint-250/pytorch_model_fsdp_0/__7_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d164fe2cea3b876727aeb894aeb6f594f37d04eb8ce25291ac2e252470278f6b
|
3 |
+
size 4015474004
|
checkpoint-250/rng_state_0.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3279c2df5914cd6752d903c7208efa1ae5d8b0bf5c5ab49278e922289379f880
|
3 |
+
size 14960
|
checkpoint-250/rng_state_1.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5010fde9a9bad29d5917932b0f579d06e5efa2b29ea0b1394f613ba87f44c727
|
3 |
+
size 14960
|
checkpoint-250/rng_state_2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a1a5e03bef063c92bf60bc912c422f68258eba353563555b3a20e1aca40ee1ff
|
3 |
+
size 14960
|
checkpoint-250/rng_state_3.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39b07d43defbf907ee561f3f9794a15ce15712c4c46ee41c01aead35f8c3b903
|
3 |
+
size 14960
|
checkpoint-250/rng_state_4.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffcf494fc279633fcd1ad28b5b4e9d4c73565230e4f2732766070de033221e0e
|
3 |
+
size 14960
|
checkpoint-250/rng_state_5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ce1d8a27021d02e2efde0060bc3247b3504776ec87e3b7c34b4104a3d06cb27
|
3 |
+
size 14960
|
checkpoint-250/rng_state_6.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e4cc4f2cbb66929072a6a4148043e34543f9d86a55975a9ca87296a68d418a4f
|
3 |
+
size 14960
|
checkpoint-250/rng_state_7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f5ee18c8484553136758b55039b9b0b4ab3972ca683f1b940a640b65faf5503
|
3 |
+
size 14960
|
checkpoint-250/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de7476feb37b952c977ba4ea3b1e02a301a02707d61ac2402e2a1f1e4a882229
|
3 |
+
size 1064
|
checkpoint-250/trainer_state.json
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.8771929824561403,
|
5 |
+
"eval_steps": 100,
|
6 |
+
"global_step": 250,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.08771929824561403,
|
13 |
+
"grad_norm": 23.971385955810547,
|
14 |
+
"learning_rate": 4.9947570655942796e-05,
|
15 |
+
"loss": 2.8211,
|
16 |
+
"step": 25
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.17543859649122806,
|
20 |
+
"grad_norm": 5.184176921844482,
|
21 |
+
"learning_rate": 4.936026311617316e-05,
|
22 |
+
"loss": 2.673,
|
23 |
+
"step": 50
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.2631578947368421,
|
27 |
+
"grad_norm": 13.108516693115234,
|
28 |
+
"learning_rate": 4.813553074106761e-05,
|
29 |
+
"loss": 2.5574,
|
30 |
+
"step": 75
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.3508771929824561,
|
34 |
+
"grad_norm": 4.182127952575684,
|
35 |
+
"learning_rate": 4.630542059139924e-05,
|
36 |
+
"loss": 2.495,
|
37 |
+
"step": 100
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.3508771929824561,
|
41 |
+
"eval_loss": 2.434575319290161,
|
42 |
+
"eval_runtime": 43.2291,
|
43 |
+
"eval_samples_per_second": 13.024,
|
44 |
+
"eval_steps_per_second": 1.642,
|
45 |
+
"step": 100
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 0.43859649122807015,
|
49 |
+
"grad_norm": 3.291740894317627,
|
50 |
+
"learning_rate": 4.391782039544238e-05,
|
51 |
+
"loss": 2.5136,
|
52 |
+
"step": 125
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"epoch": 0.5263157894736842,
|
56 |
+
"grad_norm": 6.727398872375488,
|
57 |
+
"learning_rate": 4.10352054907785e-05,
|
58 |
+
"loss": 2.6616,
|
59 |
+
"step": 150
|
60 |
+
},
|
61 |
+
{
|
62 |
+
"epoch": 0.6140350877192983,
|
63 |
+
"grad_norm": 21.510730743408203,
|
64 |
+
"learning_rate": 3.773300405821908e-05,
|
65 |
+
"loss": 2.5945,
|
66 |
+
"step": 175
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"epoch": 0.7017543859649122,
|
70 |
+
"grad_norm": 4.637833595275879,
|
71 |
+
"learning_rate": 3.409762342408719e-05,
|
72 |
+
"loss": 2.6287,
|
73 |
+
"step": 200
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"epoch": 0.7017543859649122,
|
77 |
+
"eval_loss": 2.5877649784088135,
|
78 |
+
"eval_runtime": 43.2273,
|
79 |
+
"eval_samples_per_second": 13.024,
|
80 |
+
"eval_steps_per_second": 1.642,
|
81 |
+
"step": 200
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"epoch": 0.7894736842105263,
|
85 |
+
"grad_norm": 3.1285316944122314,
|
86 |
+
"learning_rate": 3.0224189075781884e-05,
|
87 |
+
"loss": 2.5298,
|
88 |
+
"step": 225
|
89 |
+
},
|
90 |
+
{
|
91 |
+
"epoch": 0.8771929824561403,
|
92 |
+
"grad_norm": 4.8442487716674805,
|
93 |
+
"learning_rate": 2.621405555286121e-05,
|
94 |
+
"loss": 2.4645,
|
95 |
+
"step": 250
|
96 |
+
}
|
97 |
+
],
|
98 |
+
"logging_steps": 25,
|
99 |
+
"max_steps": 500,
|
100 |
+
"num_input_tokens_seen": 0,
|
101 |
+
"num_train_epochs": 2,
|
102 |
+
"save_steps": 250,
|
103 |
+
"total_flos": 2.30551294312448e+16,
|
104 |
+
"train_batch_size": 1,
|
105 |
+
"trial_name": null,
|
106 |
+
"trial_params": null
|
107 |
+
}
|
checkpoint-500/optimizer_0/.metadata
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42a07921faeb91b5e7dc24c8f800f35e960259f183f1fb74189978fce8238fa6
|
3 |
+
size 1090439
|
checkpoint-500/optimizer_0/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2fd02e224a18b2544b8daa90cbcc475c2a123b608e998de36a4220415c5b2d57
|
3 |
+
size 8031213736
|
checkpoint-500/optimizer_0/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8390f6373f9b19726a074415c50bcaea0748148aaac6d45c28881ca0a38fd817
|
3 |
+
size 8030948008
|
checkpoint-500/optimizer_0/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b79a5e9cba908bd1fce197ba6396edd8924f58066cdb3c47eec5a92f42eb43d
|
3 |
+
size 8030948008
|
checkpoint-500/optimizer_0/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b93999ae3ab37cdba6b38775559d520b2f023f79f463b1576d490c8d6457259e
|
3 |
+
size 8030948008
|
checkpoint-500/optimizer_0/__4_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5249618ae8d0d11db33c506bfdfcf4a72dafaa7ed65d8ffbe091e90f0a18c51a
|
3 |
+
size 8030948008
|
checkpoint-500/optimizer_0/__5_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae4ed47aba75afff4edb05b2f5d5b38cbeb4e3cab2cf42e7e7f8cfa1398ea61e
|
3 |
+
size 8030948008
|
checkpoint-500/optimizer_0/__6_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7a89baf788c6c6318a13fc99c5c45a659c25a1e1cf39037c5233fbdb40089c1
|
3 |
+
size 8030948008
|
checkpoint-500/optimizer_0/__7_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33a8cc76953c74b4b7425f36dd94d3f2d85a62d0cae31772fdfeadfb2df97229
|
3 |
+
size 8030948008
|
checkpoint-500/pytorch_model_fsdp_0/.metadata
ADDED
Binary file (456 kB). View file
|
|
checkpoint-500/pytorch_model_fsdp_0/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a304c1ebd563f3bac43f794681cc1d1055bf4b87775ce5444fb820081dab23b
|
3 |
+
size 4015474004
|
checkpoint-500/pytorch_model_fsdp_0/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:480d14a187835e9c43ceb58a6392d235a78ec7debc55036307dae497e0b02ab1
|
3 |
+
size 4015474004
|
checkpoint-500/pytorch_model_fsdp_0/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2e9e631ad084061fc5d045c6b361cb086b627b84fbb974d2b786795c805691fe
|
3 |
+
size 4015474004
|
checkpoint-500/pytorch_model_fsdp_0/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7bb91d1e2f10e4f059b34d676b795bce52014187f331b0913ebea84b94dd857
|
3 |
+
size 4015474004
|
checkpoint-500/pytorch_model_fsdp_0/__4_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:662230c151d3c847f63c8f7160da77440237401c6629d3a2ca9f6ff81d5f6d06
|
3 |
+
size 4015474004
|
checkpoint-500/pytorch_model_fsdp_0/__5_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:044619c98d679ff110ede5b2ca6154a37a3c325069d3c344ca6294bd67a9eb49
|
3 |
+
size 4015474004
|
checkpoint-500/pytorch_model_fsdp_0/__6_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0997c86af6b7c7a6baffde3187385fe480f16cb773d05f2b994be960bd0192e5
|
3 |
+
size 4015474004
|
checkpoint-500/pytorch_model_fsdp_0/__7_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a67f155c65ac2993b1f68d1a2bf3bd95e1afbdbb157de58066cf9a6f6cd3bdc1
|
3 |
+
size 4015474004
|
checkpoint-500/rng_state_0.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:649998dc08e74eb3b391a07124c4a4856ae90e05693eb61a2a3dbfbfd936ae63
|
3 |
+
size 14960
|
checkpoint-500/rng_state_1.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:837e3b4e96ed4375c72e5d72f482e42adc9ae2f0b85f4c141f7ce8805be53b92
|
3 |
+
size 14960
|
checkpoint-500/rng_state_2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:879d570ef453eb6ea7c2bdd207cae158d4f1b641ebb20b599decc8d90cec7969
|
3 |
+
size 14960
|