alhosseini
committed on
Commit
•
a20e64f
1
Parent(s):
1c6ceee
Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +68 -0
- checkpoint-1000/optimizer_0/.metadata +3 -0
- checkpoint-1000/optimizer_0/__0_0.distcp +3 -0
- checkpoint-1000/optimizer_0/__1_0.distcp +3 -0
- checkpoint-1000/optimizer_0/__2_0.distcp +3 -0
- checkpoint-1000/optimizer_0/__3_0.distcp +3 -0
- checkpoint-1000/optimizer_0/__4_0.distcp +3 -0
- checkpoint-1000/optimizer_0/__5_0.distcp +3 -0
- checkpoint-1000/optimizer_0/__6_0.distcp +3 -0
- checkpoint-1000/optimizer_0/__7_0.distcp +3 -0
- checkpoint-1000/pytorch_model_fsdp_0/.metadata +0 -0
- checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp +3 -0
- checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp +3 -0
- checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp +3 -0
- checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp +3 -0
- checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp +3 -0
- checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp +3 -0
- checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp +3 -0
- checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp +3 -0
- checkpoint-1000/rng_state_0.pth +3 -0
- checkpoint-1000/rng_state_1.pth +3 -0
- checkpoint-1000/rng_state_2.pth +3 -0
- checkpoint-1000/rng_state_3.pth +3 -0
- checkpoint-1000/rng_state_4.pth +3 -0
- checkpoint-1000/rng_state_5.pth +3 -0
- checkpoint-1000/rng_state_6.pth +3 -0
- checkpoint-1000/rng_state_7.pth +3 -0
- checkpoint-1000/scheduler.pt +3 -0
- checkpoint-1000/trainer_state.json +381 -0
- checkpoint-250/optimizer_0/.metadata +3 -0
- checkpoint-250/optimizer_0/__0_0.distcp +3 -0
- checkpoint-250/optimizer_0/__1_0.distcp +3 -0
- checkpoint-250/optimizer_0/__2_0.distcp +3 -0
- checkpoint-250/optimizer_0/__3_0.distcp +3 -0
- checkpoint-250/optimizer_0/__4_0.distcp +3 -0
- checkpoint-250/optimizer_0/__5_0.distcp +3 -0
- checkpoint-250/optimizer_0/__6_0.distcp +3 -0
- checkpoint-250/optimizer_0/__7_0.distcp +3 -0
- checkpoint-250/pytorch_model_fsdp_0/.metadata +0 -0
- checkpoint-250/pytorch_model_fsdp_0/__0_0.distcp +3 -0
- checkpoint-250/pytorch_model_fsdp_0/__1_0.distcp +3 -0
- checkpoint-250/pytorch_model_fsdp_0/__2_0.distcp +3 -0
- checkpoint-250/pytorch_model_fsdp_0/__3_0.distcp +3 -0
- checkpoint-250/pytorch_model_fsdp_0/__4_0.distcp +3 -0
- checkpoint-250/pytorch_model_fsdp_0/__5_0.distcp +3 -0
- checkpoint-250/pytorch_model_fsdp_0/__6_0.distcp +3 -0
- checkpoint-250/pytorch_model_fsdp_0/__7_0.distcp +3 -0
- checkpoint-250/rng_state_0.pth +3 -0
- checkpoint-250/rng_state_1.pth +3 -0
- checkpoint-250/rng_state_2.pth +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,71 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
checkpoint-1000/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
|
37 |
+
checkpoint-1000/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
38 |
+
checkpoint-1000/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
39 |
+
checkpoint-1000/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
40 |
+
checkpoint-1000/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
41 |
+
checkpoint-1000/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
42 |
+
checkpoint-1000/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
43 |
+
checkpoint-1000/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
44 |
+
checkpoint-1000/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
45 |
+
checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
46 |
+
checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
47 |
+
checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
48 |
+
checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
49 |
+
checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
50 |
+
checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
51 |
+
checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
52 |
+
checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
53 |
+
checkpoint-250/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
|
54 |
+
checkpoint-250/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
55 |
+
checkpoint-250/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
56 |
+
checkpoint-250/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
57 |
+
checkpoint-250/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
58 |
+
checkpoint-250/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
59 |
+
checkpoint-250/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
60 |
+
checkpoint-250/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
61 |
+
checkpoint-250/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
62 |
+
checkpoint-250/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
63 |
+
checkpoint-250/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
64 |
+
checkpoint-250/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
65 |
+
checkpoint-250/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
66 |
+
checkpoint-250/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
67 |
+
checkpoint-250/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
68 |
+
checkpoint-250/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
69 |
+
checkpoint-250/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
70 |
+
checkpoint-500/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
|
71 |
+
checkpoint-500/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
72 |
+
checkpoint-500/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
73 |
+
checkpoint-500/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
74 |
+
checkpoint-500/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
75 |
+
checkpoint-500/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
76 |
+
checkpoint-500/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
77 |
+
checkpoint-500/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
78 |
+
checkpoint-500/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
79 |
+
checkpoint-500/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
80 |
+
checkpoint-500/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
81 |
+
checkpoint-500/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
82 |
+
checkpoint-500/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
83 |
+
checkpoint-500/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
84 |
+
checkpoint-500/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
85 |
+
checkpoint-500/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
86 |
+
checkpoint-500/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
87 |
+
checkpoint-750/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
|
88 |
+
checkpoint-750/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
89 |
+
checkpoint-750/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
90 |
+
checkpoint-750/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
91 |
+
checkpoint-750/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
92 |
+
checkpoint-750/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
93 |
+
checkpoint-750/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
94 |
+
checkpoint-750/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
95 |
+
checkpoint-750/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
96 |
+
checkpoint-750/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
97 |
+
checkpoint-750/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
98 |
+
checkpoint-750/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
99 |
+
checkpoint-750/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
100 |
+
checkpoint-750/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
101 |
+
checkpoint-750/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
102 |
+
checkpoint-750/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
103 |
+
checkpoint-750/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
checkpoint-1000/optimizer_0/.metadata
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42a07921faeb91b5e7dc24c8f800f35e960259f183f1fb74189978fce8238fa6
|
3 |
+
size 1090439
|
checkpoint-1000/optimizer_0/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7f177789141ca996b0e90a341e39e2de25e91e692e7bc78472f16216b9b2ed9
|
3 |
+
size 8031213736
|
checkpoint-1000/optimizer_0/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d79d59514bcb88df1ac25a0cb683cdbc222bf5f9c43a595a718cce42dcb2def2
|
3 |
+
size 8030948008
|
checkpoint-1000/optimizer_0/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64a175196b451493422ed2dc99858b2d77e7586b65bcb84ed9dcba617c822054
|
3 |
+
size 8030948008
|
checkpoint-1000/optimizer_0/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98b972e9bee455401f97c87120f0003d517be8ec5682ea29ca0c0e700ac9cf7d
|
3 |
+
size 8030948008
|
checkpoint-1000/optimizer_0/__4_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0fbb6893976df4d88d90b48a49aebc8f1d55d542adafa18cdf69c24f7fe41bee
|
3 |
+
size 8030948008
|
checkpoint-1000/optimizer_0/__5_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4e29028cb17f1abd4b53b4f04bbc74f7248de5df95a2c3d37e70600727058adf
|
3 |
+
size 8030948008
|
checkpoint-1000/optimizer_0/__6_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b3db7c61c19ab005c404d6ac71bb33c354e9dfd4e2af619350b875eb3067a3b3
|
3 |
+
size 8030948008
|
checkpoint-1000/optimizer_0/__7_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a19524bc3be3d623d5dd5e17f2f271b019cfafb8ef88c5c86cde78f8f60ec5f
|
3 |
+
size 8030948008
|
checkpoint-1000/pytorch_model_fsdp_0/.metadata
ADDED
Binary file (456 kB). View file
|
|
checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2c168d0c1e98da39178db29c2cd84f774a394872d6272a6a3a322e9608ee575
|
3 |
+
size 4015474004
|
checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c385938b4e958ffb4d1a1fba0704adf4a74b007878fcda6c49c43e6ed50c66b6
|
3 |
+
size 4015474004
|
checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b6cd014abd1ed8aaaa13278023ec159e31f02bdaf760d947e1db78965d27f38
|
3 |
+
size 4015474004
|
checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c242acc5eb7fbd12f2f4d8c9e8b8f36b8b1c4372061d3ee4d84f39af720f585
|
3 |
+
size 4015474004
|
checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33763ca9f6987d5ada8ad623b33e20c37cfec0c70464e7f3fc3f41010fe97172
|
3 |
+
size 4015474004
|
checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f6c8447522ea6e1ed4d15bfce77a24f327b180a622be74323874c3a5978b769
|
3 |
+
size 4015474004
|
checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27a02007add9e064fa3e8f61d6e521552f2a41cdff9082fa8421e585392e505a
|
3 |
+
size 4015474004
|
checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90835f2f41af6f868e3a8bf111ef2fe4c7f76e028db22b7bb68566660d8e3b10
|
3 |
+
size 4015474004
|
checkpoint-1000/rng_state_0.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5f2e52b4ff6d63af0a2316dfdb146ae3aa9823994d4f90b1532b6f7bbec5dcca
|
3 |
+
size 14960
|
checkpoint-1000/rng_state_1.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7701c1c9415fc3393e58d39af6ac55dcb31801620f895253c16ebf5cb20b5bbe
|
3 |
+
size 14960
|
checkpoint-1000/rng_state_2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f81001d5616bc0c5224760c3ce1f52f630fa887a76a3e7770d02a83bcf38dbc0
|
3 |
+
size 14960
|
checkpoint-1000/rng_state_3.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:572b48ef38fa0ff75e6294c23aea18390b9aa73f011b8dfac9956f5f69e2328f
|
3 |
+
size 14960
|
checkpoint-1000/rng_state_4.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:434403b20b0d8a9fa1b8d4d1af4b56ffe7ae4a7532a8099aeec698d4e7125fc4
|
3 |
+
size 14960
|
checkpoint-1000/rng_state_5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:397f0bc350fc31f88f1987a7e43045e8bc76b6ef407f94011cddf6a0edcb12d0
|
3 |
+
size 14960
|
checkpoint-1000/rng_state_6.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba6d6fe98c76e08aab2a10cfecd2d8a74b059f9683bacb8dc6c3c75a3336e882
|
3 |
+
size 14960
|
checkpoint-1000/rng_state_7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1bea82e2b8f0de62b5067b75169b2e3386b3dacbb75f62ee322ec552172e4c93
|
3 |
+
size 14960
|
checkpoint-1000/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f627496766215c292bc1d6ceecb7b0e07bcc89f3a3d097e9d4c5b8a4241c674f
|
3 |
+
size 1064
|
checkpoint-1000/trainer_state.json
ADDED
@@ -0,0 +1,381 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 4.201680672268908,
|
5 |
+
"eval_steps": 100,
|
6 |
+
"global_step": 1000,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.10504201680672269,
|
13 |
+
"grad_norm": 8.704133033752441,
|
14 |
+
"learning_rate": 4.166666666666667e-05,
|
15 |
+
"loss": 3.5481,
|
16 |
+
"step": 25
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.21008403361344538,
|
20 |
+
"grad_norm": 10.129899978637695,
|
21 |
+
"learning_rate": 4.9947570655942796e-05,
|
22 |
+
"loss": 3.0062,
|
23 |
+
"step": 50
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.31512605042016806,
|
27 |
+
"grad_norm": 6.111865997314453,
|
28 |
+
"learning_rate": 4.9734953280908904e-05,
|
29 |
+
"loss": 2.9095,
|
30 |
+
"step": 75
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.42016806722689076,
|
34 |
+
"grad_norm": 4.366275787353516,
|
35 |
+
"learning_rate": 4.936026311617316e-05,
|
36 |
+
"loss": 2.9062,
|
37 |
+
"step": 100
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.42016806722689076,
|
41 |
+
"eval_loss": 2.872298240661621,
|
42 |
+
"eval_runtime": 35.9441,
|
43 |
+
"eval_samples_per_second": 13.02,
|
44 |
+
"eval_steps_per_second": 1.641,
|
45 |
+
"step": 100
|
46 |
+
},
|
47 |
+
{
|
48 |
+
"epoch": 0.5252100840336135,
|
49 |
+
"grad_norm": 25.31424903869629,
|
50 |
+
"learning_rate": 4.882595527372152e-05,
|
51 |
+
"loss": 2.874,
|
52 |
+
"step": 125
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"epoch": 0.6302521008403361,
|
56 |
+
"grad_norm": 53.229515075683594,
|
57 |
+
"learning_rate": 4.813553074106761e-05,
|
58 |
+
"loss": 2.887,
|
59 |
+
"step": 150
|
60 |
+
},
|
61 |
+
{
|
62 |
+
"epoch": 0.7352941176470589,
|
63 |
+
"grad_norm": 7.568065166473389,
|
64 |
+
"learning_rate": 4.7293513441455364e-05,
|
65 |
+
"loss": 2.8903,
|
66 |
+
"step": 175
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"epoch": 0.8403361344537815,
|
70 |
+
"grad_norm": 6.394404888153076,
|
71 |
+
"learning_rate": 4.630542059139924e-05,
|
72 |
+
"loss": 2.7455,
|
73 |
+
"step": 200
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"epoch": 0.8403361344537815,
|
77 |
+
"eval_loss": 2.743553638458252,
|
78 |
+
"eval_runtime": 35.9282,
|
79 |
+
"eval_samples_per_second": 13.026,
|
80 |
+
"eval_steps_per_second": 1.642,
|
81 |
+
"step": 200
|
82 |
+
},
|
83 |
+
{
|
84 |
+
"epoch": 0.9453781512605042,
|
85 |
+
"grad_norm": 3.6301393508911133,
|
86 |
+
"learning_rate": 4.517772654979023e-05,
|
87 |
+
"loss": 2.7609,
|
88 |
+
"step": 225
|
89 |
+
},
|
90 |
+
{
|
91 |
+
"epoch": 1.050420168067227,
|
92 |
+
"grad_norm": 21.900226593017578,
|
93 |
+
"learning_rate": 4.391782039544238e-05,
|
94 |
+
"loss": 2.7411,
|
95 |
+
"step": 250
|
96 |
+
},
|
97 |
+
{
|
98 |
+
"epoch": 1.1554621848739495,
|
99 |
+
"grad_norm": 9.00839900970459,
|
100 |
+
"learning_rate": 4.253395751104748e-05,
|
101 |
+
"loss": 2.3212,
|
102 |
+
"step": 275
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"epoch": 1.2605042016806722,
|
106 |
+
"grad_norm": 3.9047327041625977,
|
107 |
+
"learning_rate": 4.10352054907785e-05,
|
108 |
+
"loss": 2.0506,
|
109 |
+
"step": 300
|
110 |
+
},
|
111 |
+
{
|
112 |
+
"epoch": 1.2605042016806722,
|
113 |
+
"eval_loss": 2.8556525707244873,
|
114 |
+
"eval_runtime": 35.8984,
|
115 |
+
"eval_samples_per_second": 13.037,
|
116 |
+
"eval_steps_per_second": 1.644,
|
117 |
+
"step": 300
|
118 |
+
},
|
119 |
+
{
|
120 |
+
"epoch": 1.365546218487395,
|
121 |
+
"grad_norm": 3.455016613006592,
|
122 |
+
"learning_rate": 3.943138472597549e-05,
|
123 |
+
"loss": 1.826,
|
124 |
+
"step": 325
|
125 |
+
},
|
126 |
+
{
|
127 |
+
"epoch": 1.4705882352941178,
|
128 |
+
"grad_norm": 10.288714408874512,
|
129 |
+
"learning_rate": 3.773300405821908e-05,
|
130 |
+
"loss": 1.8779,
|
131 |
+
"step": 350
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"epoch": 1.5756302521008403,
|
135 |
+
"grad_norm": 2.957385540008545,
|
136 |
+
"learning_rate": 3.595119192141706e-05,
|
137 |
+
"loss": 1.85,
|
138 |
+
"step": 375
|
139 |
+
},
|
140 |
+
{
|
141 |
+
"epoch": 1.680672268907563,
|
142 |
+
"grad_norm": 3.40120267868042,
|
143 |
+
"learning_rate": 3.409762342408719e-05,
|
144 |
+
"loss": 1.835,
|
145 |
+
"step": 400
|
146 |
+
},
|
147 |
+
{
|
148 |
+
"epoch": 1.680672268907563,
|
149 |
+
"eval_loss": 2.7827320098876953,
|
150 |
+
"eval_runtime": 36.0584,
|
151 |
+
"eval_samples_per_second": 12.979,
|
152 |
+
"eval_steps_per_second": 1.636,
|
153 |
+
"step": 400
|
154 |
+
},
|
155 |
+
{
|
156 |
+
"epoch": 1.7857142857142856,
|
157 |
+
"grad_norm": 3.446842670440674,
|
158 |
+
"learning_rate": 3.218444384962071e-05,
|
159 |
+
"loss": 1.8477,
|
160 |
+
"step": 425
|
161 |
+
},
|
162 |
+
{
|
163 |
+
"epoch": 1.8907563025210083,
|
164 |
+
"grad_norm": 3.1697998046875,
|
165 |
+
"learning_rate": 3.0224189075781884e-05,
|
166 |
+
"loss": 1.7934,
|
167 |
+
"step": 450
|
168 |
+
},
|
169 |
+
{
|
170 |
+
"epoch": 1.995798319327731,
|
171 |
+
"grad_norm": 2.8750455379486084,
|
172 |
+
"learning_rate": 2.8229703434885163e-05,
|
173 |
+
"loss": 1.7526,
|
174 |
+
"step": 475
|
175 |
+
},
|
176 |
+
{
|
177 |
+
"epoch": 2.100840336134454,
|
178 |
+
"grad_norm": 2.977962017059326,
|
179 |
+
"learning_rate": 2.621405555286121e-05,
|
180 |
+
"loss": 0.8246,
|
181 |
+
"step": 500
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"epoch": 2.100840336134454,
|
185 |
+
"eval_loss": 3.001242160797119,
|
186 |
+
"eval_runtime": 35.8998,
|
187 |
+
"eval_samples_per_second": 13.036,
|
188 |
+
"eval_steps_per_second": 1.643,
|
189 |
+
"step": 500
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"epoch": 2.2058823529411766,
|
193 |
+
"grad_norm": 2.8460147380828857,
|
194 |
+
"learning_rate": 2.419045271866611e-05,
|
195 |
+
"loss": 0.7854,
|
196 |
+
"step": 525
|
197 |
+
},
|
198 |
+
{
|
199 |
+
"epoch": 2.310924369747899,
|
200 |
+
"grad_norm": 3.5654945373535156,
|
201 |
+
"learning_rate": 2.2172154345117894e-05,
|
202 |
+
"loss": 0.8065,
|
203 |
+
"step": 550
|
204 |
+
},
|
205 |
+
{
|
206 |
+
"epoch": 2.4159663865546217,
|
207 |
+
"grad_norm": 2.3066728115081787,
|
208 |
+
"learning_rate": 2.0172385088197803e-05,
|
209 |
+
"loss": 0.7753,
|
210 |
+
"step": 575
|
211 |
+
},
|
212 |
+
{
|
213 |
+
"epoch": 2.5210084033613445,
|
214 |
+
"grad_norm": 2.5332283973693848,
|
215 |
+
"learning_rate": 1.820424819409143e-05,
|
216 |
+
"loss": 0.824,
|
217 |
+
"step": 600
|
218 |
+
},
|
219 |
+
{
|
220 |
+
"epoch": 2.5210084033613445,
|
221 |
+
"eval_loss": 2.8449320793151855,
|
222 |
+
"eval_runtime": 35.8956,
|
223 |
+
"eval_samples_per_second": 13.038,
|
224 |
+
"eval_steps_per_second": 1.644,
|
225 |
+
"step": 600
|
226 |
+
},
|
227 |
+
{
|
228 |
+
"epoch": 2.6260504201680672,
|
229 |
+
"grad_norm": 3.0348715782165527,
|
230 |
+
"learning_rate": 1.6280639641752942e-05,
|
231 |
+
"loss": 0.7481,
|
232 |
+
"step": 625
|
233 |
+
},
|
234 |
+
{
|
235 |
+
"epoch": 2.73109243697479,
|
236 |
+
"grad_norm": 2.7579290866851807,
|
237 |
+
"learning_rate": 1.4414163643562755e-05,
|
238 |
+
"loss": 0.7543,
|
239 |
+
"step": 650
|
240 |
+
},
|
241 |
+
{
|
242 |
+
"epoch": 2.8361344537815127,
|
243 |
+
"grad_norm": 2.145768404006958,
|
244 |
+
"learning_rate": 1.2617050057750322e-05,
|
245 |
+
"loss": 0.753,
|
246 |
+
"step": 675
|
247 |
+
},
|
248 |
+
{
|
249 |
+
"epoch": 2.9411764705882355,
|
250 |
+
"grad_norm": 2.6759989261627197,
|
251 |
+
"learning_rate": 1.0901074253727336e-05,
|
252 |
+
"loss": 0.7418,
|
253 |
+
"step": 700
|
254 |
+
},
|
255 |
+
{
|
256 |
+
"epoch": 2.9411764705882355,
|
257 |
+
"eval_loss": 2.796262741088867,
|
258 |
+
"eval_runtime": 35.8962,
|
259 |
+
"eval_samples_per_second": 13.038,
|
260 |
+
"eval_steps_per_second": 1.644,
|
261 |
+
"step": 700
|
262 |
+
},
|
263 |
+
{
|
264 |
+
"epoch": 3.046218487394958,
|
265 |
+
"grad_norm": 1.9286493062973022,
|
266 |
+
"learning_rate": 9.277479955403887e-06,
|
267 |
+
"loss": 0.5768,
|
268 |
+
"step": 725
|
269 |
+
},
|
270 |
+
{
|
271 |
+
"epoch": 3.1512605042016806,
|
272 |
+
"grad_norm": 1.2430107593536377,
|
273 |
+
"learning_rate": 7.756905568047393e-06,
|
274 |
+
"loss": 0.2213,
|
275 |
+
"step": 750
|
276 |
+
},
|
277 |
+
{
|
278 |
+
"epoch": 3.2563025210084033,
|
279 |
+
"grad_norm": 1.5327359437942505,
|
280 |
+
"learning_rate": 6.349314471418849e-06,
|
281 |
+
"loss": 0.2563,
|
282 |
+
"step": 775
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"epoch": 3.361344537815126,
|
286 |
+
"grad_norm": 1.448890209197998,
|
287 |
+
"learning_rate": 5.063929735931985e-06,
|
288 |
+
"loss": 0.2227,
|
289 |
+
"step": 800
|
290 |
+
},
|
291 |
+
{
|
292 |
+
"epoch": 3.361344537815126,
|
293 |
+
"eval_loss": 3.311384439468384,
|
294 |
+
"eval_runtime": 35.9006,
|
295 |
+
"eval_samples_per_second": 13.036,
|
296 |
+
"eval_steps_per_second": 1.643,
|
297 |
+
"step": 800
|
298 |
+
},
|
299 |
+
{
|
300 |
+
"epoch": 3.466386554621849,
|
301 |
+
"grad_norm": 1.3534725904464722,
|
302 |
+
"learning_rate": 3.90917368959989e-06,
|
303 |
+
"loss": 0.2464,
|
304 |
+
"step": 825
|
305 |
+
},
|
306 |
+
{
|
307 |
+
"epoch": 3.571428571428571,
|
308 |
+
"grad_norm": 1.3151746988296509,
|
309 |
+
"learning_rate": 2.892612731749414e-06,
|
310 |
+
"loss": 0.2439,
|
311 |
+
"step": 850
|
312 |
+
},
|
313 |
+
{
|
314 |
+
"epoch": 3.6764705882352944,
|
315 |
+
"grad_norm": 1.4781558513641357,
|
316 |
+
"learning_rate": 2.020907755104698e-06,
|
317 |
+
"loss": 0.2355,
|
318 |
+
"step": 875
|
319 |
+
},
|
320 |
+
{
|
321 |
+
"epoch": 3.7815126050420167,
|
322 |
+
"grad_norm": 1.4184162616729736,
|
323 |
+
"learning_rate": 1.2997705010932393e-06,
|
324 |
+
"loss": 0.2198,
|
325 |
+
"step": 900
|
326 |
+
},
|
327 |
+
{
|
328 |
+
"epoch": 3.7815126050420167,
|
329 |
+
"eval_loss": 3.312718152999878,
|
330 |
+
"eval_runtime": 35.8966,
|
331 |
+
"eval_samples_per_second": 13.037,
|
332 |
+
"eval_steps_per_second": 1.644,
|
333 |
+
"step": 900
|
334 |
+
},
|
335 |
+
{
|
336 |
+
"epoch": 3.8865546218487395,
|
337 |
+
"grad_norm": 1.545154333114624,
|
338 |
+
"learning_rate": 7.339261343510206e-07,
|
339 |
+
"loss": 0.222,
|
340 |
+
"step": 925
|
341 |
+
},
|
342 |
+
{
|
343 |
+
"epoch": 3.991596638655462,
|
344 |
+
"grad_norm": 1.5649031400680542,
|
345 |
+
"learning_rate": 3.270822816527325e-07,
|
346 |
+
"loss": 0.2069,
|
347 |
+
"step": 950
|
348 |
+
},
|
349 |
+
{
|
350 |
+
"epoch": 4.0966386554621845,
|
351 |
+
"grad_norm": 0.9737259149551392,
|
352 |
+
"learning_rate": 8.190473813576572e-08,
|
353 |
+
"loss": 0.1163,
|
354 |
+
"step": 975
|
355 |
+
},
|
356 |
+
{
|
357 |
+
"epoch": 4.201680672268908,
|
358 |
+
"grad_norm": 0.9735974669456482,
|
359 |
+
"learning_rate": 0.0,
|
360 |
+
"loss": 0.1478,
|
361 |
+
"step": 1000
|
362 |
+
},
|
363 |
+
{
|
364 |
+
"epoch": 4.201680672268908,
|
365 |
+
"eval_loss": 3.372861385345459,
|
366 |
+
"eval_runtime": 35.8981,
|
367 |
+
"eval_samples_per_second": 13.037,
|
368 |
+
"eval_steps_per_second": 1.644,
|
369 |
+
"step": 1000
|
370 |
+
}
|
371 |
+
],
|
372 |
+
"logging_steps": 25,
|
373 |
+
"max_steps": 1000,
|
374 |
+
"num_input_tokens_seen": 0,
|
375 |
+
"num_train_epochs": 5,
|
376 |
+
"save_steps": 250,
|
377 |
+
"total_flos": 9.22205177249792e+16,
|
378 |
+
"train_batch_size": 1,
|
379 |
+
"trial_name": null,
|
380 |
+
"trial_params": null
|
381 |
+
}
|
checkpoint-250/optimizer_0/.metadata
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42a07921faeb91b5e7dc24c8f800f35e960259f183f1fb74189978fce8238fa6
|
3 |
+
size 1090439
|
checkpoint-250/optimizer_0/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0cfd271f3dee1a625ddae0dc9979b3488ff3b4c52d49c395600c818a5272e397
|
3 |
+
size 8031213736
|
checkpoint-250/optimizer_0/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89f5bb8dcc5e461163c2f5c78ed9806bdded86f52057b6c9cf47f2296db1bfaa
|
3 |
+
size 8030948008
|
checkpoint-250/optimizer_0/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b212f9e1e8402eb564cc0b72eb224d507437e32bfd2207199836b9856f4158f
|
3 |
+
size 8030948008
|
checkpoint-250/optimizer_0/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:834495a2cdadbe24135ed59c8f32a7314d66758d5e1d8fbfa1da88774b031b72
|
3 |
+
size 8030948008
|
checkpoint-250/optimizer_0/__4_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e8e99a458df63f3a569ac0737ee401ca59725efeb9fd7cf23560271759553d5b
|
3 |
+
size 8030948008
|
checkpoint-250/optimizer_0/__5_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3455e842b33fd85391b1cb135d011a7a200b922e33c9b20b5ff24fc13c88a914
|
3 |
+
size 8030948008
|
checkpoint-250/optimizer_0/__6_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73a0ec7ea24a0d0384ee39f363d9f7c2207190de2bc728875e11a620562ce008
|
3 |
+
size 8030948008
|
checkpoint-250/optimizer_0/__7_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7544855e1e3089cb67dbfce67f657857a6e58c5563e68f1b9024ed6f03048088
|
3 |
+
size 8030948008
|
checkpoint-250/pytorch_model_fsdp_0/.metadata
ADDED
Binary file (456 kB). View file
|
|
checkpoint-250/pytorch_model_fsdp_0/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad4e190657e60a65f0c9dc8730e20f624811f7ce93e1d0f471e92893191bd6c5
|
3 |
+
size 4015474004
|
checkpoint-250/pytorch_model_fsdp_0/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca42926a244a70554e7f4fdb59660bb1127500225a4bfc7e922775800a50d47d
|
3 |
+
size 4015474004
|
checkpoint-250/pytorch_model_fsdp_0/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d7ecd6017a2dd3c9a01ba52aa0a1ebd16fa417a706aff8a2e1f38dc9a328eb2
|
3 |
+
size 4015474004
|
checkpoint-250/pytorch_model_fsdp_0/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ad4b6d3724edcf396402b673a1697c8728d83b333079df230dd4a1bfd0a5793
|
3 |
+
size 4015474004
|
checkpoint-250/pytorch_model_fsdp_0/__4_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b936fd437858ee02804a3e57092575c794967cf353b441a24c386c644e11a665
|
3 |
+
size 4015474004
|
checkpoint-250/pytorch_model_fsdp_0/__5_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40cd4e340acbce57aa08fc6f8c5dd33f27be417156a7b0041801436ddfdfc246
|
3 |
+
size 4015474004
|
checkpoint-250/pytorch_model_fsdp_0/__6_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f193a686bed3a2589f2963a17e53477f1d9f8c785e80edaeed3a0d08667af2ab
|
3 |
+
size 4015474004
|
checkpoint-250/pytorch_model_fsdp_0/__7_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:403856e7121e448ee4fcd1a5fdbeac5ff6f40d6ac564cab9ee7cedaeac75b229
|
3 |
+
size 4015474004
|
checkpoint-250/rng_state_0.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5646d8ccb8e918de34f25b0c51a9c4fb696f5429a47506649c9badd8bf3bcfe1
|
3 |
+
size 14960
|
checkpoint-250/rng_state_1.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d94c7abd3510f6bbf1a2ac0a285c77356db76debe7ce90119c9cb896dd03b12d
|
3 |
+
size 14960
|
checkpoint-250/rng_state_2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b31a8737e95e91b14308f4d491da2ab52a884518f361e0d3ab328cb7fad81728
|
3 |
+
size 14960
|