Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +91 -0
- checkpoint-1000/optimizer_0/.metadata +3 -0
- checkpoint-1000/optimizer_0/__0_0.distcp +3 -0
- checkpoint-1000/optimizer_0/__1_0.distcp +3 -0
- checkpoint-1000/optimizer_0/__2_0.distcp +3 -0
- checkpoint-1000/optimizer_0/__3_0.distcp +3 -0
- checkpoint-1000/optimizer_0/__4_0.distcp +3 -0
- checkpoint-1000/optimizer_0/__5_0.distcp +3 -0
- checkpoint-1000/optimizer_0/__6_0.distcp +3 -0
- checkpoint-1000/optimizer_0/__7_0.distcp +3 -0
- checkpoint-1000/pytorch_model_fsdp_0/.metadata +3 -0
- checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp +3 -0
- checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp +3 -0
- checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp +3 -0
- checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp +3 -0
- checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp +3 -0
- checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp +3 -0
- checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp +3 -0
- checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp +3 -0
- checkpoint-1000/rng_state_0.pth +3 -0
- checkpoint-1000/rng_state_1.pth +3 -0
- checkpoint-1000/rng_state_2.pth +3 -0
- checkpoint-1000/rng_state_3.pth +3 -0
- checkpoint-1000/rng_state_4.pth +3 -0
- checkpoint-1000/rng_state_5.pth +3 -0
- checkpoint-1000/rng_state_6.pth +3 -0
- checkpoint-1000/rng_state_7.pth +3 -0
- checkpoint-1000/scheduler.pt +3 -0
- checkpoint-1000/trainer_state.json +173 -0
- checkpoint-2000/optimizer_0/.metadata +3 -0
- checkpoint-2000/optimizer_0/__0_0.distcp +3 -0
- checkpoint-2000/optimizer_0/__1_0.distcp +3 -0
- checkpoint-2000/optimizer_0/__2_0.distcp +3 -0
- checkpoint-2000/optimizer_0/__3_0.distcp +3 -0
- checkpoint-2000/optimizer_0/__4_0.distcp +3 -0
- checkpoint-2000/optimizer_0/__5_0.distcp +3 -0
- checkpoint-2000/optimizer_0/__6_0.distcp +3 -0
- checkpoint-2000/optimizer_0/__7_0.distcp +3 -0
- checkpoint-2000/pytorch_model_fsdp_0/.metadata +3 -0
- checkpoint-2000/pytorch_model_fsdp_0/__0_0.distcp +3 -0
- checkpoint-2000/pytorch_model_fsdp_0/__1_0.distcp +3 -0
- checkpoint-2000/pytorch_model_fsdp_0/__2_0.distcp +3 -0
- checkpoint-2000/pytorch_model_fsdp_0/__3_0.distcp +3 -0
- checkpoint-2000/pytorch_model_fsdp_0/__4_0.distcp +3 -0
- checkpoint-2000/pytorch_model_fsdp_0/__5_0.distcp +3 -0
- checkpoint-2000/pytorch_model_fsdp_0/__6_0.distcp +3 -0
- checkpoint-2000/pytorch_model_fsdp_0/__7_0.distcp +3 -0
- checkpoint-2000/rng_state_0.pth +3 -0
- checkpoint-2000/rng_state_1.pth +3 -0
- checkpoint-2000/rng_state_2.pth +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,94 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
checkpoint-1000/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
|
37 |
+
checkpoint-1000/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
38 |
+
checkpoint-1000/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
39 |
+
checkpoint-1000/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
40 |
+
checkpoint-1000/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
41 |
+
checkpoint-1000/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
42 |
+
checkpoint-1000/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
43 |
+
checkpoint-1000/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
44 |
+
checkpoint-1000/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
45 |
+
checkpoint-1000/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
|
46 |
+
checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
47 |
+
checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
48 |
+
checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
49 |
+
checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
50 |
+
checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
51 |
+
checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
52 |
+
checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
53 |
+
checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
54 |
+
checkpoint-2000/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
|
55 |
+
checkpoint-2000/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
56 |
+
checkpoint-2000/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
57 |
+
checkpoint-2000/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
58 |
+
checkpoint-2000/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
59 |
+
checkpoint-2000/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
60 |
+
checkpoint-2000/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
61 |
+
checkpoint-2000/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
62 |
+
checkpoint-2000/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
63 |
+
checkpoint-2000/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
|
64 |
+
checkpoint-2000/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
65 |
+
checkpoint-2000/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
66 |
+
checkpoint-2000/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
67 |
+
checkpoint-2000/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
68 |
+
checkpoint-2000/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
69 |
+
checkpoint-2000/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
70 |
+
checkpoint-2000/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
71 |
+
checkpoint-2000/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
72 |
+
checkpoint-3000/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
|
73 |
+
checkpoint-3000/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
74 |
+
checkpoint-3000/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
75 |
+
checkpoint-3000/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
76 |
+
checkpoint-3000/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
77 |
+
checkpoint-3000/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
78 |
+
checkpoint-3000/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
79 |
+
checkpoint-3000/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
80 |
+
checkpoint-3000/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
81 |
+
checkpoint-3000/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
|
82 |
+
checkpoint-3000/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
83 |
+
checkpoint-3000/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
84 |
+
checkpoint-3000/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
85 |
+
checkpoint-3000/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
86 |
+
checkpoint-3000/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
87 |
+
checkpoint-3000/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
88 |
+
checkpoint-3000/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
89 |
+
checkpoint-3000/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
90 |
+
checkpoint-4000/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
|
91 |
+
checkpoint-4000/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
92 |
+
checkpoint-4000/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
93 |
+
checkpoint-4000/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
94 |
+
checkpoint-4000/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
95 |
+
checkpoint-4000/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
96 |
+
checkpoint-4000/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
97 |
+
checkpoint-4000/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
98 |
+
checkpoint-4000/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
99 |
+
checkpoint-4000/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
|
100 |
+
checkpoint-4000/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
101 |
+
checkpoint-4000/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
102 |
+
checkpoint-4000/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
103 |
+
checkpoint-4000/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
104 |
+
checkpoint-4000/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
105 |
+
checkpoint-4000/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
106 |
+
checkpoint-4000/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
107 |
+
checkpoint-4000/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
108 |
+
checkpoint-4863/optimizer_0/.metadata filter=lfs diff=lfs merge=lfs -text
|
109 |
+
checkpoint-4863/optimizer_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
110 |
+
checkpoint-4863/optimizer_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
111 |
+
checkpoint-4863/optimizer_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
112 |
+
checkpoint-4863/optimizer_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
113 |
+
checkpoint-4863/optimizer_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
114 |
+
checkpoint-4863/optimizer_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
115 |
+
checkpoint-4863/optimizer_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
116 |
+
checkpoint-4863/optimizer_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
117 |
+
checkpoint-4863/pytorch_model_fsdp_0/.metadata filter=lfs diff=lfs merge=lfs -text
|
118 |
+
checkpoint-4863/pytorch_model_fsdp_0/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
119 |
+
checkpoint-4863/pytorch_model_fsdp_0/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
120 |
+
checkpoint-4863/pytorch_model_fsdp_0/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
121 |
+
checkpoint-4863/pytorch_model_fsdp_0/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
122 |
+
checkpoint-4863/pytorch_model_fsdp_0/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
123 |
+
checkpoint-4863/pytorch_model_fsdp_0/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
124 |
+
checkpoint-4863/pytorch_model_fsdp_0/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
125 |
+
checkpoint-4863/pytorch_model_fsdp_0/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
126 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
checkpoint-1000/optimizer_0/.metadata
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c823bc431bf7d807ed32a5f978a1748f129e77a58aba3594daf3a2045d091648
|
3 |
+
size 2626018
|
checkpoint-1000/optimizer_0/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e3c2153bf02b381c6ad2ffe0824dc5ba46c39423c40fb753a13b2b81d22c2d6d
|
3 |
+
size 55406592
|
checkpoint-1000/optimizer_0/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:976f38d9d7d31749574bb1d3dc238341bd89db0ad56b4a89aca61daaedbd1dc8
|
3 |
+
size 55526656
|
checkpoint-1000/optimizer_0/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab8f4a691793cccd18d28711b97db5c3fffd047450c73cd1bc8c15d242a455e0
|
3 |
+
size 55480896
|
checkpoint-1000/optimizer_0/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7794d7737fe9ab804c47265172c228f322c3ce0d8ae1f4b0da1c0961299c454b
|
3 |
+
size 55480896
|
checkpoint-1000/optimizer_0/__4_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1be0f25d5d1ef6a5da7241da8e0bfd32f5ea5acae0402cb65c0b237cdb85ba52
|
3 |
+
size 55480032
|
checkpoint-1000/optimizer_0/__5_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d508dfa9519b3b5007e08eb3cfcadb5e1af8f66b5cbb7c46433de1a0220b3fd0
|
3 |
+
size 55480032
|
checkpoint-1000/optimizer_0/__6_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef388af3d62f8049bdd7aa4bd92686f81eba235bd699a8770d3d998bd7fc7201
|
3 |
+
size 55480032
|
checkpoint-1000/optimizer_0/__7_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:862e1e89ad8d6d45b9612642902fb8e7e24f3242c701559334e4a98718bd9930
|
3 |
+
size 55480032
|
checkpoint-1000/pytorch_model_fsdp_0/.metadata
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19502f0f22e6a789d9430be30fd1319f8dd68afbb00a9bc001926d880d5042d1
|
3 |
+
size 1064888
|
checkpoint-1000/pytorch_model_fsdp_0/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87a8ae8d0d9b4c6d4906e101e591b6ff24ca8256fb349638bc2c9bafe125a6fb
|
3 |
+
size 27702864
|
checkpoint-1000/pytorch_model_fsdp_0/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7fe1fb95d56970cff4dcbc83a83ca7f8ecac9bc641868cf8fde4b69f5b7b57f5
|
3 |
+
size 27702864
|
checkpoint-1000/pytorch_model_fsdp_0/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:be0526cb870508e64adac4dba0b1cd510e90492d4a0c715b6f3824cd9a832ce6
|
3 |
+
size 27702864
|
checkpoint-1000/pytorch_model_fsdp_0/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:548171e890776a3386a8114d5e24eb128c0035a60402484b03960f2e01651715
|
3 |
+
size 27702864
|
checkpoint-1000/pytorch_model_fsdp_0/__4_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40528c0670a9dbde32993d03f21ac1cb232c25c596ab49daa179f3ebcf19bf8d
|
3 |
+
size 27702864
|
checkpoint-1000/pytorch_model_fsdp_0/__5_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a30cd894ac0476288fe9a1b47f5257a750b3f8b247ccaa46692096d393f54c7a
|
3 |
+
size 27702864
|
checkpoint-1000/pytorch_model_fsdp_0/__6_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b0e628ecd537a0d31f91287a8a7320005dcef7e164a9f8b11c199eeda8058c52
|
3 |
+
size 27702864
|
checkpoint-1000/pytorch_model_fsdp_0/__7_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cd911f0a16f12715e3105bf4ac7d5735f4c9e2bb7b1e1fd50687fa706884fbd0
|
3 |
+
size 27702864
|
checkpoint-1000/rng_state_0.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:852a2229a3726ffedc43daf9b32d882ac09be192242bada110e4e27a158a4ad8
|
3 |
+
size 15984
|
checkpoint-1000/rng_state_1.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:607d5b9fe1b6ffab2c6ab8c0cca7a2dc074f35f0f539611c21febcb657cb9230
|
3 |
+
size 15984
|
checkpoint-1000/rng_state_2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21bfb8fc3702bc02f20eecb02befd28c4d4a0cf2d6e241c7b306fedf67d74101
|
3 |
+
size 15984
|
checkpoint-1000/rng_state_3.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f680126569c803d74359f9aa642e22ede3c8af20715e7c162ef1abe0100375d8
|
3 |
+
size 15984
|
checkpoint-1000/rng_state_4.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0095abae33bd227b2ec0fdc9f9fe68be4e271a993b2a40b77676a7d46e7c0877
|
3 |
+
size 15984
|
checkpoint-1000/rng_state_5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb75e7ee126757b5c516ccdb6844f17b23593a1586dc161a8668186a81a17481
|
3 |
+
size 15984
|
checkpoint-1000/rng_state_6.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2d0e74d163711a1f334aacbfe6cc80b35858c2f0be112236077c3cb5c8047a2
|
3 |
+
size 15984
|
checkpoint-1000/rng_state_7.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba53e2865adec6eb39312aa9f9ea49a46316b7a8c07f97959fc55d8a4a61f463
|
3 |
+
size 15984
|
checkpoint-1000/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1252495abb6207a87314f27eb7a52b71b9170963ff2e044a7fac5b9a90ef861
|
3 |
+
size 1064
|
checkpoint-1000/trainer_state.json
ADDED
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": null,
|
3 |
+
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.6169031462060457,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 1000,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.030845157310302282,
|
13 |
+
"grad_norm": 2.8206074237823486,
|
14 |
+
"learning_rate": 1.0277492291880782e-05,
|
15 |
+
"loss": 1.8082,
|
16 |
+
"step": 50
|
17 |
+
},
|
18 |
+
{
|
19 |
+
"epoch": 0.061690314620604564,
|
20 |
+
"grad_norm": 3.4183013439178467,
|
21 |
+
"learning_rate": 2.0554984583761563e-05,
|
22 |
+
"loss": 0.6538,
|
23 |
+
"step": 100
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.09253547193090685,
|
27 |
+
"grad_norm": 2.170591354370117,
|
28 |
+
"learning_rate": 3.083247687564235e-05,
|
29 |
+
"loss": 0.4563,
|
30 |
+
"step": 150
|
31 |
+
},
|
32 |
+
{
|
33 |
+
"epoch": 0.12338062924120913,
|
34 |
+
"grad_norm": 1.4687080383300781,
|
35 |
+
"learning_rate": 4.110996916752313e-05,
|
36 |
+
"loss": 0.4263,
|
37 |
+
"step": 200
|
38 |
+
},
|
39 |
+
{
|
40 |
+
"epoch": 0.15422578655151142,
|
41 |
+
"grad_norm": 1.836676836013794,
|
42 |
+
"learning_rate": 5.1387461459403907e-05,
|
43 |
+
"loss": 0.3994,
|
44 |
+
"step": 250
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.1850709438618137,
|
48 |
+
"grad_norm": 1.2718663215637207,
|
49 |
+
"learning_rate": 6.16649537512847e-05,
|
50 |
+
"loss": 0.3665,
|
51 |
+
"step": 300
|
52 |
+
},
|
53 |
+
{
|
54 |
+
"epoch": 0.215916101172116,
|
55 |
+
"grad_norm": 1.6945191621780396,
|
56 |
+
"learning_rate": 7.194244604316547e-05,
|
57 |
+
"loss": 0.3577,
|
58 |
+
"step": 350
|
59 |
+
},
|
60 |
+
{
|
61 |
+
"epoch": 0.24676125848241826,
|
62 |
+
"grad_norm": 1.2829898595809937,
|
63 |
+
"learning_rate": 8.221993833504625e-05,
|
64 |
+
"loss": 0.347,
|
65 |
+
"step": 400
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 0.27760641579272055,
|
69 |
+
"grad_norm": 1.01521635055542,
|
70 |
+
"learning_rate": 9.249743062692704e-05,
|
71 |
+
"loss": 0.3288,
|
72 |
+
"step": 450
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"epoch": 0.30845157310302285,
|
76 |
+
"grad_norm": 1.522111415863037,
|
77 |
+
"learning_rate": 0.00010277492291880781,
|
78 |
+
"loss": 0.3267,
|
79 |
+
"step": 500
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"epoch": 0.3392967304133251,
|
83 |
+
"grad_norm": 0.9678927659988403,
|
84 |
+
"learning_rate": 0.00011305241521068859,
|
85 |
+
"loss": 0.3198,
|
86 |
+
"step": 550
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"epoch": 0.3701418877236274,
|
90 |
+
"grad_norm": 1.2144405841827393,
|
91 |
+
"learning_rate": 0.0001233299075025694,
|
92 |
+
"loss": 0.3099,
|
93 |
+
"step": 600
|
94 |
+
},
|
95 |
+
{
|
96 |
+
"epoch": 0.4009870450339297,
|
97 |
+
"grad_norm": 1.3122639656066895,
|
98 |
+
"learning_rate": 0.00013360739979445017,
|
99 |
+
"loss": 0.2929,
|
100 |
+
"step": 650
|
101 |
+
},
|
102 |
+
{
|
103 |
+
"epoch": 0.431832202344232,
|
104 |
+
"grad_norm": 1.0934101343154907,
|
105 |
+
"learning_rate": 0.00014388489208633093,
|
106 |
+
"loss": 0.3003,
|
107 |
+
"step": 700
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 0.4626773596545342,
|
111 |
+
"grad_norm": 0.7938969731330872,
|
112 |
+
"learning_rate": 0.00015416238437821172,
|
113 |
+
"loss": 0.2956,
|
114 |
+
"step": 750
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 0.4935225169648365,
|
118 |
+
"grad_norm": 0.6571168303489685,
|
119 |
+
"learning_rate": 0.0001644398766700925,
|
120 |
+
"loss": 0.2736,
|
121 |
+
"step": 800
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"epoch": 0.5243676742751388,
|
125 |
+
"grad_norm": 1.0073938369750977,
|
126 |
+
"learning_rate": 0.0001747173689619733,
|
127 |
+
"loss": 0.2892,
|
128 |
+
"step": 850
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.5552128315854411,
|
132 |
+
"grad_norm": 0.9874083399772644,
|
133 |
+
"learning_rate": 0.00018499486125385408,
|
134 |
+
"loss": 0.2723,
|
135 |
+
"step": 900
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.5860579888957433,
|
139 |
+
"grad_norm": 1.1770968437194824,
|
140 |
+
"learning_rate": 0.00019527235354573487,
|
141 |
+
"loss": 0.2855,
|
142 |
+
"step": 950
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.6169031462060457,
|
146 |
+
"grad_norm": 1.00326669216156,
|
147 |
+
"learning_rate": 0.00019997622717095418,
|
148 |
+
"loss": 0.2587,
|
149 |
+
"step": 1000
|
150 |
+
}
|
151 |
+
],
|
152 |
+
"logging_steps": 50,
|
153 |
+
"max_steps": 4863,
|
154 |
+
"num_input_tokens_seen": 0,
|
155 |
+
"num_train_epochs": 3,
|
156 |
+
"save_steps": 1000,
|
157 |
+
"stateful_callbacks": {
|
158 |
+
"TrainerControl": {
|
159 |
+
"args": {
|
160 |
+
"should_epoch_stop": false,
|
161 |
+
"should_evaluate": false,
|
162 |
+
"should_log": false,
|
163 |
+
"should_save": true,
|
164 |
+
"should_training_stop": false
|
165 |
+
},
|
166 |
+
"attributes": {}
|
167 |
+
}
|
168 |
+
},
|
169 |
+
"total_flos": 7124677004623872.0,
|
170 |
+
"train_batch_size": 1,
|
171 |
+
"trial_name": null,
|
172 |
+
"trial_params": null
|
173 |
+
}
|
checkpoint-2000/optimizer_0/.metadata
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5626ee28c1f24f09a7b7f3a240119b9d2db5082be6528657524ec153ab4bab40
|
3 |
+
size 2626018
|
checkpoint-2000/optimizer_0/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e1ddac24aff4bfe008d75f42dabb23ed177efe5cefe9f7b5f29f71a0ebdb1c1
|
3 |
+
size 55406592
|
checkpoint-2000/optimizer_0/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:644a8b57f5bf12acebd9a45311e38f079556731bd0cce4d9e4479c52fe290daf
|
3 |
+
size 55526656
|
checkpoint-2000/optimizer_0/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:07e3ed8ae7c7f161c908da0ff18b57aeca951ebbd215de56c152243a70b4d230
|
3 |
+
size 55480896
|
checkpoint-2000/optimizer_0/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e94e18382a97151194018cb781cf0fe22334ee35ac961ca0af3001abd6bc932
|
3 |
+
size 55480896
|
checkpoint-2000/optimizer_0/__4_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63e6da3410ebcd43453c81d05a10eafb02c674cac7d1b8798b0933af17d3a218
|
3 |
+
size 55480032
|
checkpoint-2000/optimizer_0/__5_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed8c41c0e613f61c6d53924a953f451e70a74c66778418a43b07e0a72d4837fc
|
3 |
+
size 55480032
|
checkpoint-2000/optimizer_0/__6_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:75dd182d21f0d83d4f2e1d017b7ad1bc81d1cdd6b5f05c411eb7156db6af5b54
|
3 |
+
size 55480032
|
checkpoint-2000/optimizer_0/__7_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1887e9892bd4a005864e6de571616f9850350f98993f8ddf36fcfddca8d360e4
|
3 |
+
size 55480032
|
checkpoint-2000/pytorch_model_fsdp_0/.metadata
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a19ab021f1ba6fc33e10c18810aae15bcdcc44f398f902265ff6fd94a02d4df
|
3 |
+
size 1064888
|
checkpoint-2000/pytorch_model_fsdp_0/__0_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:275c922a12364010c488d511c3b5e5de1bfe494ef4dbb54b1892668b1d4bfaac
|
3 |
+
size 27702864
|
checkpoint-2000/pytorch_model_fsdp_0/__1_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b96915d94e47d988a4d4c1fd5a228abcba8bb73c05b155cff6c893a0ebf85b09
|
3 |
+
size 27702864
|
checkpoint-2000/pytorch_model_fsdp_0/__2_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c852e5098594f95316cb3a0b872de32d839d418dc744165865805834e452b2a6
|
3 |
+
size 27702864
|
checkpoint-2000/pytorch_model_fsdp_0/__3_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:df3993232148d686149b082f51d8dbdc8c96c898b3d4b399724f87c614527e80
|
3 |
+
size 27702864
|
checkpoint-2000/pytorch_model_fsdp_0/__4_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b68262b2ed6d2b9c1b691f92a6ad19522c13477f38044762eccfd58b25d39963
|
3 |
+
size 27702864
|
checkpoint-2000/pytorch_model_fsdp_0/__5_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6f12638b582d1293d83d0d56e4ca8036a67a4bf47d1e68fb4260ce7492b4820d
|
3 |
+
size 27702864
|
checkpoint-2000/pytorch_model_fsdp_0/__6_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7e9745a4bdc41fb2deb28b003850b909d73fa094c02afef0ad14e955dfe9b153
|
3 |
+
size 27702864
|
checkpoint-2000/pytorch_model_fsdp_0/__7_0.distcp
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5b433f984a2fbda86bfb4194be437e3ca37d25649ae5ce1ea1a70a13965cea7
|
3 |
+
size 27702864
|
checkpoint-2000/rng_state_0.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8204337523c9526120d9dbd77c1b85e83685a646168ef1aa5614cc1cc72b52f5
|
3 |
+
size 15984
|
checkpoint-2000/rng_state_1.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7fcac6b4bd212354a8c95c4fd130500378c409f8c520e7ec730ba272ddee284d
|
3 |
+
size 15984
|
checkpoint-2000/rng_state_2.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a9bb45dc4e4d1accd280a5304c4c6b48809c92185e462b3df1633b8c69321c1
|
3 |
+
size 15984
|