joelniklaus committed
Commit: b7398da
Parent(s): 0f28af8
Training in progress, step 850000

Files changed:
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scaler.pt +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +312 -3
- last-checkpoint/training_args.bin +1 -1
- pytorch_model.bin +1 -1
- training_args.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:377aef4dd059b74792c5352ef81f8e44e00ebae2aba8904e27b6d7bddc64b4be
 size 885330713
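Each binary file in this commit is stored as a Git LFS pointer like the one above: a version line, the SHA-256 (oid) of the real payload, and its size in bytes. As a minimal sketch, a locally downloaded copy can be checked against the pointer; the path and expected hash below refer to the optimizer file in this commit, nothing else about the repository is assumed.

```python
# Sketch: verify a downloaded file against the oid recorded in its LFS pointer.
import hashlib

def lfs_sha256(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file and return its hex SHA-256, as Git LFS records it."""
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

expected = "377aef4dd059b74792c5352ef81f8e44e00ebae2aba8904e27b6d7bddc64b4be"
actual = lfs_sha256("last-checkpoint/optimizer.pt")
print("optimizer.pt matches pointer:", actual == expected)
```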
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:157afe62666c333a6a822593f8ac020489c93154c07c70eed8f687050fb132b0
 size 442678571
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:eed8b2b295b4b043aa8b2feddb689917b282e526c45b6eb23bd29ebdfbe6d494
 size 17563
last-checkpoint/scaler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:065f3ff4df60950ed9b6903fc98546b04d3292228f5f7ea2579fd161acf366b2
 size 559
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4cd1b1b8a91f71cb25cb8a1516a3c8cda9e53c3b6dd58a61caf55df95b8a1c51
 size 623
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.
-  "global_step":
+  "epoch": 1.239467,
+  "global_step": 850000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4950,11 +4950,320 @@
       "eval_samples_per_second": 415.274,
       "eval_steps_per_second": 0.831,
       "step": 800000
+    },
+    {
+      "epoch": 1.19,
+      "learning_rate": 1.0473407182373813e-05,
+      "loss": 0.9342,
+      "step": 801000
+    },
+    {
+      "epoch": 1.19,
+      "learning_rate": 1.0372361544374464e-05,
+      "loss": 0.8919,
+      "step": 802000
+    },
+    {
+      "epoch": 1.19,
+      "learning_rate": 1.0271849663326171e-05,
+      "loss": 0.8567,
+      "step": 803000
+    },
+    {
+      "epoch": 1.19,
+      "learning_rate": 1.0171671413607247e-05,
+      "loss": 0.9096,
+      "step": 804000
+    },
+    {
+      "epoch": 1.19,
+      "learning_rate": 1.0072028244878407e-05,
+      "loss": 1.0644,
+      "step": 805000
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 9.97272176118008e-06,
+      "loss": 1.0327,
+      "step": 806000
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 9.873853009383633e-06,
+      "loss": 1.076,
+      "step": 807000
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 9.775423070701888e-06,
+      "loss": 1.0958,
+      "step": 808000
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 9.67753079151617e-06,
+      "loss": 1.0053,
+      "step": 809000
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 9.57998126200062e-06,
+      "loss": 0.8648,
+      "step": 810000
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 9.482970645690526e-06,
+      "loss": 0.8919,
+      "step": 811000
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 9.38630578820755e-06,
+      "loss": 0.8439,
+      "step": 812000
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 9.290181074089233e-06,
+      "loss": 0.7462,
+      "step": 813000
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 9.194405112845483e-06,
+      "loss": 0.8975,
+      "step": 814000
+    },
+    {
+      "epoch": 1.2,
+      "learning_rate": 9.099170501521598e-06,
+      "loss": 0.9167,
+      "step": 815000
+    },
+    {
+      "epoch": 1.21,
+      "learning_rate": 9.00428762187942e-06,
+      "loss": 0.8518,
+      "step": 816000
+    },
+    {
+      "epoch": 1.21,
+      "learning_rate": 8.909947275055568e-06,
+      "loss": 0.8843,
+      "step": 817000
+    },
+    {
+      "epoch": 1.21,
+      "learning_rate": 8.815961623350038e-06,
+      "loss": 0.926,
+      "step": 818000
+    },
+    {
+      "epoch": 1.21,
+      "learning_rate": 8.722519663652901e-06,
+      "loss": 0.9408,
+      "step": 819000
+    },
+    {
+      "epoch": 1.21,
+      "learning_rate": 8.629435347010716e-06,
+      "loss": 0.7628,
+      "step": 820000
+    },
+    {
+      "epoch": 1.21,
+      "learning_rate": 8.536803452235437e-06,
+      "loss": 0.7669,
+      "step": 821000
+    },
+    {
+      "epoch": 1.21,
+      "learning_rate": 8.444624992334588e-06,
+      "loss": 0.7749,
+      "step": 822000
+    },
+    {
+      "epoch": 1.21,
+      "learning_rate": 8.352992472045557e-06,
+      "loss": 0.7575,
+      "step": 823000
+    },
+    {
+      "epoch": 1.21,
+      "learning_rate": 8.26172344512513e-06,
+      "loss": 0.899,
+      "step": 824000
+    },
+    {
+      "epoch": 1.21,
+      "learning_rate": 8.171001445569593e-06,
+      "loss": 0.9519,
+      "step": 825000
+    },
+    {
+      "epoch": 1.22,
+      "learning_rate": 8.080645840041112e-06,
+      "loss": 0.9472,
+      "step": 826000
+    },
+    {
+      "epoch": 1.22,
+      "learning_rate": 7.990838325725758e-06,
+      "loss": 0.9576,
+      "step": 827000
+    },
+    {
+      "epoch": 1.22,
+      "learning_rate": 7.901400090084665e-06,
+      "loss": 0.9493,
+      "step": 828000
+    },
+    {
+      "epoch": 1.22,
+      "learning_rate": 7.81251098555364e-06,
+      "loss": 0.8743,
+      "step": 829000
+    },
+    {
+      "epoch": 1.22,
+      "learning_rate": 7.723994028206778e-06,
+      "loss": 0.8593,
+      "step": 830000
+    },
+    {
+      "epoch": 1.22,
+      "learning_rate": 7.636027217870157e-06,
+      "loss": 0.8767,
+      "step": 831000
+    },
+    {
+      "epoch": 1.22,
+      "learning_rate": 7.54843540696496e-06,
+      "loss": 0.853,
+      "step": 832000
+    },
+    {
+      "epoch": 1.22,
+      "learning_rate": 7.461394734929022e-06,
+      "loss": 0.8742,
+      "step": 833000
+    },
+    {
+      "epoch": 1.22,
+      "learning_rate": 7.374731898184495e-06,
+      "loss": 1.0273,
+      "step": 834000
+    },
+    {
+      "epoch": 1.22,
+      "learning_rate": 7.2886211680837424e-06,
+      "loss": 1.0271,
+      "step": 835000
+    },
+    {
+      "epoch": 1.23,
+      "learning_rate": 7.202891092623126e-06,
+      "loss": 1.049,
+      "step": 836000
+    },
+    {
+      "epoch": 1.23,
+      "learning_rate": 7.11771406745404e-06,
+      "loss": 1.0773,
+      "step": 837000
+    },
+    {
+      "epoch": 1.23,
+      "learning_rate": 7.032920499639423e-06,
+      "loss": 1.0882,
+      "step": 838000
+    },
+    {
+      "epoch": 1.23,
+      "learning_rate": 6.94868090159605e-06,
+      "loss": 0.9609,
+      "step": 839000
+    },
+    {
+      "epoch": 1.23,
+      "learning_rate": 6.864827546864583e-06,
+      "loss": 0.9468,
+      "step": 840000
+    },
+    {
+      "epoch": 1.23,
+      "learning_rate": 6.781529057175845e-06,
+      "loss": 0.8922,
+      "step": 841000
+    },
+    {
+      "epoch": 1.23,
+      "learning_rate": 6.698619579877818e-06,
+      "loss": 0.7689,
+      "step": 842000
+    },
+    {
+      "epoch": 1.23,
+      "learning_rate": 6.616183639538559e-06,
+      "loss": 0.852,
+      "step": 843000
+    },
+    {
+      "epoch": 1.23,
+      "learning_rate": 6.534385586581854e-06,
+      "loss": 1.0235,
+      "step": 844000
+    },
+    {
+      "epoch": 1.23,
+      "learning_rate": 6.452898467929852e-06,
+      "loss": 0.9675,
+      "step": 845000
+    },
+    {
+      "epoch": 1.24,
+      "learning_rate": 6.371887573403335e-06,
+      "loss": 0.9587,
+      "step": 846000
+    },
+    {
+      "epoch": 1.24,
+      "learning_rate": 6.29135378892447e-06,
+      "loss": 1.0189,
+      "step": 847000
+    },
+    {
+      "epoch": 1.24,
+      "learning_rate": 6.211377811943364e-06,
+      "loss": 0.9343,
+      "step": 848000
+    },
+    {
+      "epoch": 1.24,
+      "learning_rate": 6.131879743067948e-06,
+      "loss": 0.7473,
+      "step": 849000
+    },
+    {
+      "epoch": 1.24,
+      "learning_rate": 6.0528604499385185e-06,
+      "loss": 0.7263,
+      "step": 850000
+    },
+    {
+      "epoch": 1.24,
+      "eval_accuracy": 0.8377542903972233,
+      "eval_loss": 0.7093546986579895,
+      "eval_runtime": 11.2849,
+      "eval_samples_per_second": 443.071,
+      "eval_steps_per_second": 0.886,
+      "step": 850000
     }
   ],
   "max_steps": 1000000,
   "num_train_epochs": 9223372036854775807,
-  "total_flos": 5.
+  "total_flos": 5.727433420305334e+19,
   "trial_name": null,
   "trial_params": null
 }
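The roughly 300 added lines above are the logging entries appended between steps 800000 and 850000: one loss/learning-rate record per 1000 optimizer steps, plus a final evaluation block at step 850000. As a small sketch, they can be read back from the checkpoint; this assumes the entries shown in the diff live under the usual "log_history" key of trainer_state.json.

```python
# Sketch: pull the training and eval curves back out of the checkpoint state.
import json

with open("last-checkpoint/trainer_state.json") as fh:
    state = json.load(fh)

train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print("global step:", state["global_step"])           # 850000 in this commit
print("last train loss:", train_logs[-1]["loss"])      # 0.7263 at step 850000
print("last eval loss:", eval_logs[-1]["eval_loss"])   # ~0.7094 at step 850000
```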
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e25faf06c9975aea2aa997e353bc16bb59f3eabd76c81f0c49c46d732c973aeb
 size 3503
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:157afe62666c333a6a822593f8ac020489c93154c07c70eed8f687050fb132b0
 size 442678571
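The top-level pytorch_model.bin carries the same 442678571-byte payload as the checkpoint copy and is, for models saved this way, typically a plain PyTorch state dict, so it can be inspected without knowing the model class. A minimal sketch, assuming only a local copy of the file:

```python
# Sketch: peek at the updated weights without instantiating the model.
import torch

# Loading a Hugging Face pytorch_model.bin normally yields a state dict
# (parameter name -> tensor); map_location keeps everything on CPU.
state_dict = torch.load("pytorch_model.bin", map_location="cpu")
n_params = sum(t.numel() for t in state_dict.values())
print(f"{len(state_dict)} tensors, {n_params:,} parameters")
```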
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e25faf06c9975aea2aa997e353bc16bb59f3eabd76c81f0c49c46d732c973aeb
 size 3503