plip commited on
Commit
15b0c39
1 Parent(s): f024f0a

Training in progress, step 260000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3ab9dc056c74cdf5d65dc3a03d7bd30aade58ae1e720539569500f125901ec6
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a6cf8e52153476c0d24566862211ae73b151847958bccb312e482641f34fe2f
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b912672be50d1a4d1c80a97a69c79548a9895fd6edfdbc5e52d33a97a1f00535
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20d31269cc8100e5fc218adcef1522ff7b0dd07e50b44819d9df2a65286c8129
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80275ad90d449cfcd19f9c9b9d842c8c524989912548b0b01f19b4b413629488
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27a5987a387bd5d5448f919b8fbc047d2f79f2d23737fd4d602720c8f9c95204
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80275ad90d449cfcd19f9c9b9d842c8c524989912548b0b01f19b4b413629488
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27a5987a387bd5d5448f919b8fbc047d2f79f2d23737fd4d602720c8f9c95204
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80275ad90d449cfcd19f9c9b9d842c8c524989912548b0b01f19b4b413629488
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27a5987a387bd5d5448f919b8fbc047d2f79f2d23737fd4d602720c8f9c95204
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80275ad90d449cfcd19f9c9b9d842c8c524989912548b0b01f19b4b413629488
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27a5987a387bd5d5448f919b8fbc047d2f79f2d23737fd4d602720c8f9c95204
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80275ad90d449cfcd19f9c9b9d842c8c524989912548b0b01f19b4b413629488
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27a5987a387bd5d5448f919b8fbc047d2f79f2d23737fd4d602720c8f9c95204
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80275ad90d449cfcd19f9c9b9d842c8c524989912548b0b01f19b4b413629488
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27a5987a387bd5d5448f919b8fbc047d2f79f2d23737fd4d602720c8f9c95204
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80275ad90d449cfcd19f9c9b9d842c8c524989912548b0b01f19b4b413629488
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27a5987a387bd5d5448f919b8fbc047d2f79f2d23737fd4d602720c8f9c95204
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80275ad90d449cfcd19f9c9b9d842c8c524989912548b0b01f19b4b413629488
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27a5987a387bd5d5448f919b8fbc047d2f79f2d23737fd4d602720c8f9c95204
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:438f3db980ad0547f739432602e1f85cc46b6fbef312b9261fd3b355ceeb97af
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b7fe86b6bf62db9f7989d6e264b9b70447a29a8d4bbea419af77ab1989ca356
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.37137468780264,
5
- "global_step": 250000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -5006,11 +5006,211 @@
5006
  "eval_samples_per_second": 788.807,
5007
  "eval_steps_per_second": 12.621,
5008
  "step": 250000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5009
  }
5010
  ],
5011
  "max_steps": 500000,
5012
  "num_train_epochs": 13,
5013
- "total_flos": 7.98709759588423e+21,
5014
  "trial_name": null,
5015
  "trial_params": null
5016
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.626229675314746,
5
+ "global_step": 260000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
5006
  "eval_samples_per_second": 788.807,
5007
  "eval_steps_per_second": 12.621,
5008
  "step": 250000
5009
+ },
5010
+ {
5011
+ "epoch": 6.38,
5012
+ "learning_rate": 0.00016649607200138356,
5013
+ "loss": 0.2877,
5014
+ "step": 250500
5015
+ },
5016
+ {
5017
+ "epoch": 6.4,
5018
+ "learning_rate": 0.00016601801319007743,
5019
+ "loss": 0.2877,
5020
+ "step": 251000
5021
+ },
5022
+ {
5023
+ "epoch": 6.4,
5024
+ "eval_loss": 0.810385525226593,
5025
+ "eval_runtime": 1.2931,
5026
+ "eval_samples_per_second": 773.361,
5027
+ "eval_steps_per_second": 12.374,
5028
+ "step": 251000
5029
+ },
5030
+ {
5031
+ "epoch": 6.41,
5032
+ "learning_rate": 0.00016553983388754428,
5033
+ "loss": 0.2872,
5034
+ "step": 251500
5035
+ },
5036
+ {
5037
+ "epoch": 6.42,
5038
+ "learning_rate": 0.00016506153932307636,
5039
+ "loss": 0.2872,
5040
+ "step": 252000
5041
+ },
5042
+ {
5043
+ "epoch": 6.42,
5044
+ "eval_loss": 0.8114423155784607,
5045
+ "eval_runtime": 1.2618,
5046
+ "eval_samples_per_second": 792.538,
5047
+ "eval_steps_per_second": 12.681,
5048
+ "step": 252000
5049
+ },
5050
+ {
5051
+ "epoch": 6.44,
5052
+ "learning_rate": 0.00016458313472722638,
5053
+ "loss": 0.2874,
5054
+ "step": 252500
5055
+ },
5056
+ {
5057
+ "epoch": 6.45,
5058
+ "learning_rate": 0.00016410462533175045,
5059
+ "loss": 0.2871,
5060
+ "step": 253000
5061
+ },
5062
+ {
5063
+ "epoch": 6.45,
5064
+ "eval_loss": 0.814827024936676,
5065
+ "eval_runtime": 1.3254,
5066
+ "eval_samples_per_second": 754.465,
5067
+ "eval_steps_per_second": 12.071,
5068
+ "step": 253000
5069
+ },
5070
+ {
5071
+ "epoch": 6.46,
5072
+ "learning_rate": 0.00016362601636955049,
5073
+ "loss": 0.288,
5074
+ "step": 253500
5075
+ },
5076
+ {
5077
+ "epoch": 6.47,
5078
+ "learning_rate": 0.00016314731307461754,
5079
+ "loss": 0.2875,
5080
+ "step": 254000
5081
+ },
5082
+ {
5083
+ "epoch": 6.47,
5084
+ "eval_loss": 0.812713623046875,
5085
+ "eval_runtime": 1.2619,
5086
+ "eval_samples_per_second": 792.485,
5087
+ "eval_steps_per_second": 12.68,
5088
+ "step": 254000
5089
+ },
5090
+ {
5091
+ "epoch": 6.49,
5092
+ "learning_rate": 0.0001626685206819742,
5093
+ "loss": 0.2874,
5094
+ "step": 254500
5095
+ },
5096
+ {
5097
+ "epoch": 6.5,
5098
+ "learning_rate": 0.0001621896444276172,
5099
+ "loss": 0.287,
5100
+ "step": 255000
5101
+ },
5102
+ {
5103
+ "epoch": 6.5,
5104
+ "eval_loss": 0.820074200630188,
5105
+ "eval_runtime": 1.2181,
5106
+ "eval_samples_per_second": 820.971,
5107
+ "eval_steps_per_second": 13.136,
5108
+ "step": 255000
5109
+ },
5110
+ {
5111
+ "epoch": 6.51,
5112
+ "learning_rate": 0.00016171068954846067,
5113
+ "loss": 0.2865,
5114
+ "step": 255500
5115
+ },
5116
+ {
5117
+ "epoch": 6.52,
5118
+ "learning_rate": 0.00016123166128227835,
5119
+ "loss": 0.2869,
5120
+ "step": 256000
5121
+ },
5122
+ {
5123
+ "epoch": 6.52,
5124
+ "eval_loss": 0.8100768327713013,
5125
+ "eval_runtime": 1.2652,
5126
+ "eval_samples_per_second": 790.379,
5127
+ "eval_steps_per_second": 12.646,
5128
+ "step": 256000
5129
+ },
5130
+ {
5131
+ "epoch": 6.54,
5132
+ "learning_rate": 0.0001607525648676467,
5133
+ "loss": 0.2868,
5134
+ "step": 256500
5135
+ },
5136
+ {
5137
+ "epoch": 6.55,
5138
+ "learning_rate": 0.0001602734055438873,
5139
+ "loss": 0.2868,
5140
+ "step": 257000
5141
+ },
5142
+ {
5143
+ "epoch": 6.55,
5144
+ "eval_loss": 0.814249575138092,
5145
+ "eval_runtime": 1.2757,
5146
+ "eval_samples_per_second": 783.912,
5147
+ "eval_steps_per_second": 12.543,
5148
+ "step": 257000
5149
+ },
5150
+ {
5151
+ "epoch": 6.56,
5152
+ "learning_rate": 0.00015979418855100963,
5153
+ "loss": 0.2869,
5154
+ "step": 257500
5155
+ },
5156
+ {
5157
+ "epoch": 6.58,
5158
+ "learning_rate": 0.00015931491912965417,
5159
+ "loss": 0.2869,
5160
+ "step": 258000
5161
+ },
5162
+ {
5163
+ "epoch": 6.58,
5164
+ "eval_loss": 0.815778374671936,
5165
+ "eval_runtime": 1.2351,
5166
+ "eval_samples_per_second": 809.649,
5167
+ "eval_steps_per_second": 12.954,
5168
+ "step": 258000
5169
+ },
5170
+ {
5171
+ "epoch": 6.59,
5172
+ "learning_rate": 0.0001588356025210344,
5173
+ "loss": 0.2866,
5174
+ "step": 258500
5175
+ },
5176
+ {
5177
+ "epoch": 6.6,
5178
+ "learning_rate": 0.00015835624396688,
5179
+ "loss": 0.2868,
5180
+ "step": 259000
5181
+ },
5182
+ {
5183
+ "epoch": 6.6,
5184
+ "eval_loss": 0.81247878074646,
5185
+ "eval_runtime": 1.3012,
5186
+ "eval_samples_per_second": 768.517,
5187
+ "eval_steps_per_second": 12.296,
5188
+ "step": 259000
5189
+ },
5190
+ {
5191
+ "epoch": 6.61,
5192
+ "learning_rate": 0.00015787684870937924,
5193
+ "loss": 0.2862,
5194
+ "step": 259500
5195
+ },
5196
+ {
5197
+ "epoch": 6.63,
5198
+ "learning_rate": 0.00015739742199112196,
5199
+ "loss": 0.2865,
5200
+ "step": 260000
5201
+ },
5202
+ {
5203
+ "epoch": 6.63,
5204
+ "eval_loss": 0.816739559173584,
5205
+ "eval_runtime": 1.3361,
5206
+ "eval_samples_per_second": 748.473,
5207
+ "eval_steps_per_second": 11.976,
5208
+ "step": 260000
5209
  }
5210
  ],
5211
  "max_steps": 500000,
5212
  "num_train_epochs": 13,
5213
+ "total_flos": 8.30658473465873e+21,
5214
  "trial_name": null,
5215
  "trial_params": null
5216
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b912672be50d1a4d1c80a97a69c79548a9895fd6edfdbc5e52d33a97a1f00535
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20d31269cc8100e5fc218adcef1522ff7b0dd07e50b44819d9df2a65286c8129
3
  size 102501541