Training in progress, step 360000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +203 -3
- pytorch_model.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 202194449
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c017dc7aa51a43ed67b54be4391afb3209331a95386c7a0f1faefbfda3688b82
|
3 |
size 202194449
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86d6df4782178506acacd0c83df02e5b041758e0dad6be6ff1fb20dae19c22b2
|
3 |
size 102501541
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
|
3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
|
3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
|
3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
|
3 |
size 14503
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
|
3 |
size 14503
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
|
3 |
size 14503
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
|
3 |
size 14503
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14503
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b9e7aa2b8e2ccb71915d50d53e92b984c37ae39fd879c08b1e03ec5f916c1ce
|
3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6e54422706a010aa16b679660182e5a0c0f546c43656852cb88a82c1d45dccf
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -7006,11 +7006,211 @@
|
|
7006 |
"eval_samples_per_second": 791.502,
|
7007 |
"eval_steps_per_second": 12.664,
|
7008 |
"step": 350000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7009 |
}
|
7010 |
],
|
7011 |
"max_steps": 500000,
|
7012 |
"num_train_epochs": 13,
|
7013 |
-
"total_flos": 1.
|
7014 |
"trial_name": null,
|
7015 |
"trial_params": null
|
7016 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 9.174779550435803,
|
5 |
+
"global_step": 360000,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
7006 |
"eval_samples_per_second": 791.502,
|
7007 |
"eval_steps_per_second": 12.664,
|
7008 |
"step": 350000
|
7009 |
+
},
|
7010 |
+
{
|
7011 |
+
"epoch": 8.93,
|
7012 |
+
"learning_rate": 7.529152489465592e-05,
|
7013 |
+
"loss": 0.277,
|
7014 |
+
"step": 350500
|
7015 |
+
},
|
7016 |
+
{
|
7017 |
+
"epoch": 8.95,
|
7018 |
+
"learning_rate": 7.489140439617708e-05,
|
7019 |
+
"loss": 0.2766,
|
7020 |
+
"step": 351000
|
7021 |
+
},
|
7022 |
+
{
|
7023 |
+
"epoch": 8.95,
|
7024 |
+
"eval_loss": 0.8036056160926819,
|
7025 |
+
"eval_runtime": 1.2298,
|
7026 |
+
"eval_samples_per_second": 813.125,
|
7027 |
+
"eval_steps_per_second": 13.01,
|
7028 |
+
"step": 351000
|
7029 |
+
},
|
7030 |
+
{
|
7031 |
+
"epoch": 8.96,
|
7032 |
+
"learning_rate": 7.449215995246522e-05,
|
7033 |
+
"loss": 0.2765,
|
7034 |
+
"step": 351500
|
7035 |
+
},
|
7036 |
+
{
|
7037 |
+
"epoch": 8.97,
|
7038 |
+
"learning_rate": 7.409379592959367e-05,
|
7039 |
+
"loss": 0.2765,
|
7040 |
+
"step": 352000
|
7041 |
+
},
|
7042 |
+
{
|
7043 |
+
"epoch": 8.97,
|
7044 |
+
"eval_loss": 0.8099916577339172,
|
7045 |
+
"eval_runtime": 1.258,
|
7046 |
+
"eval_samples_per_second": 794.9,
|
7047 |
+
"eval_steps_per_second": 12.718,
|
7048 |
+
"step": 352000
|
7049 |
+
},
|
7050 |
+
{
|
7051 |
+
"epoch": 8.98,
|
7052 |
+
"learning_rate": 7.369631668400746e-05,
|
7053 |
+
"loss": 0.2763,
|
7054 |
+
"step": 352500
|
7055 |
+
},
|
7056 |
+
{
|
7057 |
+
"epoch": 9.0,
|
7058 |
+
"learning_rate": 7.3299726562476e-05,
|
7059 |
+
"loss": 0.2762,
|
7060 |
+
"step": 353000
|
7061 |
+
},
|
7062 |
+
{
|
7063 |
+
"epoch": 9.0,
|
7064 |
+
"eval_loss": 0.8091428279876709,
|
7065 |
+
"eval_runtime": 1.2439,
|
7066 |
+
"eval_samples_per_second": 803.907,
|
7067 |
+
"eval_steps_per_second": 12.863,
|
7068 |
+
"step": 353000
|
7069 |
+
},
|
7070 |
+
{
|
7071 |
+
"epoch": 9.01,
|
7072 |
+
"learning_rate": 7.290402990204531e-05,
|
7073 |
+
"loss": 0.2763,
|
7074 |
+
"step": 353500
|
7075 |
+
},
|
7076 |
+
{
|
7077 |
+
"epoch": 9.02,
|
7078 |
+
"learning_rate": 7.250923102999073e-05,
|
7079 |
+
"loss": 0.2765,
|
7080 |
+
"step": 354000
|
7081 |
+
},
|
7082 |
+
{
|
7083 |
+
"epoch": 9.02,
|
7084 |
+
"eval_loss": 0.8080966472625732,
|
7085 |
+
"eval_runtime": 1.3706,
|
7086 |
+
"eval_samples_per_second": 729.606,
|
7087 |
+
"eval_steps_per_second": 11.674,
|
7088 |
+
"step": 354000
|
7089 |
+
},
|
7090 |
+
{
|
7091 |
+
"epoch": 9.03,
|
7092 |
+
"learning_rate": 7.211533426376934e-05,
|
7093 |
+
"loss": 0.2762,
|
7094 |
+
"step": 354500
|
7095 |
+
},
|
7096 |
+
{
|
7097 |
+
"epoch": 9.05,
|
7098 |
+
"learning_rate": 7.172234391097317e-05,
|
7099 |
+
"loss": 0.2763,
|
7100 |
+
"step": 355000
|
7101 |
+
},
|
7102 |
+
{
|
7103 |
+
"epoch": 9.05,
|
7104 |
+
"eval_loss": 0.8072100877761841,
|
7105 |
+
"eval_runtime": 1.3323,
|
7106 |
+
"eval_samples_per_second": 750.572,
|
7107 |
+
"eval_steps_per_second": 12.009,
|
7108 |
+
"step": 355000
|
7109 |
+
},
|
7110 |
+
{
|
7111 |
+
"epoch": 9.06,
|
7112 |
+
"learning_rate": 7.133026426928173e-05,
|
7113 |
+
"loss": 0.2764,
|
7114 |
+
"step": 355500
|
7115 |
+
},
|
7116 |
+
{
|
7117 |
+
"epoch": 9.07,
|
7118 |
+
"learning_rate": 7.093909962641514e-05,
|
7119 |
+
"loss": 0.2763,
|
7120 |
+
"step": 356000
|
7121 |
+
},
|
7122 |
+
{
|
7123 |
+
"epoch": 9.07,
|
7124 |
+
"eval_loss": 0.8050107359886169,
|
7125 |
+
"eval_runtime": 1.3391,
|
7126 |
+
"eval_samples_per_second": 746.746,
|
7127 |
+
"eval_steps_per_second": 11.948,
|
7128 |
+
"step": 356000
|
7129 |
+
},
|
7130 |
+
{
|
7131 |
+
"epoch": 9.09,
|
7132 |
+
"learning_rate": 7.054885426008737e-05,
|
7133 |
+
"loss": 0.276,
|
7134 |
+
"step": 356500
|
7135 |
+
},
|
7136 |
+
{
|
7137 |
+
"epoch": 9.1,
|
7138 |
+
"learning_rate": 7.015953243795907e-05,
|
7139 |
+
"loss": 0.2763,
|
7140 |
+
"step": 357000
|
7141 |
+
},
|
7142 |
+
{
|
7143 |
+
"epoch": 9.1,
|
7144 |
+
"eval_loss": 0.8131558299064636,
|
7145 |
+
"eval_runtime": 1.3479,
|
7146 |
+
"eval_samples_per_second": 741.911,
|
7147 |
+
"eval_steps_per_second": 11.871,
|
7148 |
+
"step": 357000
|
7149 |
+
},
|
7150 |
+
{
|
7151 |
+
"epoch": 9.11,
|
7152 |
+
"learning_rate": 6.97711384175914e-05,
|
7153 |
+
"loss": 0.2762,
|
7154 |
+
"step": 357500
|
7155 |
+
},
|
7156 |
+
{
|
7157 |
+
"epoch": 9.12,
|
7158 |
+
"learning_rate": 6.938367644639911e-05,
|
7159 |
+
"loss": 0.2758,
|
7160 |
+
"step": 358000
|
7161 |
+
},
|
7162 |
+
{
|
7163 |
+
"epoch": 9.12,
|
7164 |
+
"eval_loss": 0.8091667294502258,
|
7165 |
+
"eval_runtime": 1.3351,
|
7166 |
+
"eval_samples_per_second": 749.026,
|
7167 |
+
"eval_steps_per_second": 11.984,
|
7168 |
+
"step": 358000
|
7169 |
+
},
|
7170 |
+
{
|
7171 |
+
"epoch": 9.14,
|
7172 |
+
"learning_rate": 6.899715076160425e-05,
|
7173 |
+
"loss": 0.2757,
|
7174 |
+
"step": 358500
|
7175 |
+
},
|
7176 |
+
{
|
7177 |
+
"epoch": 9.15,
|
7178 |
+
"learning_rate": 6.861156559018986e-05,
|
7179 |
+
"loss": 0.2758,
|
7180 |
+
"step": 359000
|
7181 |
+
},
|
7182 |
+
{
|
7183 |
+
"epoch": 9.15,
|
7184 |
+
"eval_loss": 0.8032931685447693,
|
7185 |
+
"eval_runtime": 1.2963,
|
7186 |
+
"eval_samples_per_second": 771.398,
|
7187 |
+
"eval_steps_per_second": 12.342,
|
7188 |
+
"step": 359000
|
7189 |
+
},
|
7190 |
+
{
|
7191 |
+
"epoch": 9.16,
|
7192 |
+
"learning_rate": 6.822692514885346e-05,
|
7193 |
+
"loss": 0.2757,
|
7194 |
+
"step": 359500
|
7195 |
+
},
|
7196 |
+
{
|
7197 |
+
"epoch": 9.17,
|
7198 |
+
"learning_rate": 6.784323364396135e-05,
|
7199 |
+
"loss": 0.2757,
|
7200 |
+
"step": 360000
|
7201 |
+
},
|
7202 |
+
{
|
7203 |
+
"epoch": 9.17,
|
7204 |
+
"eval_loss": 0.8121919631958008,
|
7205 |
+
"eval_runtime": 1.3627,
|
7206 |
+
"eval_samples_per_second": 733.817,
|
7207 |
+
"eval_steps_per_second": 11.741,
|
7208 |
+
"step": 360000
|
7209 |
}
|
7210 |
],
|
7211 |
"max_steps": 500000,
|
7212 |
"num_train_epochs": 13,
|
7213 |
+
"total_flos": 1.1501415685664595e+22,
|
7214 |
"trial_name": null,
|
7215 |
"trial_params": null
|
7216 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 102501541
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:86d6df4782178506acacd0c83df02e5b041758e0dad6be6ff1fb20dae19c22b2
|
3 |
size 102501541
|