DreamyBeaver
committed on
Commit
•
c29b68a
1
Parent(s):
791e49a
Update model with 10 epochs training instead of 5
Browse files
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +93 -36
- training_args.bin +2 -2
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 535701061
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6c0959ac08c4fdfbb1a577f3b4842153ff8bdaec08671db6971d10057890ba8
|
3 |
size 535701061
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 267855533
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:982a429bea27cf57937ff13a0a06b0bdcd583103106f091c13b374ff4a8befe8
|
3 |
size 267855533
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14575
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c24ded61278d3bdb501c4e495dd10ddf01fa15e0368e52de2a102c5edb6db5a3
|
3 |
size 14575
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e71dec5fad31471bb08f1e33f5fe0414a42f69c80b949819391dcaab38f181f3
|
3 |
size 627
|
trainer_state.json
CHANGED
@@ -1,73 +1,130 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
9 |
"log_history": [
|
10 |
{
|
11 |
"epoch": 1.0,
|
12 |
-
"eval_accuracy": 0.
|
13 |
-
"eval_loss": 0.
|
14 |
-
"eval_runtime":
|
15 |
-
"eval_samples_per_second":
|
16 |
-
"eval_steps_per_second":
|
17 |
"step": 205
|
18 |
},
|
19 |
{
|
20 |
"epoch": 2.0,
|
21 |
-
"eval_accuracy": 0.
|
22 |
-
"eval_loss": 0.
|
23 |
-
"eval_runtime":
|
24 |
-
"eval_samples_per_second":
|
25 |
-
"eval_steps_per_second":
|
26 |
"step": 410
|
27 |
},
|
28 |
{
|
29 |
"epoch": 2.44,
|
30 |
-
"learning_rate": 1.
|
31 |
-
"loss": 0.
|
32 |
"step": 500
|
33 |
},
|
34 |
{
|
35 |
"epoch": 3.0,
|
36 |
-
"eval_accuracy": 0.
|
37 |
-
"eval_loss": 0.
|
38 |
-
"eval_runtime":
|
39 |
-
"eval_samples_per_second":
|
40 |
-
"eval_steps_per_second":
|
41 |
"step": 615
|
42 |
},
|
43 |
{
|
44 |
"epoch": 4.0,
|
45 |
-
"eval_accuracy": 0.
|
46 |
-
"eval_loss": 0.
|
47 |
-
"eval_runtime":
|
48 |
-
"eval_samples_per_second":
|
49 |
-
"eval_steps_per_second":
|
50 |
"step": 820
|
51 |
},
|
52 |
{
|
53 |
"epoch": 4.88,
|
54 |
-
"learning_rate":
|
55 |
-
"loss": 0.
|
56 |
"step": 1000
|
57 |
},
|
58 |
{
|
59 |
"epoch": 5.0,
|
60 |
-
"eval_accuracy": 0.
|
61 |
-
"eval_loss": 0.
|
62 |
-
"eval_runtime":
|
63 |
-
"eval_samples_per_second":
|
64 |
-
"eval_steps_per_second":
|
65 |
"step": 1025
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
}
|
67 |
],
|
68 |
-
"max_steps":
|
69 |
-
"num_train_epochs":
|
70 |
-
"total_flos":
|
71 |
"trial_name": null,
|
72 |
"trial_params": null
|
73 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.20570282638072968,
|
3 |
+
"best_model_checkpoint": "DistilBERT1/checkpoint-205",
|
4 |
+
"epoch": 10.0,
|
5 |
+
"global_step": 2050,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
9 |
"log_history": [
|
10 |
{
|
11 |
"epoch": 1.0,
|
12 |
+
"eval_accuracy": 0.921028466483012,
|
13 |
+
"eval_loss": 0.20570282638072968,
|
14 |
+
"eval_runtime": 6.0089,
|
15 |
+
"eval_samples_per_second": 181.232,
|
16 |
+
"eval_steps_per_second": 11.483,
|
17 |
"step": 205
|
18 |
},
|
19 |
{
|
20 |
"epoch": 2.0,
|
21 |
+
"eval_accuracy": 0.9302112029384757,
|
22 |
+
"eval_loss": 0.2391415685415268,
|
23 |
+
"eval_runtime": 6.3171,
|
24 |
+
"eval_samples_per_second": 172.388,
|
25 |
+
"eval_steps_per_second": 10.923,
|
26 |
"step": 410
|
27 |
},
|
28 |
{
|
29 |
"epoch": 2.44,
|
30 |
+
"learning_rate": 1.5121951219512196e-05,
|
31 |
+
"loss": 0.2164,
|
32 |
"step": 500
|
33 |
},
|
34 |
{
|
35 |
"epoch": 3.0,
|
36 |
+
"eval_accuracy": 0.9320477502295684,
|
37 |
+
"eval_loss": 0.2756326496601105,
|
38 |
+
"eval_runtime": 6.7274,
|
39 |
+
"eval_samples_per_second": 161.875,
|
40 |
+
"eval_steps_per_second": 10.257,
|
41 |
"step": 615
|
42 |
},
|
43 |
{
|
44 |
"epoch": 4.0,
|
45 |
+
"eval_accuracy": 0.9274563820018366,
|
46 |
+
"eval_loss": 0.322733998298645,
|
47 |
+
"eval_runtime": 6.7541,
|
48 |
+
"eval_samples_per_second": 161.235,
|
49 |
+
"eval_steps_per_second": 10.216,
|
50 |
"step": 820
|
51 |
},
|
52 |
{
|
53 |
"epoch": 4.88,
|
54 |
+
"learning_rate": 1.024390243902439e-05,
|
55 |
+
"loss": 0.0376,
|
56 |
"step": 1000
|
57 |
},
|
58 |
{
|
59 |
"epoch": 5.0,
|
60 |
+
"eval_accuracy": 0.9329660238751147,
|
61 |
+
"eval_loss": 0.3475565016269684,
|
62 |
+
"eval_runtime": 6.7958,
|
63 |
+
"eval_samples_per_second": 160.245,
|
64 |
+
"eval_steps_per_second": 10.153,
|
65 |
"step": 1025
|
66 |
+
},
|
67 |
+
{
|
68 |
+
"epoch": 6.0,
|
69 |
+
"eval_accuracy": 0.9320477502295684,
|
70 |
+
"eval_loss": 0.38214486837387085,
|
71 |
+
"eval_runtime": 6.7413,
|
72 |
+
"eval_samples_per_second": 161.542,
|
73 |
+
"eval_steps_per_second": 10.235,
|
74 |
+
"step": 1230
|
75 |
+
},
|
76 |
+
{
|
77 |
+
"epoch": 7.0,
|
78 |
+
"eval_accuracy": 0.9338842975206612,
|
79 |
+
"eval_loss": 0.39508363604545593,
|
80 |
+
"eval_runtime": 6.7438,
|
81 |
+
"eval_samples_per_second": 161.481,
|
82 |
+
"eval_steps_per_second": 10.232,
|
83 |
+
"step": 1435
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"epoch": 7.32,
|
87 |
+
"learning_rate": 5.365853658536586e-06,
|
88 |
+
"loss": 0.0087,
|
89 |
+
"step": 1500
|
90 |
+
},
|
91 |
+
{
|
92 |
+
"epoch": 8.0,
|
93 |
+
"eval_accuracy": 0.9357208448117539,
|
94 |
+
"eval_loss": 0.412009596824646,
|
95 |
+
"eval_runtime": 6.7147,
|
96 |
+
"eval_samples_per_second": 162.181,
|
97 |
+
"eval_steps_per_second": 10.276,
|
98 |
+
"step": 1640
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"epoch": 9.0,
|
102 |
+
"eval_accuracy": 0.9338842975206612,
|
103 |
+
"eval_loss": 0.407277911901474,
|
104 |
+
"eval_runtime": 5.9717,
|
105 |
+
"eval_samples_per_second": 182.361,
|
106 |
+
"eval_steps_per_second": 11.555,
|
107 |
+
"step": 1845
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 9.76,
|
111 |
+
"learning_rate": 4.878048780487805e-07,
|
112 |
+
"loss": 0.0017,
|
113 |
+
"step": 2000
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"epoch": 10.0,
|
117 |
+
"eval_accuracy": 0.9338842975206612,
|
118 |
+
"eval_loss": 0.41211310029029846,
|
119 |
+
"eval_runtime": 5.9224,
|
120 |
+
"eval_samples_per_second": 183.878,
|
121 |
+
"eval_steps_per_second": 11.651,
|
122 |
+
"step": 2050
|
123 |
}
|
124 |
],
|
125 |
+
"max_steps": 2050,
|
126 |
+
"num_train_epochs": 10,
|
127 |
+
"total_flos": 1609911482519136.0,
|
128 |
"trial_name": null,
|
129 |
"trial_params": null
|
130 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e108b219da858c66c223a83efa615970a4bb54079a81bb38dd212ad549dbcb46
|
3 |
+
size 3515
|