DreamyBeaver committed on
Commit
c29b68a
1 Parent(s): 791e49a

Update model with 10 epochs training instead of 5

Browse files
Files changed (6) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +93 -36
  6. training_args.bin +2 -2
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ec84c2f0f952550b58c77f24e7a9e2e22f0f65c93eee46c1e0f74e464dc8138
3
  size 535701061
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6c0959ac08c4fdfbb1a577f3b4842153ff8bdaec08671db6971d10057890ba8
3
  size 535701061
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f199e4fdffce5c83e85e08698509f156311a62cfba2f350b7f220bf7bf720ed
3
  size 267855533
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:982a429bea27cf57937ff13a0a06b0bdcd583103106f091c13b374ff4a8befe8
3
  size 267855533
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86f0404e212b33c54e2d7fa17e310613dcbdc6e02640ff6dbe0e6e7bd9ec52e3
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c24ded61278d3bdb501c4e495dd10ddf01fa15e0368e52de2a102c5edb6db5a3
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:480847c63183681bd2c27f7e8f3e7a7b7833a9deb49302dac82b9eeedcbee11c
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e71dec5fad31471bb08f1e33f5fe0414a42f69c80b949819391dcaab38f181f3
3
  size 627
trainer_state.json CHANGED
@@ -1,73 +1,130 @@
1
  {
2
- "best_metric": 0.20286166667938232,
3
- "best_model_checkpoint": "model1/checkpoint-205",
4
- "epoch": 5.0,
5
- "global_step": 1025,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "eval_accuracy": 0.931129476584022,
13
- "eval_loss": 0.20286166667938232,
14
- "eval_runtime": 7.2605,
15
- "eval_samples_per_second": 149.99,
16
- "eval_steps_per_second": 9.503,
17
  "step": 205
18
  },
19
  {
20
  "epoch": 2.0,
21
- "eval_accuracy": 0.9357208448117539,
22
- "eval_loss": 0.22182105481624603,
23
- "eval_runtime": 7.6337,
24
- "eval_samples_per_second": 142.657,
25
- "eval_steps_per_second": 9.039,
26
  "step": 410
27
  },
28
  {
29
  "epoch": 2.44,
30
- "learning_rate": 1.024390243902439e-05,
31
- "loss": 0.1905,
32
  "step": 500
33
  },
34
  {
35
  "epoch": 3.0,
36
- "eval_accuracy": 0.9357208448117539,
37
- "eval_loss": 0.2710420787334442,
38
- "eval_runtime": 7.705,
39
- "eval_samples_per_second": 141.337,
40
- "eval_steps_per_second": 8.955,
41
  "step": 615
42
  },
43
  {
44
  "epoch": 4.0,
45
- "eval_accuracy": 0.9366391184573003,
46
- "eval_loss": 0.29000386595726013,
47
- "eval_runtime": 7.7216,
48
- "eval_samples_per_second": 141.034,
49
- "eval_steps_per_second": 8.936,
50
  "step": 820
51
  },
52
  {
53
  "epoch": 4.88,
54
- "learning_rate": 4.878048780487805e-07,
55
- "loss": 0.0358,
56
  "step": 1000
57
  },
58
  {
59
  "epoch": 5.0,
60
- "eval_accuracy": 0.9366391184573003,
61
- "eval_loss": 0.30663052201271057,
62
- "eval_runtime": 7.6339,
63
- "eval_samples_per_second": 142.652,
64
- "eval_steps_per_second": 9.039,
65
  "step": 1025
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  }
67
  ],
68
- "max_steps": 1025,
69
- "num_train_epochs": 5,
70
- "total_flos": 939897589526400.0,
71
  "trial_name": null,
72
  "trial_params": null
73
  }
 
1
  {
2
+ "best_metric": 0.20570282638072968,
3
+ "best_model_checkpoint": "DistilBERT1/checkpoint-205",
4
+ "epoch": 10.0,
5
+ "global_step": 2050,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "eval_accuracy": 0.921028466483012,
13
+ "eval_loss": 0.20570282638072968,
14
+ "eval_runtime": 6.0089,
15
+ "eval_samples_per_second": 181.232,
16
+ "eval_steps_per_second": 11.483,
17
  "step": 205
18
  },
19
  {
20
  "epoch": 2.0,
21
+ "eval_accuracy": 0.9302112029384757,
22
+ "eval_loss": 0.2391415685415268,
23
+ "eval_runtime": 6.3171,
24
+ "eval_samples_per_second": 172.388,
25
+ "eval_steps_per_second": 10.923,
26
  "step": 410
27
  },
28
  {
29
  "epoch": 2.44,
30
+ "learning_rate": 1.5121951219512196e-05,
31
+ "loss": 0.2164,
32
  "step": 500
33
  },
34
  {
35
  "epoch": 3.0,
36
+ "eval_accuracy": 0.9320477502295684,
37
+ "eval_loss": 0.2756326496601105,
38
+ "eval_runtime": 6.7274,
39
+ "eval_samples_per_second": 161.875,
40
+ "eval_steps_per_second": 10.257,
41
  "step": 615
42
  },
43
  {
44
  "epoch": 4.0,
45
+ "eval_accuracy": 0.9274563820018366,
46
+ "eval_loss": 0.322733998298645,
47
+ "eval_runtime": 6.7541,
48
+ "eval_samples_per_second": 161.235,
49
+ "eval_steps_per_second": 10.216,
50
  "step": 820
51
  },
52
  {
53
  "epoch": 4.88,
54
+ "learning_rate": 1.024390243902439e-05,
55
+ "loss": 0.0376,
56
  "step": 1000
57
  },
58
  {
59
  "epoch": 5.0,
60
+ "eval_accuracy": 0.9329660238751147,
61
+ "eval_loss": 0.3475565016269684,
62
+ "eval_runtime": 6.7958,
63
+ "eval_samples_per_second": 160.245,
64
+ "eval_steps_per_second": 10.153,
65
  "step": 1025
66
+ },
67
+ {
68
+ "epoch": 6.0,
69
+ "eval_accuracy": 0.9320477502295684,
70
+ "eval_loss": 0.38214486837387085,
71
+ "eval_runtime": 6.7413,
72
+ "eval_samples_per_second": 161.542,
73
+ "eval_steps_per_second": 10.235,
74
+ "step": 1230
75
+ },
76
+ {
77
+ "epoch": 7.0,
78
+ "eval_accuracy": 0.9338842975206612,
79
+ "eval_loss": 0.39508363604545593,
80
+ "eval_runtime": 6.7438,
81
+ "eval_samples_per_second": 161.481,
82
+ "eval_steps_per_second": 10.232,
83
+ "step": 1435
84
+ },
85
+ {
86
+ "epoch": 7.32,
87
+ "learning_rate": 5.365853658536586e-06,
88
+ "loss": 0.0087,
89
+ "step": 1500
90
+ },
91
+ {
92
+ "epoch": 8.0,
93
+ "eval_accuracy": 0.9357208448117539,
94
+ "eval_loss": 0.412009596824646,
95
+ "eval_runtime": 6.7147,
96
+ "eval_samples_per_second": 162.181,
97
+ "eval_steps_per_second": 10.276,
98
+ "step": 1640
99
+ },
100
+ {
101
+ "epoch": 9.0,
102
+ "eval_accuracy": 0.9338842975206612,
103
+ "eval_loss": 0.407277911901474,
104
+ "eval_runtime": 5.9717,
105
+ "eval_samples_per_second": 182.361,
106
+ "eval_steps_per_second": 11.555,
107
+ "step": 1845
108
+ },
109
+ {
110
+ "epoch": 9.76,
111
+ "learning_rate": 4.878048780487805e-07,
112
+ "loss": 0.0017,
113
+ "step": 2000
114
+ },
115
+ {
116
+ "epoch": 10.0,
117
+ "eval_accuracy": 0.9338842975206612,
118
+ "eval_loss": 0.41211310029029846,
119
+ "eval_runtime": 5.9224,
120
+ "eval_samples_per_second": 183.878,
121
+ "eval_steps_per_second": 11.651,
122
+ "step": 2050
123
  }
124
  ],
125
+ "max_steps": 2050,
126
+ "num_train_epochs": 10,
127
+ "total_flos": 1609911482519136.0,
128
  "trial_name": null,
129
  "trial_params": null
130
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b118a921ab6ca0db093022ec7285078dbf750a2d7daa95ef6f8cf5250ea0b49c
3
- size 3579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e108b219da858c66c223a83efa615970a4bb54079a81bb38dd212ad549dbcb46
3
+ size 3515