mp-02 committed on
Commit
596ce6a
1 Parent(s): 4c5b0cd

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 20.0,
3
- "eval_accuracy": 0.8434772622875517,
4
- "eval_f1": 0.8826979472140762,
5
- "eval_loss": 0.5468625426292419,
6
- "eval_precision": 0.8632887189292543,
7
- "eval_recall": 0.903,
8
- "eval_runtime": 3.697,
9
  "eval_samples": 54,
10
- "eval_samples_per_second": 14.607,
11
- "eval_steps_per_second": 1.623,
12
  "predict_accuracy": 0.8218373936014088,
13
  "predict_f1": 0.9038133181559477,
14
  "predict_loss": 0.6888472437858582,
@@ -17,9 +17,9 @@
17
  "predict_runtime": 2.0596,
18
  "predict_samples_per_second": 8.74,
19
  "predict_steps_per_second": 0.971,
20
- "train_loss": 0.4636675516764323,
21
- "train_runtime": 318.7136,
22
  "train_samples": 150,
23
- "train_samples_per_second": 9.413,
24
- "train_steps_per_second": 0.941
25
  }
 
1
  {
2
+ "epoch": 10.53,
3
+ "eval_accuracy": 0.833371612310519,
4
+ "eval_f1": 0.8794946550048591,
5
+ "eval_loss": 0.5784164071083069,
6
+ "eval_precision": 0.8553875236294896,
7
+ "eval_recall": 0.905,
8
+ "eval_runtime": 3.53,
9
  "eval_samples": 54,
10
+ "eval_samples_per_second": 15.297,
11
+ "eval_steps_per_second": 1.133,
12
  "predict_accuracy": 0.8218373936014088,
13
  "predict_f1": 0.9038133181559477,
14
  "predict_loss": 0.6888472437858582,
 
17
  "predict_runtime": 2.0596,
18
  "predict_samples_per_second": 8.74,
19
  "predict_steps_per_second": 0.971,
20
+ "train_loss": 0.50620361328125,
21
+ "train_runtime": 250.4274,
22
  "train_samples": 150,
23
+ "train_samples_per_second": 6.389,
24
+ "train_steps_per_second": 1.597
25
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 20.0,
3
- "eval_accuracy": 0.8434772622875517,
4
- "eval_f1": 0.8826979472140762,
5
- "eval_loss": 0.5468625426292419,
6
- "eval_precision": 0.8632887189292543,
7
- "eval_recall": 0.903,
8
- "eval_runtime": 3.697,
9
  "eval_samples": 54,
10
- "eval_samples_per_second": 14.607,
11
- "eval_steps_per_second": 1.623
12
  }
 
1
  {
2
+ "epoch": 10.53,
3
+ "eval_accuracy": 0.833371612310519,
4
+ "eval_f1": 0.8794946550048591,
5
+ "eval_loss": 0.5784164071083069,
6
+ "eval_precision": 0.8553875236294896,
7
+ "eval_recall": 0.905,
8
+ "eval_runtime": 3.53,
9
  "eval_samples": 54,
10
+ "eval_samples_per_second": 15.297,
11
+ "eval_steps_per_second": 1.133
12
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6676119aed64fb98a2529aadd0eb6d8a586dc32bcd9cdd67336343409ea44e02
3
  size 501420883
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09c539b37ae0e9a660c60e3ec980a33949184bb678cbc4ce1034cc7f63cfd9c6
3
  size 501420883
runs/Aug24_19-17-02_bernini/1724519834.0999153/events.out.tfevents.1724519834.bernini.2498.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f2f29a3eb043f87b51037aeef3b2bba41943231865c71223524ff532d67e558
3
+ size 4665
runs/Aug24_19-17-02_bernini/events.out.tfevents.1724519834.bernini.2498.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3aead96a37123fe126d5f594e9261d05cd21fc383115bd0d934c104e5cee4e7
3
+ size 11945
runs/Aug24_19-17-02_bernini/events.out.tfevents.1724520093.bernini.2498.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a691246cd5d6024bc0eba5baa25a3c7326465903a0736478eda04b748fe72ac
3
+ size 512
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 20.0,
3
- "train_loss": 0.4636675516764323,
4
- "train_runtime": 318.7136,
5
  "train_samples": 150,
6
- "train_samples_per_second": 9.413,
7
- "train_steps_per_second": 0.941
8
  }
 
1
  {
2
+ "epoch": 10.53,
3
+ "train_loss": 0.50620361328125,
4
+ "train_runtime": 250.4274,
5
  "train_samples": 150,
6
+ "train_samples_per_second": 6.389,
7
+ "train_steps_per_second": 1.597
8
  }
trainer_state.json CHANGED
@@ -1,169 +1,217 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 20.0,
5
- "global_step": 300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 1.67,
12
- "eval_accuracy": 0.6553743683968765,
13
- "eval_f1": 0.49752416882810657,
14
- "eval_loss": 1.2105501890182495,
15
- "eval_precision": 0.4707719767960732,
16
- "eval_recall": 0.5275,
17
- "eval_runtime": 3.73,
18
- "eval_samples_per_second": 14.477,
19
- "eval_steps_per_second": 1.609,
20
  "step": 25
21
  },
22
  {
23
- "epoch": 3.33,
24
- "eval_accuracy": 0.7687184198438217,
25
- "eval_f1": 0.777563793933558,
26
- "eval_loss": 0.7854474186897278,
27
- "eval_precision": 0.7497678737233054,
28
- "eval_recall": 0.8075,
29
- "eval_runtime": 3.7553,
30
- "eval_samples_per_second": 14.38,
31
- "eval_steps_per_second": 1.598,
32
  "step": 50
33
  },
34
  {
35
- "epoch": 5.0,
36
- "eval_accuracy": 0.8141938447404685,
37
- "eval_f1": 0.8184898354307841,
38
- "eval_loss": 0.6001709699630737,
39
- "eval_precision": 0.7931519699812383,
40
- "eval_recall": 0.8455,
41
- "eval_runtime": 3.7036,
42
- "eval_samples_per_second": 14.58,
43
- "eval_steps_per_second": 1.62,
44
  "step": 75
45
  },
46
  {
47
- "epoch": 6.67,
48
- "eval_accuracy": 0.7781350482315113,
49
- "eval_f1": 0.827977315689981,
50
- "eval_loss": 0.6523196697235107,
51
- "eval_precision": 0.7849462365591398,
52
- "eval_recall": 0.876,
53
- "eval_runtime": 3.7641,
54
- "eval_samples_per_second": 14.346,
55
- "eval_steps_per_second": 1.594,
56
  "step": 100
57
  },
58
  {
59
- "epoch": 8.33,
60
- "eval_accuracy": 0.8354386770785485,
61
- "eval_f1": 0.8442622950819673,
62
- "eval_loss": 0.5189960598945618,
63
- "eval_precision": 0.8151769087523277,
64
- "eval_recall": 0.8755,
65
- "eval_runtime": 3.8049,
66
- "eval_samples_per_second": 14.192,
67
- "eval_steps_per_second": 1.577,
68
  "step": 125
69
  },
70
  {
71
- "epoch": 10.0,
72
- "eval_accuracy": 0.8338309600367478,
73
- "eval_f1": 0.8588007736943907,
74
- "eval_loss": 0.5064252018928528,
75
- "eval_precision": 0.8314606741573034,
76
- "eval_recall": 0.888,
77
- "eval_runtime": 3.7776,
78
- "eval_samples_per_second": 14.295,
79
- "eval_steps_per_second": 1.588,
80
  "step": 150
81
  },
82
  {
83
- "epoch": 11.67,
84
- "eval_accuracy": 0.8344051446945338,
85
- "eval_f1": 0.8693320331545589,
86
- "eval_loss": 0.5342020392417908,
87
- "eval_precision": 0.8482397716460514,
88
- "eval_recall": 0.8915,
89
- "eval_runtime": 3.753,
90
- "eval_samples_per_second": 14.388,
91
- "eval_steps_per_second": 1.599,
92
  "step": 175
93
  },
94
  {
95
- "epoch": 13.33,
96
- "eval_accuracy": 0.8200505282498851,
97
- "eval_f1": 0.8703071672354948,
98
- "eval_loss": 0.5538159012794495,
99
- "eval_precision": 0.8491912464319695,
100
- "eval_recall": 0.8925,
101
- "eval_runtime": 3.8395,
102
- "eval_samples_per_second": 14.064,
103
- "eval_steps_per_second": 1.563,
104
  "step": 200
105
  },
106
  {
107
- "epoch": 15.0,
108
- "eval_accuracy": 0.8348644924207625,
109
- "eval_f1": 0.8777398928397467,
110
- "eval_loss": 0.5335590243339539,
111
- "eval_precision": 0.855650522317189,
112
- "eval_recall": 0.901,
113
- "eval_runtime": 3.7278,
114
- "eval_samples_per_second": 14.486,
115
- "eval_steps_per_second": 1.61,
116
  "step": 225
117
  },
118
  {
119
- "epoch": 16.67,
120
- "eval_accuracy": 0.8385392742305926,
121
- "eval_f1": 0.8764648437499999,
122
- "eval_loss": 0.5464726686477661,
123
- "eval_precision": 0.8563931297709924,
124
- "eval_recall": 0.8975,
125
- "eval_runtime": 3.7679,
126
- "eval_samples_per_second": 14.331,
127
- "eval_steps_per_second": 1.592,
128
  "step": 250
129
  },
130
  {
131
- "epoch": 18.33,
132
- "eval_accuracy": 0.8439366100137804,
133
- "eval_f1": 0.8787509148572822,
134
- "eval_loss": 0.5402917265892029,
135
- "eval_precision": 0.8580276322058122,
136
- "eval_recall": 0.9005,
137
- "eval_runtime": 3.7992,
138
- "eval_samples_per_second": 14.214,
139
- "eval_steps_per_second": 1.579,
140
  "step": 275
141
  },
142
  {
143
- "epoch": 20.0,
144
- "eval_accuracy": 0.8434772622875517,
145
- "eval_f1": 0.8826979472140762,
146
- "eval_loss": 0.5468625426292419,
147
- "eval_precision": 0.8632887189292543,
148
- "eval_recall": 0.903,
149
- "eval_runtime": 3.7775,
150
- "eval_samples_per_second": 14.295,
151
- "eval_steps_per_second": 1.588,
152
  "step": 300
153
  },
154
  {
155
- "epoch": 20.0,
156
- "step": 300,
157
- "total_flos": 790816029696000.0,
158
- "train_loss": 0.4636675516764323,
159
- "train_runtime": 318.7136,
160
- "train_samples_per_second": 9.413,
161
- "train_steps_per_second": 0.941
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
  }
163
  ],
164
- "max_steps": 300,
165
- "num_train_epochs": 20,
166
- "total_flos": 790816029696000.0,
167
  "trial_name": null,
168
  "trial_params": null
169
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.526315789473685,
5
+ "global_step": 400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.66,
12
+ "eval_accuracy": 0.5721175930179145,
13
+ "eval_f1": 0.34372003835091086,
14
+ "eval_loss": 1.351142406463623,
15
+ "eval_precision": 0.3301104972375691,
16
+ "eval_recall": 0.3585,
17
+ "eval_runtime": 3.6101,
18
+ "eval_samples_per_second": 14.958,
19
+ "eval_steps_per_second": 1.108,
20
  "step": 25
21
  },
22
  {
23
+ "epoch": 1.32,
24
+ "eval_accuracy": 0.7614836931557188,
25
+ "eval_f1": 0.7229437229437228,
26
+ "eval_loss": 0.905920684337616,
27
+ "eval_precision": 0.6964782205746061,
28
+ "eval_recall": 0.7515,
29
+ "eval_runtime": 3.5063,
30
+ "eval_samples_per_second": 15.401,
31
+ "eval_steps_per_second": 1.141,
32
  "step": 50
33
  },
34
  {
35
+ "epoch": 1.97,
36
+ "eval_accuracy": 0.7796279283417548,
37
+ "eval_f1": 0.7946449916327994,
38
+ "eval_loss": 0.7163704633712769,
39
+ "eval_precision": 0.7613376087952359,
40
+ "eval_recall": 0.831,
41
+ "eval_runtime": 3.5828,
42
+ "eval_samples_per_second": 15.072,
43
+ "eval_steps_per_second": 1.116,
44
  "step": 75
45
  },
46
  {
47
+ "epoch": 2.63,
48
+ "eval_accuracy": 0.799265043638034,
49
+ "eval_f1": 0.8249158249158249,
50
+ "eval_loss": 0.6392571926116943,
51
+ "eval_precision": 0.7947173308619092,
52
+ "eval_recall": 0.8575,
53
+ "eval_runtime": 3.5968,
54
+ "eval_samples_per_second": 15.013,
55
+ "eval_steps_per_second": 1.112,
56
  "step": 100
57
  },
58
  {
59
+ "epoch": 3.29,
60
+ "eval_accuracy": 0.8104042259990814,
61
+ "eval_f1": 0.8409859835669407,
62
+ "eval_loss": 0.5755508542060852,
63
+ "eval_precision": 0.813844714686623,
64
+ "eval_recall": 0.87,
65
+ "eval_runtime": 3.5975,
66
+ "eval_samples_per_second": 15.011,
67
+ "eval_steps_per_second": 1.112,
68
  "step": 125
69
  },
70
  {
71
+ "epoch": 3.95,
72
+ "eval_accuracy": 0.8323380799265043,
73
+ "eval_f1": 0.8506134231416886,
74
+ "eval_loss": 0.5508233308792114,
75
+ "eval_precision": 0.8196569309225776,
76
+ "eval_recall": 0.884,
77
+ "eval_runtime": 3.6045,
78
+ "eval_samples_per_second": 14.981,
79
+ "eval_steps_per_second": 1.11,
80
  "step": 150
81
  },
82
  {
83
+ "epoch": 4.61,
84
+ "eval_accuracy": 0.8327974276527331,
85
+ "eval_f1": 0.8600435097897026,
86
+ "eval_loss": 0.5458412170410156,
87
+ "eval_precision": 0.8324754328497894,
88
+ "eval_recall": 0.8895,
89
+ "eval_runtime": 3.5309,
90
+ "eval_samples_per_second": 15.294,
91
+ "eval_steps_per_second": 1.133,
92
  "step": 175
93
  },
94
  {
95
+ "epoch": 5.26,
96
+ "eval_accuracy": 0.826596233348645,
97
+ "eval_f1": 0.8491160087188181,
98
+ "eval_loss": 0.5740342736244202,
99
+ "eval_precision": 0.8233912635039925,
100
+ "eval_recall": 0.8765,
101
+ "eval_runtime": 3.622,
102
+ "eval_samples_per_second": 14.909,
103
+ "eval_steps_per_second": 1.104,
104
  "step": 200
105
  },
106
  {
107
+ "epoch": 5.92,
108
+ "eval_accuracy": 0.8361276986678916,
109
+ "eval_f1": 0.8709914320685433,
110
+ "eval_loss": 0.5719187259674072,
111
+ "eval_precision": 0.8532374100719424,
112
+ "eval_recall": 0.8895,
113
+ "eval_runtime": 3.5659,
114
+ "eval_samples_per_second": 15.143,
115
+ "eval_steps_per_second": 1.122,
116
  "step": 225
117
  },
118
  {
119
+ "epoch": 6.58,
120
+ "eval_accuracy": 0.8263665594855305,
121
+ "eval_f1": 0.8736131210805596,
122
+ "eval_loss": 0.5435599684715271,
123
+ "eval_precision": 0.8438956197576887,
124
+ "eval_recall": 0.9055,
125
+ "eval_runtime": 3.5266,
126
+ "eval_samples_per_second": 15.312,
127
+ "eval_steps_per_second": 1.134,
128
  "step": 250
129
  },
130
  {
131
+ "epoch": 7.24,
132
+ "eval_accuracy": 0.8290078089113458,
133
+ "eval_f1": 0.8783914728682171,
134
+ "eval_loss": 0.5714461207389832,
135
+ "eval_precision": 0.8519736842105263,
136
+ "eval_recall": 0.9065,
137
+ "eval_runtime": 3.6124,
138
+ "eval_samples_per_second": 14.948,
139
+ "eval_steps_per_second": 1.107,
140
  "step": 275
141
  },
142
  {
143
+ "epoch": 7.89,
144
+ "eval_accuracy": 0.8280891134588884,
145
+ "eval_f1": 0.8791048406713695,
146
+ "eval_loss": 0.5853330492973328,
147
+ "eval_precision": 0.8559924206537186,
148
+ "eval_recall": 0.9035,
149
+ "eval_runtime": 3.6008,
150
+ "eval_samples_per_second": 14.997,
151
+ "eval_steps_per_second": 1.111,
152
  "step": 300
153
  },
154
  {
155
+ "epoch": 8.55,
156
+ "eval_accuracy": 0.8389986219568213,
157
+ "eval_f1": 0.8807785888077859,
158
+ "eval_loss": 0.570177435874939,
159
+ "eval_precision": 0.8578199052132701,
160
+ "eval_recall": 0.905,
161
+ "eval_runtime": 3.5637,
162
+ "eval_samples_per_second": 15.153,
163
+ "eval_steps_per_second": 1.122,
164
+ "step": 325
165
+ },
166
+ {
167
+ "epoch": 9.21,
168
+ "eval_accuracy": 0.8418695452457511,
169
+ "eval_f1": 0.8775261748234722,
170
+ "eval_loss": 0.5666728019714355,
171
+ "eval_precision": 0.8552444233507357,
172
+ "eval_recall": 0.901,
173
+ "eval_runtime": 3.5809,
174
+ "eval_samples_per_second": 15.08,
175
+ "eval_steps_per_second": 1.117,
176
+ "step": 350
177
+ },
178
+ {
179
+ "epoch": 9.87,
180
+ "eval_accuracy": 0.8338309600367478,
181
+ "eval_f1": 0.8786773644541697,
182
+ "eval_loss": 0.5793057084083557,
183
+ "eval_precision": 0.8551822053951728,
184
+ "eval_recall": 0.9035,
185
+ "eval_runtime": 3.5472,
186
+ "eval_samples_per_second": 15.223,
187
+ "eval_steps_per_second": 1.128,
188
+ "step": 375
189
+ },
190
+ {
191
+ "epoch": 10.53,
192
+ "eval_accuracy": 0.833371612310519,
193
+ "eval_f1": 0.8794946550048591,
194
+ "eval_loss": 0.5784164071083069,
195
+ "eval_precision": 0.8553875236294896,
196
+ "eval_recall": 0.905,
197
+ "eval_runtime": 3.5919,
198
+ "eval_samples_per_second": 15.034,
199
+ "eval_steps_per_second": 1.114,
200
+ "step": 400
201
+ },
202
+ {
203
+ "epoch": 10.53,
204
+ "step": 400,
205
+ "total_flos": 416496442306560.0,
206
+ "train_loss": 0.50620361328125,
207
+ "train_runtime": 250.4274,
208
+ "train_samples_per_second": 6.389,
209
+ "train_steps_per_second": 1.597
210
  }
211
  ],
212
+ "max_steps": 400,
213
+ "num_train_epochs": 11,
214
+ "total_flos": 416496442306560.0,
215
  "trial_name": null,
216
  "trial_params": null
217
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad5c6ae451d7fcc096ada600794b9529838382a07bad9f882b8f80b359b5b02b
3
  size 2927
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b9738bcfd98ccbf71720f6b0ac66e4c20f1ded32caa28f7663edc931468381e
3
  size 2927