fydhfzh commited on
Commit
3f38182
1 Parent(s): 05b9784

Training in progress, step 4000

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbe241edd2f9b1396cbd00e43c84468bc1888ce412f5308429914441b04504f4
3
  size 378386248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d660e2f4ba7425de092e46febcb6dd5a9162f97de6a8f80031a80fd3e1292646
3
  size 378386248
runs/Jul09_00-03-18_LAPTOP-1GID9RGH/events.out.tfevents.1720458199.LAPTOP-1GID9RGH.8944.8 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0571cb92ca90052a0ba0c39f8a4408f6d9cc59a71876cda94f161d5ddbee536f
3
- size 47498
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7189cb4e8869ac61b42f06df636e6771a58cc2120ef2a34efa12e05e1747366
3
+ size 52929
tmp-checkpoint-4000/config.json ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "facebook/hubert-base-ls960",
3
+ "activation_dropout": 0.1,
4
+ "apply_spec_augment": true,
5
+ "architectures": [
6
+ "HubertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 1,
10
+ "classifier_proj_size": 256,
11
+ "conv_bias": false,
12
+ "conv_dim": [
13
+ 512,
14
+ 512,
15
+ 512,
16
+ 512,
17
+ 512,
18
+ 512,
19
+ 512
20
+ ],
21
+ "conv_kernel": [
22
+ 10,
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3,
27
+ 2,
28
+ 2
29
+ ],
30
+ "conv_stride": [
31
+ 5,
32
+ 2,
33
+ 2,
34
+ 2,
35
+ 2,
36
+ 2,
37
+ 2
38
+ ],
39
+ "ctc_loss_reduction": "sum",
40
+ "ctc_zero_infinity": false,
41
+ "do_stable_layer_norm": false,
42
+ "eos_token_id": 2,
43
+ "feat_extract_activation": "gelu",
44
+ "feat_extract_dropout": 0.0,
45
+ "feat_extract_norm": "group",
46
+ "feat_proj_dropout": 0.1,
47
+ "feat_proj_layer_norm": true,
48
+ "final_dropout": 0.1,
49
+ "gradient_checkpointing": false,
50
+ "hidden_act": "gelu",
51
+ "hidden_dropout": 0.1,
52
+ "hidden_dropout_prob": 0.1,
53
+ "hidden_size": 768,
54
+ "id2label": {
55
+ "0": "0000101",
56
+ "1": "0000110",
57
+ "10": "0010010",
58
+ "11": "0010011",
59
+ "12": "0010101",
60
+ "13": "0010110",
61
+ "14": "0010111",
62
+ "15": "0011001",
63
+ "16": "0011010",
64
+ "17": "0011011",
65
+ "18": "0011101",
66
+ "19": "0011110",
67
+ "2": "0000111",
68
+ "20": "0011111",
69
+ "21": "0100001",
70
+ "22": "0100010",
71
+ "23": "0100011",
72
+ "24": "0100101",
73
+ "25": "0100110",
74
+ "26": "0100111",
75
+ "27": "0101001",
76
+ "28": "0101010",
77
+ "29": "0101011",
78
+ "3": "0001001",
79
+ "30": "0101101",
80
+ "31": "0101110",
81
+ "32": "0101111",
82
+ "33": "0110001",
83
+ "34": "0110010",
84
+ "35": "0110011",
85
+ "36": "0110101",
86
+ "37": "0110110",
87
+ "38": "0110111",
88
+ "39": "0111001",
89
+ "4": "0001010",
90
+ "40": "0111010",
91
+ "41": "0111011",
92
+ "42": "0111101",
93
+ "43": "0111110",
94
+ "44": "0111111",
95
+ "45": "1000001",
96
+ "46": "1000010",
97
+ "47": "1000011",
98
+ "48": "1000101",
99
+ "49": "1000110",
100
+ "5": "0001011",
101
+ "50": "1000111",
102
+ "51": "1001001",
103
+ "52": "1001010",
104
+ "53": "1001011",
105
+ "54": "1001101",
106
+ "55": "1001110",
107
+ "56": "1001111",
108
+ "57": "1010001",
109
+ "58": "1010010",
110
+ "59": "1010011",
111
+ "6": "0001101",
112
+ "60": "1010101",
113
+ "61": "1010110",
114
+ "62": "1010111",
115
+ "63": "1011001",
116
+ "64": "1011010",
117
+ "65": "1011011",
118
+ "66": "1011101",
119
+ "67": "1011110",
120
+ "68": "1011111",
121
+ "69": "1100001",
122
+ "7": "0001110",
123
+ "70": "1100010",
124
+ "71": "1100011",
125
+ "72": "1100101",
126
+ "73": "1100110",
127
+ "74": "1100111",
128
+ "75": "1101001",
129
+ "76": "1101010",
130
+ "77": "1101011",
131
+ "78": "1101101",
132
+ "79": "1101110",
133
+ "8": "0001111",
134
+ "80": "1101111",
135
+ "81": "1110001",
136
+ "82": "1110010",
137
+ "83": "1110011",
138
+ "9": "0010001"
139
+ },
140
+ "initializer_range": 0.02,
141
+ "intermediate_size": 3072,
142
+ "label2id": {
143
+ "0000101": "0",
144
+ "0000110": "1",
145
+ "0000111": "2",
146
+ "0001001": "3",
147
+ "0001010": "4",
148
+ "0001011": "5",
149
+ "0001101": "6",
150
+ "0001110": "7",
151
+ "0001111": "8",
152
+ "0010001": "9",
153
+ "0010010": "10",
154
+ "0010011": "11",
155
+ "0010101": "12",
156
+ "0010110": "13",
157
+ "0010111": "14",
158
+ "0011001": "15",
159
+ "0011010": "16",
160
+ "0011011": "17",
161
+ "0011101": "18",
162
+ "0011110": "19",
163
+ "0011111": "20",
164
+ "0100001": "21",
165
+ "0100010": "22",
166
+ "0100011": "23",
167
+ "0100101": "24",
168
+ "0100110": "25",
169
+ "0100111": "26",
170
+ "0101001": "27",
171
+ "0101010": "28",
172
+ "0101011": "29",
173
+ "0101101": "30",
174
+ "0101110": "31",
175
+ "0101111": "32",
176
+ "0110001": "33",
177
+ "0110010": "34",
178
+ "0110011": "35",
179
+ "0110101": "36",
180
+ "0110110": "37",
181
+ "0110111": "38",
182
+ "0111001": "39",
183
+ "0111010": "40",
184
+ "0111011": "41",
185
+ "0111101": "42",
186
+ "0111110": "43",
187
+ "0111111": "44",
188
+ "1000001": "45",
189
+ "1000010": "46",
190
+ "1000011": "47",
191
+ "1000101": "48",
192
+ "1000110": "49",
193
+ "1000111": "50",
194
+ "1001001": "51",
195
+ "1001010": "52",
196
+ "1001011": "53",
197
+ "1001101": "54",
198
+ "1001110": "55",
199
+ "1001111": "56",
200
+ "1010001": "57",
201
+ "1010010": "58",
202
+ "1010011": "59",
203
+ "1010101": "60",
204
+ "1010110": "61",
205
+ "1010111": "62",
206
+ "1011001": "63",
207
+ "1011010": "64",
208
+ "1011011": "65",
209
+ "1011101": "66",
210
+ "1011110": "67",
211
+ "1011111": "68",
212
+ "1100001": "69",
213
+ "1100010": "70",
214
+ "1100011": "71",
215
+ "1100101": "72",
216
+ "1100110": "73",
217
+ "1100111": "74",
218
+ "1101001": "75",
219
+ "1101010": "76",
220
+ "1101011": "77",
221
+ "1101101": "78",
222
+ "1101110": "79",
223
+ "1101111": "80",
224
+ "1110001": "81",
225
+ "1110010": "82",
226
+ "1110011": "83"
227
+ },
228
+ "layer_norm_eps": 1e-05,
229
+ "layerdrop": 0.1,
230
+ "mask_feature_length": 10,
231
+ "mask_feature_min_masks": 0,
232
+ "mask_feature_prob": 0.0,
233
+ "mask_time_length": 10,
234
+ "mask_time_min_masks": 2,
235
+ "mask_time_prob": 0.05,
236
+ "model_type": "hubert",
237
+ "num_attention_heads": 12,
238
+ "num_conv_pos_embedding_groups": 16,
239
+ "num_conv_pos_embeddings": 128,
240
+ "num_feat_extract_layers": 7,
241
+ "num_hidden_layers": 12,
242
+ "pad_token_id": 0,
243
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
244
+ "torch_dtype": "float32",
245
+ "transformers_version": "4.38.2",
246
+ "use_weighted_layer_sum": false,
247
+ "vocab_size": 32
248
+ }
tmp-checkpoint-4000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d660e2f4ba7425de092e46febcb6dd5a9162f97de6a8f80031a80fd3e1292646
3
+ size 378386248
tmp-checkpoint-4000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e30f374e8863f3de830e11606fa008e5faf5f24d67567c2d185d1aaef9d61b59
3
+ size 723289978
tmp-checkpoint-4000/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": false,
8
+ "sampling_rate": 16000
9
+ }
tmp-checkpoint-4000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9609321d39eed828389b64228f351e48770c080465c0ca20848a03ddc9dc4817
3
+ size 14308
tmp-checkpoint-4000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3d630a10c540814c311a2966ecad86e893dd58f8d91138ffb7cb4c0fbb47a42
3
+ size 1064
tmp-checkpoint-4000/trainer_state.json ADDED
@@ -0,0 +1,1131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8784962349040888,
3
+ "best_model_checkpoint": "fydhfzh/hubert-classifier-aug-fold-4\\checkpoint-4000",
4
+ "epoch": 10.78167115902965,
5
+ "eval_steps": 50,
6
+ "global_step": 4000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.13,
13
+ "eval_accuracy": 0.04588394062078273,
14
+ "eval_binary": 0.3221322537112049,
15
+ "eval_f1": 0.014671635517615288,
16
+ "eval_loss": 3.805781364440918,
17
+ "eval_precision": 0.03779948934318612,
18
+ "eval_recall": 0.04588394062078273,
19
+ "eval_runtime": 1.0685,
20
+ "eval_samples_per_second": 693.469,
21
+ "eval_steps_per_second": 22.461,
22
+ "step": 50
23
+ },
24
+ {
25
+ "epoch": 0.27,
26
+ "eval_accuracy": 0.1106612685560054,
27
+ "eval_binary": 0.374358974358979,
28
+ "eval_f1": 0.03914518880642342,
29
+ "eval_loss": 3.335998058319092,
30
+ "eval_precision": 0.0301847940928463,
31
+ "eval_recall": 0.1106612685560054,
32
+ "eval_runtime": 1.0612,
33
+ "eval_samples_per_second": 698.296,
34
+ "eval_steps_per_second": 22.617,
35
+ "step": 100
36
+ },
37
+ {
38
+ "epoch": 0.4,
39
+ "eval_accuracy": 0.23346828609986506,
40
+ "eval_binary": 0.4604588394062126,
41
+ "eval_f1": 0.14009724606429286,
42
+ "eval_loss": 3.007000684738159,
43
+ "eval_precision": 0.12454766261554863,
44
+ "eval_recall": 0.23346828609986506,
45
+ "eval_runtime": 1.0735,
46
+ "eval_samples_per_second": 690.246,
47
+ "eval_steps_per_second": 22.356,
48
+ "step": 150
49
+ },
50
+ {
51
+ "epoch": 0.54,
52
+ "eval_accuracy": 0.3468286099865047,
53
+ "eval_binary": 0.5406207827260503,
54
+ "eval_f1": 0.25241630871467685,
55
+ "eval_loss": 2.556727647781372,
56
+ "eval_precision": 0.24171104417236225,
57
+ "eval_recall": 0.3468286099865047,
58
+ "eval_runtime": 1.0985,
59
+ "eval_samples_per_second": 674.535,
60
+ "eval_steps_per_second": 21.847,
61
+ "step": 200
62
+ },
63
+ {
64
+ "epoch": 0.67,
65
+ "eval_accuracy": 0.42645074224021595,
66
+ "eval_binary": 0.5983805668016237,
67
+ "eval_f1": 0.35188011958617477,
68
+ "eval_loss": 2.177004337310791,
69
+ "eval_precision": 0.3602976489469129,
70
+ "eval_recall": 0.42645074224021595,
71
+ "eval_runtime": 1.1241,
72
+ "eval_samples_per_second": 659.219,
73
+ "eval_steps_per_second": 21.351,
74
+ "step": 250
75
+ },
76
+ {
77
+ "epoch": 0.81,
78
+ "eval_accuracy": 0.4966261808367072,
79
+ "eval_binary": 0.647098515519572,
80
+ "eval_f1": 0.43339387931632517,
81
+ "eval_loss": 1.8818293809890747,
82
+ "eval_precision": 0.4889656579714102,
83
+ "eval_recall": 0.4966261808367072,
84
+ "eval_runtime": 1.1323,
85
+ "eval_samples_per_second": 654.399,
86
+ "eval_steps_per_second": 21.195,
87
+ "step": 300
88
+ },
89
+ {
90
+ "epoch": 0.94,
91
+ "eval_accuracy": 0.5506072874493927,
92
+ "eval_binary": 0.6852901484480469,
93
+ "eval_f1": 0.5044747772286964,
94
+ "eval_loss": 1.6562384366989136,
95
+ "eval_precision": 0.5479471666625184,
96
+ "eval_recall": 0.5506072874493927,
97
+ "eval_runtime": 1.1248,
98
+ "eval_samples_per_second": 658.769,
99
+ "eval_steps_per_second": 21.337,
100
+ "step": 350
101
+ },
102
+ {
103
+ "epoch": 1.0,
104
+ "grad_norm": 4.053846836090088,
105
+ "learning_rate": 9.666666666666667e-05,
106
+ "loss": 2.9369,
107
+ "step": 371
108
+ },
109
+ {
110
+ "epoch": 1.08,
111
+ "eval_accuracy": 0.6045883940620783,
112
+ "eval_binary": 0.7234817813765205,
113
+ "eval_f1": 0.5598557405312042,
114
+ "eval_loss": 1.4207106828689575,
115
+ "eval_precision": 0.590639979026807,
116
+ "eval_recall": 0.6045883940620783,
117
+ "eval_runtime": 1.1338,
118
+ "eval_samples_per_second": 653.569,
119
+ "eval_steps_per_second": 21.168,
120
+ "step": 400
121
+ },
122
+ {
123
+ "epoch": 1.21,
124
+ "eval_accuracy": 0.6531713900134952,
125
+ "eval_binary": 0.7587044534412956,
126
+ "eval_f1": 0.6214387578818519,
127
+ "eval_loss": 1.256138801574707,
128
+ "eval_precision": 0.6601188715535435,
129
+ "eval_recall": 0.6531713900134952,
130
+ "eval_runtime": 1.1142,
131
+ "eval_samples_per_second": 665.023,
132
+ "eval_steps_per_second": 21.539,
133
+ "step": 450
134
+ },
135
+ {
136
+ "epoch": 1.35,
137
+ "eval_accuracy": 0.6653171390013495,
138
+ "eval_binary": 0.7655870445344135,
139
+ "eval_f1": 0.6326851465486375,
140
+ "eval_loss": 1.1726747751235962,
141
+ "eval_precision": 0.6850642147547112,
142
+ "eval_recall": 0.6653171390013495,
143
+ "eval_runtime": 1.176,
144
+ "eval_samples_per_second": 630.094,
145
+ "eval_steps_per_second": 20.408,
146
+ "step": 500
147
+ },
148
+ {
149
+ "epoch": 1.48,
150
+ "eval_accuracy": 0.7300944669365722,
151
+ "eval_binary": 0.809986504723346,
152
+ "eval_f1": 0.7136995231626676,
153
+ "eval_loss": 1.0430372953414917,
154
+ "eval_precision": 0.7479252809718396,
155
+ "eval_recall": 0.7300944669365722,
156
+ "eval_runtime": 1.1554,
157
+ "eval_samples_per_second": 641.328,
158
+ "eval_steps_per_second": 20.772,
159
+ "step": 550
160
+ },
161
+ {
162
+ "epoch": 1.62,
163
+ "eval_accuracy": 0.728744939271255,
164
+ "eval_binary": 0.809986504723346,
165
+ "eval_f1": 0.7074703954108517,
166
+ "eval_loss": 0.9479284286499023,
167
+ "eval_precision": 0.740343928828029,
168
+ "eval_recall": 0.728744939271255,
169
+ "eval_runtime": 1.1389,
170
+ "eval_samples_per_second": 650.651,
171
+ "eval_steps_per_second": 21.074,
172
+ "step": 600
173
+ },
174
+ {
175
+ "epoch": 1.75,
176
+ "eval_accuracy": 0.766531713900135,
177
+ "eval_binary": 0.8377867746288786,
178
+ "eval_f1": 0.7547522208317685,
179
+ "eval_loss": 0.8898987770080566,
180
+ "eval_precision": 0.7879262475416322,
181
+ "eval_recall": 0.766531713900135,
182
+ "eval_runtime": 1.156,
183
+ "eval_samples_per_second": 641.001,
184
+ "eval_steps_per_second": 20.761,
185
+ "step": 650
186
+ },
187
+ {
188
+ "epoch": 1.89,
189
+ "eval_accuracy": 0.815114709851552,
190
+ "eval_binary": 0.8699055330634268,
191
+ "eval_f1": 0.8043569052236664,
192
+ "eval_loss": 0.7485046982765198,
193
+ "eval_precision": 0.8233385560308638,
194
+ "eval_recall": 0.815114709851552,
195
+ "eval_runtime": 1.1399,
196
+ "eval_samples_per_second": 650.034,
197
+ "eval_steps_per_second": 21.054,
198
+ "step": 700
199
+ },
200
+ {
201
+ "epoch": 2.0,
202
+ "grad_norm": 4.031270980834961,
203
+ "learning_rate": 9.333333333333334e-05,
204
+ "loss": 1.3588,
205
+ "step": 742
206
+ },
207
+ {
208
+ "epoch": 2.02,
209
+ "eval_accuracy": 0.7921727395411606,
210
+ "eval_binary": 0.8553306342780018,
211
+ "eval_f1": 0.7800833014875206,
212
+ "eval_loss": 0.7457680702209473,
213
+ "eval_precision": 0.8196362229763039,
214
+ "eval_recall": 0.7921727395411606,
215
+ "eval_runtime": 1.1503,
216
+ "eval_samples_per_second": 644.196,
217
+ "eval_steps_per_second": 20.865,
218
+ "step": 750
219
+ },
220
+ {
221
+ "epoch": 2.16,
222
+ "eval_accuracy": 0.8137651821862348,
223
+ "eval_binary": 0.8686909581646418,
224
+ "eval_f1": 0.8106549167288675,
225
+ "eval_loss": 0.7069482803344727,
226
+ "eval_precision": 0.8318749701340793,
227
+ "eval_recall": 0.8137651821862348,
228
+ "eval_runtime": 1.1687,
229
+ "eval_samples_per_second": 634.021,
230
+ "eval_steps_per_second": 20.535,
231
+ "step": 800
232
+ },
233
+ {
234
+ "epoch": 2.29,
235
+ "eval_accuracy": 0.8137651821862348,
236
+ "eval_binary": 0.8685560053981101,
237
+ "eval_f1": 0.8088124908295262,
238
+ "eval_loss": 0.6896880269050598,
239
+ "eval_precision": 0.8376693376217073,
240
+ "eval_recall": 0.8137651821862348,
241
+ "eval_runtime": 1.1497,
242
+ "eval_samples_per_second": 644.512,
243
+ "eval_steps_per_second": 20.875,
244
+ "step": 850
245
+ },
246
+ {
247
+ "epoch": 2.43,
248
+ "eval_accuracy": 0.8070175438596491,
249
+ "eval_binary": 0.8650472334682855,
250
+ "eval_f1": 0.8034708035750392,
251
+ "eval_loss": 0.6972086429595947,
252
+ "eval_precision": 0.8367843338997185,
253
+ "eval_recall": 0.8070175438596491,
254
+ "eval_runtime": 1.1633,
255
+ "eval_samples_per_second": 636.983,
256
+ "eval_steps_per_second": 20.631,
257
+ "step": 900
258
+ },
259
+ {
260
+ "epoch": 2.56,
261
+ "eval_accuracy": 0.8164642375168691,
262
+ "eval_binary": 0.8704453441295538,
263
+ "eval_f1": 0.813993268748201,
264
+ "eval_loss": 0.6781038641929626,
265
+ "eval_precision": 0.8366608032093864,
266
+ "eval_recall": 0.8164642375168691,
267
+ "eval_runtime": 1.1823,
268
+ "eval_samples_per_second": 626.741,
269
+ "eval_steps_per_second": 20.299,
270
+ "step": 950
271
+ },
272
+ {
273
+ "epoch": 2.7,
274
+ "eval_accuracy": 0.815114709851552,
275
+ "eval_binary": 0.8703103913630216,
276
+ "eval_f1": 0.8100265350585438,
277
+ "eval_loss": 0.6554349660873413,
278
+ "eval_precision": 0.8380368300914861,
279
+ "eval_recall": 0.815114709851552,
280
+ "eval_runtime": 1.1543,
281
+ "eval_samples_per_second": 641.937,
282
+ "eval_steps_per_second": 20.791,
283
+ "step": 1000
284
+ },
285
+ {
286
+ "epoch": 2.83,
287
+ "eval_accuracy": 0.8218623481781376,
288
+ "eval_binary": 0.8759784075573539,
289
+ "eval_f1": 0.8182895287255976,
290
+ "eval_loss": 0.6537468433380127,
291
+ "eval_precision": 0.8421630941099861,
292
+ "eval_recall": 0.8218623481781376,
293
+ "eval_runtime": 1.1679,
294
+ "eval_samples_per_second": 634.474,
295
+ "eval_steps_per_second": 20.55,
296
+ "step": 1050
297
+ },
298
+ {
299
+ "epoch": 2.96,
300
+ "eval_accuracy": 0.8407557354925776,
301
+ "eval_binary": 0.8882591093117402,
302
+ "eval_f1": 0.8327839441497319,
303
+ "eval_loss": 0.6294392347335815,
304
+ "eval_precision": 0.8517352283784768,
305
+ "eval_recall": 0.8407557354925776,
306
+ "eval_runtime": 1.1945,
307
+ "eval_samples_per_second": 620.327,
308
+ "eval_steps_per_second": 20.092,
309
+ "step": 1100
310
+ },
311
+ {
312
+ "epoch": 3.0,
313
+ "grad_norm": 4.313427925109863,
314
+ "learning_rate": 9e-05,
315
+ "loss": 0.8613,
316
+ "step": 1113
317
+ },
318
+ {
319
+ "epoch": 3.1,
320
+ "eval_accuracy": 0.8340080971659919,
321
+ "eval_binary": 0.8840755735492567,
322
+ "eval_f1": 0.8305322924524646,
323
+ "eval_loss": 0.5901665687561035,
324
+ "eval_precision": 0.849580143203625,
325
+ "eval_recall": 0.8340080971659919,
326
+ "eval_runtime": 1.1609,
327
+ "eval_samples_per_second": 638.313,
328
+ "eval_steps_per_second": 20.674,
329
+ "step": 1150
330
+ },
331
+ {
332
+ "epoch": 3.23,
333
+ "eval_accuracy": 0.8286099865047234,
334
+ "eval_binary": 0.8800269905533059,
335
+ "eval_f1": 0.8244801946311869,
336
+ "eval_loss": 0.6372344493865967,
337
+ "eval_precision": 0.8429155377535945,
338
+ "eval_recall": 0.8286099865047234,
339
+ "eval_runtime": 1.1722,
340
+ "eval_samples_per_second": 632.156,
341
+ "eval_steps_per_second": 20.475,
342
+ "step": 1200
343
+ },
344
+ {
345
+ "epoch": 3.37,
346
+ "eval_accuracy": 0.8502024291497976,
347
+ "eval_binary": 0.8960863697705795,
348
+ "eval_f1": 0.8459444607208269,
349
+ "eval_loss": 0.5361122488975525,
350
+ "eval_precision": 0.8694864046584694,
351
+ "eval_recall": 0.8502024291497976,
352
+ "eval_runtime": 1.1813,
353
+ "eval_samples_per_second": 627.265,
354
+ "eval_steps_per_second": 20.316,
355
+ "step": 1250
356
+ },
357
+ {
358
+ "epoch": 3.5,
359
+ "eval_accuracy": 0.8690958164642375,
360
+ "eval_binary": 0.9082321187584337,
361
+ "eval_f1": 0.8673805683180668,
362
+ "eval_loss": 0.5066567659378052,
363
+ "eval_precision": 0.8832905286346582,
364
+ "eval_recall": 0.8690958164642375,
365
+ "eval_runtime": 1.1611,
366
+ "eval_samples_per_second": 638.193,
367
+ "eval_steps_per_second": 20.67,
368
+ "step": 1300
369
+ },
370
+ {
371
+ "epoch": 3.64,
372
+ "eval_accuracy": 0.8690958164642375,
373
+ "eval_binary": 0.908636977058029,
374
+ "eval_f1": 0.8662532335354076,
375
+ "eval_loss": 0.4976394772529602,
376
+ "eval_precision": 0.8768591769603915,
377
+ "eval_recall": 0.8690958164642375,
378
+ "eval_runtime": 1.1871,
379
+ "eval_samples_per_second": 624.2,
380
+ "eval_steps_per_second": 20.217,
381
+ "step": 1350
382
+ },
383
+ {
384
+ "epoch": 3.77,
385
+ "eval_accuracy": 0.863697705802969,
386
+ "eval_binary": 0.9044534412955457,
387
+ "eval_f1": 0.858932558800567,
388
+ "eval_loss": 0.5320582985877991,
389
+ "eval_precision": 0.8738656979547668,
390
+ "eval_recall": 0.863697705802969,
391
+ "eval_runtime": 1.1707,
392
+ "eval_samples_per_second": 632.936,
393
+ "eval_steps_per_second": 20.5,
394
+ "step": 1400
395
+ },
396
+ {
397
+ "epoch": 3.91,
398
+ "eval_accuracy": 0.8529014844804319,
399
+ "eval_binary": 0.8963562753036428,
400
+ "eval_f1": 0.8490350520713121,
401
+ "eval_loss": 0.6004767417907715,
402
+ "eval_precision": 0.8682368800789853,
403
+ "eval_recall": 0.8529014844804319,
404
+ "eval_runtime": 1.1646,
405
+ "eval_samples_per_second": 636.271,
406
+ "eval_steps_per_second": 20.608,
407
+ "step": 1450
408
+ },
409
+ {
410
+ "epoch": 4.0,
411
+ "grad_norm": 6.45872163772583,
412
+ "learning_rate": 8.666666666666667e-05,
413
+ "loss": 0.6427,
414
+ "step": 1484
415
+ },
416
+ {
417
+ "epoch": 4.04,
418
+ "eval_accuracy": 0.8609986504723347,
419
+ "eval_binary": 0.9025641025641017,
420
+ "eval_f1": 0.8585759542711945,
421
+ "eval_loss": 0.5946248173713684,
422
+ "eval_precision": 0.8751688934053783,
423
+ "eval_recall": 0.8609986504723347,
424
+ "eval_runtime": 1.1421,
425
+ "eval_samples_per_second": 648.789,
426
+ "eval_steps_per_second": 21.013,
427
+ "step": 1500
428
+ },
429
+ {
430
+ "epoch": 4.18,
431
+ "eval_accuracy": 0.8690958164642375,
432
+ "eval_binary": 0.9079622132253704,
433
+ "eval_f1": 0.8669378239477148,
434
+ "eval_loss": 0.5153417587280273,
435
+ "eval_precision": 0.8819396228303111,
436
+ "eval_recall": 0.8690958164642375,
437
+ "eval_runtime": 1.1485,
438
+ "eval_samples_per_second": 645.203,
439
+ "eval_steps_per_second": 20.897,
440
+ "step": 1550
441
+ },
442
+ {
443
+ "epoch": 4.31,
444
+ "eval_accuracy": 0.8569500674763832,
445
+ "eval_binary": 0.8986504723346818,
446
+ "eval_f1": 0.855017890388154,
447
+ "eval_loss": 0.569421648979187,
448
+ "eval_precision": 0.8722095081406823,
449
+ "eval_recall": 0.8569500674763832,
450
+ "eval_runtime": 1.1556,
451
+ "eval_samples_per_second": 641.21,
452
+ "eval_steps_per_second": 20.768,
453
+ "step": 1600
454
+ },
455
+ {
456
+ "epoch": 4.45,
457
+ "eval_accuracy": 0.8717948717948718,
458
+ "eval_binary": 0.9094466936572189,
459
+ "eval_f1": 0.869835417212902,
460
+ "eval_loss": 0.5046663880348206,
461
+ "eval_precision": 0.8909411654648616,
462
+ "eval_recall": 0.8717948717948718,
463
+ "eval_runtime": 1.1301,
464
+ "eval_samples_per_second": 655.718,
465
+ "eval_steps_per_second": 21.238,
466
+ "step": 1650
467
+ },
468
+ {
469
+ "epoch": 4.58,
470
+ "eval_accuracy": 0.8717948717948718,
471
+ "eval_binary": 0.9099865047233455,
472
+ "eval_f1": 0.8699350147474851,
473
+ "eval_loss": 0.5270289182662964,
474
+ "eval_precision": 0.8804698038503707,
475
+ "eval_recall": 0.8717948717948718,
476
+ "eval_runtime": 1.1458,
477
+ "eval_samples_per_second": 646.682,
478
+ "eval_steps_per_second": 20.945,
479
+ "step": 1700
480
+ },
481
+ {
482
+ "epoch": 4.72,
483
+ "eval_accuracy": 0.8569500674763832,
484
+ "eval_binary": 0.9001349527665304,
485
+ "eval_f1": 0.8530294562245342,
486
+ "eval_loss": 0.5278229117393494,
487
+ "eval_precision": 0.8713434076794401,
488
+ "eval_recall": 0.8569500674763832,
489
+ "eval_runtime": 1.1651,
490
+ "eval_samples_per_second": 635.992,
491
+ "eval_steps_per_second": 20.599,
492
+ "step": 1750
493
+ },
494
+ {
495
+ "epoch": 4.85,
496
+ "eval_accuracy": 0.8771929824561403,
497
+ "eval_binary": 0.9148448043184877,
498
+ "eval_f1": 0.8765031109078509,
499
+ "eval_loss": 0.5112797021865845,
500
+ "eval_precision": 0.8899662303508457,
501
+ "eval_recall": 0.8771929824561403,
502
+ "eval_runtime": 1.161,
503
+ "eval_samples_per_second": 638.269,
504
+ "eval_steps_per_second": 20.673,
505
+ "step": 1800
506
+ },
507
+ {
508
+ "epoch": 4.99,
509
+ "eval_accuracy": 0.8677462887989204,
510
+ "eval_binary": 0.90715249662618,
511
+ "eval_f1": 0.864758306975439,
512
+ "eval_loss": 0.5557437539100647,
513
+ "eval_precision": 0.8762526459894882,
514
+ "eval_recall": 0.8677462887989204,
515
+ "eval_runtime": 1.2253,
516
+ "eval_samples_per_second": 604.742,
517
+ "eval_steps_per_second": 19.587,
518
+ "step": 1850
519
+ },
520
+ {
521
+ "epoch": 5.0,
522
+ "grad_norm": 4.927316188812256,
523
+ "learning_rate": 8.333333333333334e-05,
524
+ "loss": 0.5222,
525
+ "step": 1855
526
+ },
527
+ {
528
+ "epoch": 5.12,
529
+ "eval_accuracy": 0.8717948717948718,
530
+ "eval_binary": 0.9093117408906874,
531
+ "eval_f1": 0.8695718983671247,
532
+ "eval_loss": 0.5396840572357178,
533
+ "eval_precision": 0.8828931646269107,
534
+ "eval_recall": 0.8717948717948718,
535
+ "eval_runtime": 1.1447,
536
+ "eval_samples_per_second": 647.325,
537
+ "eval_steps_per_second": 20.966,
538
+ "step": 1900
539
+ },
540
+ {
541
+ "epoch": 5.26,
542
+ "eval_accuracy": 0.8785425101214575,
543
+ "eval_binary": 0.9141700404858291,
544
+ "eval_f1": 0.8778235100479747,
545
+ "eval_loss": 0.5459949970245361,
546
+ "eval_precision": 0.8931182365392891,
547
+ "eval_recall": 0.8785425101214575,
548
+ "eval_runtime": 1.1802,
549
+ "eval_samples_per_second": 627.864,
550
+ "eval_steps_per_second": 20.336,
551
+ "step": 1950
552
+ },
553
+ {
554
+ "epoch": 5.39,
555
+ "eval_accuracy": 0.8717948717948718,
556
+ "eval_binary": 0.909986504723346,
557
+ "eval_f1": 0.8702993779030054,
558
+ "eval_loss": 0.5364383459091187,
559
+ "eval_precision": 0.88322276389078,
560
+ "eval_recall": 0.8717948717948718,
561
+ "eval_runtime": 1.1678,
562
+ "eval_samples_per_second": 634.529,
563
+ "eval_steps_per_second": 20.552,
564
+ "step": 2000
565
+ },
566
+ {
567
+ "epoch": 5.53,
568
+ "eval_accuracy": 0.8717948717948718,
569
+ "eval_binary": 0.909986504723346,
570
+ "eval_f1": 0.8693471297847463,
571
+ "eval_loss": 0.5403723120689392,
572
+ "eval_precision": 0.8857783354746918,
573
+ "eval_recall": 0.8717948717948718,
574
+ "eval_runtime": 1.2223,
575
+ "eval_samples_per_second": 606.259,
576
+ "eval_steps_per_second": 19.636,
577
+ "step": 2050
578
+ },
579
+ {
580
+ "epoch": 5.66,
581
+ "eval_accuracy": 0.8690958164642375,
582
+ "eval_binary": 0.9071524966261797,
583
+ "eval_f1": 0.8663898349797059,
584
+ "eval_loss": 0.6065850257873535,
585
+ "eval_precision": 0.8798577528334613,
586
+ "eval_recall": 0.8690958164642375,
587
+ "eval_runtime": 1.1672,
588
+ "eval_samples_per_second": 634.865,
589
+ "eval_steps_per_second": 20.562,
590
+ "step": 2100
591
+ },
592
+ {
593
+ "epoch": 5.8,
594
+ "eval_accuracy": 0.8569500674763832,
595
+ "eval_binary": 0.8995951417004041,
596
+ "eval_f1": 0.8543825643734608,
597
+ "eval_loss": 0.609086275100708,
598
+ "eval_precision": 0.8699245573739501,
599
+ "eval_recall": 0.8569500674763832,
600
+ "eval_runtime": 1.1615,
601
+ "eval_samples_per_second": 637.949,
602
+ "eval_steps_per_second": 20.662,
603
+ "step": 2150
604
+ },
605
+ {
606
+ "epoch": 5.93,
607
+ "eval_accuracy": 0.863697705802969,
608
+ "eval_binary": 0.9044534412955451,
609
+ "eval_f1": 0.8615752789891996,
610
+ "eval_loss": 0.5661621689796448,
611
+ "eval_precision": 0.8777716435611175,
612
+ "eval_recall": 0.863697705802969,
613
+ "eval_runtime": 1.1549,
614
+ "eval_samples_per_second": 641.6,
615
+ "eval_steps_per_second": 20.781,
616
+ "step": 2200
617
+ },
618
+ {
619
+ "epoch": 6.0,
620
+ "grad_norm": 3.95198392868042,
621
+ "learning_rate": 8e-05,
622
+ "loss": 0.4432,
623
+ "step": 2226
624
+ },
625
+ {
626
+ "epoch": 6.06,
627
+ "eval_accuracy": 0.8623481781376519,
628
+ "eval_binary": 0.9033738191632924,
629
+ "eval_f1": 0.8596934053855256,
630
+ "eval_loss": 0.6643653512001038,
631
+ "eval_precision": 0.8785115319528275,
632
+ "eval_recall": 0.8623481781376519,
633
+ "eval_runtime": 1.195,
634
+ "eval_samples_per_second": 620.059,
635
+ "eval_steps_per_second": 20.083,
636
+ "step": 2250
637
+ },
638
+ {
639
+ "epoch": 6.2,
640
+ "eval_accuracy": 0.8609986504723347,
641
+ "eval_binary": 0.9020242914979746,
642
+ "eval_f1": 0.858697058942677,
643
+ "eval_loss": 0.6462463736534119,
644
+ "eval_precision": 0.874255314336286,
645
+ "eval_recall": 0.8609986504723347,
646
+ "eval_runtime": 1.1742,
647
+ "eval_samples_per_second": 631.066,
648
+ "eval_steps_per_second": 20.439,
649
+ "step": 2300
650
+ },
651
+ {
652
+ "epoch": 6.33,
653
+ "eval_accuracy": 0.8771929824561403,
654
+ "eval_binary": 0.9130904183535752,
655
+ "eval_f1": 0.8725328911535255,
656
+ "eval_loss": 0.5374723672866821,
657
+ "eval_precision": 0.8823031219792354,
658
+ "eval_recall": 0.8771929824561403,
659
+ "eval_runtime": 1.1706,
660
+ "eval_samples_per_second": 632.998,
661
+ "eval_steps_per_second": 20.502,
662
+ "step": 2350
663
+ },
664
+ {
665
+ "epoch": 6.47,
666
+ "eval_accuracy": 0.863697705802969,
667
+ "eval_binary": 0.9044534412955454,
668
+ "eval_f1": 0.8571119491194448,
669
+ "eval_loss": 0.6624985337257385,
670
+ "eval_precision": 0.8729715216557322,
671
+ "eval_recall": 0.863697705802969,
672
+ "eval_runtime": 1.1671,
673
+ "eval_samples_per_second": 634.923,
674
+ "eval_steps_per_second": 20.564,
675
+ "step": 2400
676
+ },
677
+ {
678
+ "epoch": 6.6,
679
+ "eval_accuracy": 0.8704453441295547,
680
+ "eval_binary": 0.9105263157894729,
681
+ "eval_f1": 0.8676961482191342,
682
+ "eval_loss": 0.5533118844032288,
683
+ "eval_precision": 0.880092126347187,
684
+ "eval_recall": 0.8704453441295547,
685
+ "eval_runtime": 1.1466,
686
+ "eval_samples_per_second": 646.275,
687
+ "eval_steps_per_second": 20.932,
688
+ "step": 2450
689
+ },
690
+ {
691
+ "epoch": 6.74,
692
+ "eval_accuracy": 0.8744939271255061,
693
+ "eval_binary": 0.9122807017543849,
694
+ "eval_f1": 0.871742048083642,
695
+ "eval_loss": 0.6021706461906433,
696
+ "eval_precision": 0.8837557104763583,
697
+ "eval_recall": 0.8744939271255061,
698
+ "eval_runtime": 1.1589,
699
+ "eval_samples_per_second": 639.39,
700
+ "eval_steps_per_second": 20.709,
701
+ "step": 2500
702
+ },
703
+ {
704
+ "epoch": 6.87,
705
+ "eval_accuracy": 0.8663967611336032,
706
+ "eval_binary": 0.9063427800269895,
707
+ "eval_f1": 0.8625935560840108,
708
+ "eval_loss": 0.6515318751335144,
709
+ "eval_precision": 0.8741236309050073,
710
+ "eval_recall": 0.8663967611336032,
711
+ "eval_runtime": 1.1683,
712
+ "eval_samples_per_second": 634.261,
713
+ "eval_steps_per_second": 20.543,
714
+ "step": 2550
715
+ },
716
+ {
717
+ "epoch": 7.0,
718
+ "grad_norm": 4.3103928565979,
719
+ "learning_rate": 7.666666666666667e-05,
720
+ "loss": 0.3999,
721
+ "step": 2597
722
+ },
723
+ {
724
+ "epoch": 7.01,
725
+ "eval_accuracy": 0.8717948717948718,
726
+ "eval_binary": 0.9103913630229407,
727
+ "eval_f1": 0.8700599583697827,
728
+ "eval_loss": 0.5570635795593262,
729
+ "eval_precision": 0.8831367238249829,
730
+ "eval_recall": 0.8717948717948718,
731
+ "eval_runtime": 1.1765,
732
+ "eval_samples_per_second": 629.854,
733
+ "eval_steps_per_second": 20.4,
734
+ "step": 2600
735
+ },
736
+ {
737
+ "epoch": 7.14,
738
+ "eval_accuracy": 0.8785425101214575,
739
+ "eval_binary": 0.9151147098515511,
740
+ "eval_f1": 0.8766044389904654,
741
+ "eval_loss": 0.5620068311691284,
742
+ "eval_precision": 0.8879959446761065,
743
+ "eval_recall": 0.8785425101214575,
744
+ "eval_runtime": 1.1587,
745
+ "eval_samples_per_second": 639.508,
746
+ "eval_steps_per_second": 20.713,
747
+ "step": 2650
748
+ },
749
+ {
750
+ "epoch": 7.28,
751
+ "eval_accuracy": 0.8798920377867746,
752
+ "eval_binary": 0.9160593792172727,
753
+ "eval_f1": 0.8778735845274245,
754
+ "eval_loss": 0.5602428317070007,
755
+ "eval_precision": 0.887021746434702,
756
+ "eval_recall": 0.8798920377867746,
757
+ "eval_runtime": 1.1417,
758
+ "eval_samples_per_second": 649.02,
759
+ "eval_steps_per_second": 21.021,
760
+ "step": 2700
761
+ },
762
+ {
763
+ "epoch": 7.41,
764
+ "eval_accuracy": 0.8758434547908233,
765
+ "eval_binary": 0.9122807017543852,
766
+ "eval_f1": 0.8734723052118281,
767
+ "eval_loss": 0.5291705131530762,
768
+ "eval_precision": 0.884985605937023,
769
+ "eval_recall": 0.8758434547908233,
770
+ "eval_runtime": 1.1639,
771
+ "eval_samples_per_second": 636.647,
772
+ "eval_steps_per_second": 20.62,
773
+ "step": 2750
774
+ },
775
+ {
776
+ "epoch": 7.55,
777
+ "eval_accuracy": 0.873144399460189,
778
+ "eval_binary": 0.9109311740890679,
779
+ "eval_f1": 0.8728506306960341,
780
+ "eval_loss": 0.6643885374069214,
781
+ "eval_precision": 0.8896590139505118,
782
+ "eval_recall": 0.873144399460189,
783
+ "eval_runtime": 1.157,
784
+ "eval_samples_per_second": 640.431,
785
+ "eval_steps_per_second": 20.743,
786
+ "step": 2800
787
+ },
788
+ {
789
+ "epoch": 7.68,
790
+ "eval_accuracy": 0.8771929824561403,
791
+ "eval_binary": 0.9132253711201073,
792
+ "eval_f1": 0.8755511463124777,
793
+ "eval_loss": 0.587805449962616,
794
+ "eval_precision": 0.8871340834903588,
795
+ "eval_recall": 0.8771929824561403,
796
+ "eval_runtime": 1.1575,
797
+ "eval_samples_per_second": 640.186,
798
+ "eval_steps_per_second": 20.735,
799
+ "step": 2850
800
+ },
801
+ {
802
+ "epoch": 7.82,
803
+ "eval_accuracy": 0.8677462887989204,
804
+ "eval_binary": 0.9062078272604579,
805
+ "eval_f1": 0.8663130912154735,
806
+ "eval_loss": 0.6276022791862488,
807
+ "eval_precision": 0.8797659493003622,
808
+ "eval_recall": 0.8677462887989204,
809
+ "eval_runtime": 1.1612,
810
+ "eval_samples_per_second": 638.108,
811
+ "eval_steps_per_second": 20.667,
812
+ "step": 2900
813
+ },
814
+ {
815
+ "epoch": 7.95,
816
+ "eval_accuracy": 0.873144399460189,
817
+ "eval_binary": 0.9124156545209167,
818
+ "eval_f1": 0.8719900033412215,
819
+ "eval_loss": 0.5735969543457031,
820
+ "eval_precision": 0.8796167283009387,
821
+ "eval_recall": 0.873144399460189,
822
+ "eval_runtime": 1.2203,
823
+ "eval_samples_per_second": 607.227,
824
+ "eval_steps_per_second": 19.667,
825
+ "step": 2950
826
+ },
827
+ {
828
+ "epoch": 8.0,
829
+ "grad_norm": 5.361498832702637,
830
+ "learning_rate": 7.333333333333333e-05,
831
+ "loss": 0.3609,
832
+ "step": 2968
833
+ },
834
+ {
835
+ "epoch": 8.09,
836
+ "eval_accuracy": 0.8771929824561403,
837
+ "eval_binary": 0.9143049932523605,
838
+ "eval_f1": 0.8768958010180178,
839
+ "eval_loss": 0.6093275547027588,
840
+ "eval_precision": 0.8875668341255385,
841
+ "eval_recall": 0.8771929824561403,
842
+ "eval_runtime": 1.1465,
843
+ "eval_samples_per_second": 646.287,
844
+ "eval_steps_per_second": 20.932,
845
+ "step": 3000
846
+ },
847
+ {
848
+ "epoch": 8.22,
849
+ "eval_accuracy": 0.873144399460189,
850
+ "eval_binary": 0.9099865047233455,
851
+ "eval_f1": 0.8728942603104318,
852
+ "eval_loss": 0.5862379670143127,
853
+ "eval_precision": 0.8861477141841515,
854
+ "eval_recall": 0.873144399460189,
855
+ "eval_runtime": 1.1249,
856
+ "eval_samples_per_second": 658.698,
857
+ "eval_steps_per_second": 21.334,
858
+ "step": 3050
859
+ },
860
+ {
861
+ "epoch": 8.36,
862
+ "eval_accuracy": 0.8839406207827261,
863
+ "eval_binary": 0.9179487179487168,
864
+ "eval_f1": 0.8812727621395834,
865
+ "eval_loss": 0.5736364126205444,
866
+ "eval_precision": 0.8869233630367234,
867
+ "eval_recall": 0.8839406207827261,
868
+ "eval_runtime": 1.1449,
869
+ "eval_samples_per_second": 647.229,
870
+ "eval_steps_per_second": 20.963,
871
+ "step": 3100
872
+ },
873
+ {
874
+ "epoch": 8.49,
875
+ "eval_accuracy": 0.8798920377867746,
876
+ "eval_binary": 0.915114709851551,
877
+ "eval_f1": 0.8781431747563492,
878
+ "eval_loss": 0.5536712408065796,
879
+ "eval_precision": 0.8903398258661418,
880
+ "eval_recall": 0.8798920377867746,
881
+ "eval_runtime": 1.128,
882
+ "eval_samples_per_second": 656.932,
883
+ "eval_steps_per_second": 21.277,
884
+ "step": 3150
885
+ },
886
+ {
887
+ "epoch": 8.63,
888
+ "eval_accuracy": 0.8771929824561403,
889
+ "eval_binary": 0.9137651821862336,
890
+ "eval_f1": 0.8748654521289038,
891
+ "eval_loss": 0.5788822770118713,
892
+ "eval_precision": 0.8854517989133374,
893
+ "eval_recall": 0.8771929824561403,
894
+ "eval_runtime": 1.1554,
895
+ "eval_samples_per_second": 641.322,
896
+ "eval_steps_per_second": 20.772,
897
+ "step": 3200
898
+ },
899
+ {
900
+ "epoch": 8.76,
901
+ "eval_accuracy": 0.8758434547908233,
902
+ "eval_binary": 0.9129554655870437,
903
+ "eval_f1": 0.873432972085747,
904
+ "eval_loss": 0.6053817272186279,
905
+ "eval_precision": 0.8816829856303541,
906
+ "eval_recall": 0.8758434547908233,
907
+ "eval_runtime": 1.1318,
908
+ "eval_samples_per_second": 654.685,
909
+ "eval_steps_per_second": 21.204,
910
+ "step": 3250
911
+ },
912
+ {
913
+ "epoch": 8.89,
914
+ "eval_accuracy": 0.8866396761133604,
915
+ "eval_binary": 0.9203778677462877,
916
+ "eval_f1": 0.8852200459634372,
917
+ "eval_loss": 0.5305805206298828,
918
+ "eval_precision": 0.8933747127066964,
919
+ "eval_recall": 0.8866396761133604,
920
+ "eval_runtime": 1.1366,
921
+ "eval_samples_per_second": 651.943,
922
+ "eval_steps_per_second": 21.116,
923
+ "step": 3300
924
+ },
925
+ {
926
+ "epoch": 9.0,
927
+ "grad_norm": 2.9827425479888916,
928
+ "learning_rate": 7.000898472596586e-05,
929
+ "loss": 0.3284,
930
+ "step": 3339
931
+ },
932
+ {
933
+ "epoch": 9.03,
934
+ "eval_accuracy": 0.8866396761133604,
935
+ "eval_binary": 0.9203778677462882,
936
+ "eval_f1": 0.8851786363349267,
937
+ "eval_loss": 0.5871447920799255,
938
+ "eval_precision": 0.8929871747685514,
939
+ "eval_recall": 0.8866396761133604,
940
+ "eval_runtime": 1.1268,
941
+ "eval_samples_per_second": 657.63,
942
+ "eval_steps_per_second": 21.3,
943
+ "step": 3350
944
+ },
945
+ {
946
+ "epoch": 9.16,
947
+ "eval_accuracy": 0.8717948717948718,
948
+ "eval_binary": 0.9114709851551945,
949
+ "eval_f1": 0.8692786610310018,
950
+ "eval_loss": 0.6212562918663025,
951
+ "eval_precision": 0.8831649569503822,
952
+ "eval_recall": 0.8717948717948718,
953
+ "eval_runtime": 1.125,
954
+ "eval_samples_per_second": 658.664,
955
+ "eval_steps_per_second": 21.333,
956
+ "step": 3400
957
+ },
958
+ {
959
+ "epoch": 9.3,
960
+ "eval_accuracy": 0.8893387314439946,
961
+ "eval_binary": 0.921862348178137,
962
+ "eval_f1": 0.8877552195071403,
963
+ "eval_loss": 0.5114206671714783,
964
+ "eval_precision": 0.8954543919118818,
965
+ "eval_recall": 0.8893387314439946,
966
+ "eval_runtime": 1.1299,
967
+ "eval_samples_per_second": 655.79,
968
+ "eval_steps_per_second": 21.24,
969
+ "step": 3450
970
+ },
971
+ {
972
+ "epoch": 9.43,
973
+ "eval_accuracy": 0.8758434547908233,
974
+ "eval_binary": 0.9137651821862343,
975
+ "eval_f1": 0.8732295909377273,
976
+ "eval_loss": 0.5960059762001038,
977
+ "eval_precision": 0.881588392114708,
978
+ "eval_recall": 0.8758434547908233,
979
+ "eval_runtime": 1.1351,
980
+ "eval_samples_per_second": 652.779,
981
+ "eval_steps_per_second": 21.143,
982
+ "step": 3500
983
+ },
984
+ {
985
+ "epoch": 9.57,
986
+ "eval_accuracy": 0.8825910931174089,
987
+ "eval_binary": 0.9190283400809708,
988
+ "eval_f1": 0.8811241165340947,
989
+ "eval_loss": 0.6340000629425049,
990
+ "eval_precision": 0.8921271074105084,
991
+ "eval_recall": 0.8825910931174089,
992
+ "eval_runtime": 1.1227,
993
+ "eval_samples_per_second": 660.008,
994
+ "eval_steps_per_second": 21.377,
995
+ "step": 3550
996
+ },
997
+ {
998
+ "epoch": 9.7,
999
+ "eval_accuracy": 0.8758434547908233,
1000
+ "eval_binary": 0.9126855600539802,
1001
+ "eval_f1": 0.8735923320711677,
1002
+ "eval_loss": 0.606035590171814,
1003
+ "eval_precision": 0.8846766855876167,
1004
+ "eval_recall": 0.8758434547908233,
1005
+ "eval_runtime": 1.1444,
1006
+ "eval_samples_per_second": 647.476,
1007
+ "eval_steps_per_second": 20.971,
1008
+ "step": 3600
1009
+ },
1010
+ {
1011
+ "epoch": 9.84,
1012
+ "eval_accuracy": 0.8704453441295547,
1013
+ "eval_binary": 0.9099865047233459,
1014
+ "eval_f1": 0.8673800362820931,
1015
+ "eval_loss": 0.6465194821357727,
1016
+ "eval_precision": 0.8800944669365723,
1017
+ "eval_recall": 0.8704453441295547,
1018
+ "eval_runtime": 1.1227,
1019
+ "eval_samples_per_second": 659.991,
1020
+ "eval_steps_per_second": 21.376,
1021
+ "step": 3650
1022
+ },
1023
+ {
1024
+ "epoch": 9.97,
1025
+ "eval_accuracy": 0.8785425101214575,
1026
+ "eval_binary": 0.9148448043184876,
1027
+ "eval_f1": 0.8767805082692359,
1028
+ "eval_loss": 0.6755142211914062,
1029
+ "eval_precision": 0.8911575117040704,
1030
+ "eval_recall": 0.8785425101214575,
1031
+ "eval_runtime": 1.1379,
1032
+ "eval_samples_per_second": 651.188,
1033
+ "eval_steps_per_second": 21.091,
1034
+ "step": 3700
1035
+ },
1036
+ {
1037
+ "epoch": 10.0,
1038
+ "grad_norm": 4.129298686981201,
1039
+ "learning_rate": 6.667565139263254e-05,
1040
+ "loss": 0.3062,
1041
+ "step": 3710
1042
+ },
1043
+ {
1044
+ "epoch": 10.11,
1045
+ "eval_accuracy": 0.8785425101214575,
1046
+ "eval_binary": 0.9161943319838048,
1047
+ "eval_f1": 0.877249561583927,
1048
+ "eval_loss": 0.6168139576911926,
1049
+ "eval_precision": 0.8873360033279062,
1050
+ "eval_recall": 0.8785425101214575,
1051
+ "eval_runtime": 1.1239,
1052
+ "eval_samples_per_second": 659.319,
1053
+ "eval_steps_per_second": 21.354,
1054
+ "step": 3750
1055
+ },
1056
+ {
1057
+ "epoch": 10.24,
1058
+ "eval_accuracy": 0.8812415654520918,
1059
+ "eval_binary": 0.917543859649122,
1060
+ "eval_f1": 0.8794928490642383,
1061
+ "eval_loss": 0.6084047555923462,
1062
+ "eval_precision": 0.8893881946513525,
1063
+ "eval_recall": 0.8812415654520918,
1064
+ "eval_runtime": 1.1288,
1065
+ "eval_samples_per_second": 656.471,
1066
+ "eval_steps_per_second": 21.262,
1067
+ "step": 3800
1068
+ },
1069
+ {
1070
+ "epoch": 10.38,
1071
+ "eval_accuracy": 0.8704453441295547,
1072
+ "eval_binary": 0.9106612685560047,
1073
+ "eval_f1": 0.8674074511392077,
1074
+ "eval_loss": 0.733109712600708,
1075
+ "eval_precision": 0.8810352752660445,
1076
+ "eval_recall": 0.8704453441295547,
1077
+ "eval_runtime": 1.1179,
1078
+ "eval_samples_per_second": 662.846,
1079
+ "eval_steps_per_second": 21.469,
1080
+ "step": 3850
1081
+ },
1082
+ {
1083
+ "epoch": 10.51,
1084
+ "eval_accuracy": 0.8704453441295547,
1085
+ "eval_binary": 0.9085020242914971,
1086
+ "eval_f1": 0.8681938294778445,
1087
+ "eval_loss": 0.6585559248924255,
1088
+ "eval_precision": 0.8790380185117028,
1089
+ "eval_recall": 0.8704453441295547,
1090
+ "eval_runtime": 1.1015,
1091
+ "eval_samples_per_second": 672.696,
1092
+ "eval_steps_per_second": 21.788,
1093
+ "step": 3900
1094
+ },
1095
+ {
1096
+ "epoch": 10.65,
1097
+ "eval_accuracy": 0.865047233468286,
1098
+ "eval_binary": 0.9035087719298243,
1099
+ "eval_f1": 0.8619257548463832,
1100
+ "eval_loss": 0.7438000440597534,
1101
+ "eval_precision": 0.8722900768447327,
1102
+ "eval_recall": 0.865047233468286,
1103
+ "eval_runtime": 1.1141,
1104
+ "eval_samples_per_second": 665.131,
1105
+ "eval_steps_per_second": 21.543,
1106
+ "step": 3950
1107
+ },
1108
+ {
1109
+ "epoch": 10.78,
1110
+ "eval_accuracy": 0.8798920377867746,
1111
+ "eval_binary": 0.9157894736842094,
1112
+ "eval_f1": 0.8784962349040888,
1113
+ "eval_loss": 0.5920050740242004,
1114
+ "eval_precision": 0.8895519684993368,
1115
+ "eval_recall": 0.8798920377867746,
1116
+ "eval_runtime": 1.1303,
1117
+ "eval_samples_per_second": 655.59,
1118
+ "eval_steps_per_second": 21.234,
1119
+ "step": 4000
1120
+ }
1121
+ ],
1122
+ "logging_steps": 500,
1123
+ "max_steps": 11130,
1124
+ "num_input_tokens_seen": 0,
1125
+ "num_train_epochs": 30,
1126
+ "save_steps": 500,
1127
+ "total_flos": 5.3953021213009e+18,
1128
+ "train_batch_size": 32,
1129
+ "trial_name": null,
1130
+ "trial_params": null
1131
+ }
tmp-checkpoint-4000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad911284276196968ad8f0dfc5b63b04d6d3ebd414b55f47dc985e6b8a9095a5
3
+ size 4984