sekarmulyani commited on
Commit
c71d3b8
1 Parent(s): a622e1b

Upload 11 files

Browse files
Files changed (7) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. tokenizer.json +16 -2
  6. trainer_state.json +446 -188
  7. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:715908a4b9ff716429fa0576972c829fc810817c0f9e0f5d424e961c59a5c115
3
  size 884664069
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d418320f9ee3f1dffca7832aecec7df759a80dada05861fc0afea3bec652bb9
3
  size 884664069
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd5894498fb85c08107b7978fd7b58d109ef92dcc76d7b0b390bfe479726123d
3
  size 442316593
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c52f18f3b87150eef81fabf3e5bbf077d3a640da74cebfb56afd87e7f4cc1786
3
  size 442316593
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f4ff69ce3a867fc6c92d51f98704f57a1baefeadbf478fa8dc3e04a96a1937f
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d5aca11c97f3a24aee2fd3a0e70898259f58b12fed8db72231afb4eb7278d66
3
  size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0c7d3ee4508404640122d0b540ef9f7b79c50f752e4177f6a40eaa4ce2e4108
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfda1b9f02fbd3afeeb0a45c45d91917b2d9331eeaabde2bd2a0199143beeebf
3
  size 627
tokenizer.json CHANGED
@@ -1,7 +1,21 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 384,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": {
11
+ "Fixed": 384
12
+ },
13
+ "direction": "Right",
14
+ "pad_to_multiple_of": null,
15
+ "pad_id": 0,
16
+ "pad_type_id": 0,
17
+ "pad_token": "[PAD]"
18
+ },
19
  "added_tokens": [
20
  {
21
  "id": 0,
trainer_state.json CHANGED
@@ -1,337 +1,595 @@
1
  {
2
- "best_metric": 0.49511729307051994,
3
- "best_model_checkpoint": "./indobertweet-review-rating/checkpoint-14300",
4
  "epoch": 6.0,
5
  "eval_steps": 500,
6
- "global_step": 21450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.14,
13
- "learning_rate": 1.9813519813519816e-05,
14
- "loss": 0.4207,
15
  "step": 500
16
  },
17
  {
18
- "epoch": 0.28,
19
- "learning_rate": 1.962703962703963e-05,
20
- "loss": 0.3879,
21
  "step": 1000
22
  },
23
  {
24
- "epoch": 0.42,
25
- "learning_rate": 1.944055944055944e-05,
26
- "loss": 0.3832,
27
  "step": 1500
28
  },
29
  {
30
- "epoch": 0.56,
31
- "learning_rate": 1.9254079254079257e-05,
32
- "loss": 0.3796,
33
  "step": 2000
34
  },
35
  {
36
- "epoch": 0.7,
37
- "learning_rate": 1.906759906759907e-05,
38
- "loss": 0.3764,
39
  "step": 2500
40
  },
41
  {
42
- "epoch": 0.84,
43
- "learning_rate": 1.888111888111888e-05,
44
- "loss": 0.3732,
45
  "step": 3000
46
  },
47
  {
48
- "epoch": 0.98,
49
- "learning_rate": 1.8694638694638696e-05,
50
- "loss": 0.3715,
51
  "step": 3500
52
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  {
54
  "epoch": 1.0,
55
- "eval_akurasi": 0.304635761589404,
56
- "eval_f1": 0.41270686365854736,
57
- "eval_loss": 0.36894935369491577,
58
- "eval_roc_auc": 0.6308684676414661,
59
- "eval_runtime": 132.2279,
60
- "eval_samples_per_second": 115.339,
61
- "eval_steps_per_second": 7.215,
62
- "step": 3575
 
 
 
 
 
 
63
  },
64
  {
65
  "epoch": 1.12,
66
- "learning_rate": 1.850815850815851e-05,
67
- "loss": 0.3592,
68
- "step": 4000
 
 
 
 
 
 
69
  },
70
  {
71
  "epoch": 1.26,
72
- "learning_rate": 1.8321678321678323e-05,
73
- "loss": 0.3515,
74
- "step": 4500
 
 
 
 
 
 
75
  },
76
  {
77
  "epoch": 1.4,
78
- "learning_rate": 1.8135198135198137e-05,
79
- "loss": 0.3529,
80
- "step": 5000
 
 
 
 
 
 
81
  },
82
  {
83
  "epoch": 1.54,
84
- "learning_rate": 1.794871794871795e-05,
85
- "loss": 0.3497,
86
- "step": 5500
 
 
 
 
 
 
87
  },
88
  {
89
  "epoch": 1.68,
90
- "learning_rate": 1.7762237762237765e-05,
91
- "loss": 0.3489,
92
- "step": 6000
 
 
 
 
 
 
93
  },
94
  {
95
  "epoch": 1.82,
96
- "learning_rate": 1.7575757575757576e-05,
97
- "loss": 0.3511,
98
- "step": 6500
 
 
 
 
 
 
99
  },
100
  {
101
  "epoch": 1.96,
102
- "learning_rate": 1.738927738927739e-05,
103
- "loss": 0.3494,
104
- "step": 7000
105
  },
106
  {
107
  "epoch": 2.0,
108
- "eval_akurasi": 0.36633663366336633,
109
- "eval_f1": 0.4571545380212592,
110
- "eval_loss": 0.36878135800361633,
111
- "eval_roc_auc": 0.653645662579503,
112
- "eval_runtime": 132.2013,
113
- "eval_samples_per_second": 115.362,
114
- "eval_steps_per_second": 7.216,
115
- "step": 7150
 
 
 
 
 
 
116
  },
117
  {
118
  "epoch": 2.1,
119
- "learning_rate": 1.7202797202797203e-05,
120
- "loss": 0.327,
121
- "step": 7500
 
 
 
 
 
 
122
  },
123
  {
124
  "epoch": 2.24,
125
- "learning_rate": 1.7016317016317017e-05,
126
- "loss": 0.3207,
127
- "step": 8000
 
 
 
 
 
 
128
  },
129
  {
130
  "epoch": 2.38,
131
- "learning_rate": 1.682983682983683e-05,
132
- "loss": 0.3178,
133
- "step": 8500
 
 
 
 
 
 
134
  },
135
  {
136
  "epoch": 2.52,
137
- "learning_rate": 1.6643356643356645e-05,
138
- "loss": 0.3141,
139
- "step": 9000
 
 
 
 
 
 
140
  },
141
  {
142
  "epoch": 2.66,
143
- "learning_rate": 1.645687645687646e-05,
144
- "loss": 0.3183,
145
- "step": 9500
 
 
 
 
 
 
146
  },
147
  {
148
  "epoch": 2.8,
149
- "learning_rate": 1.6270396270396273e-05,
150
- "loss": 0.3226,
151
- "step": 10000
 
 
 
 
 
 
152
  },
153
  {
154
  "epoch": 2.94,
155
- "learning_rate": 1.6083916083916083e-05,
156
- "loss": 0.322,
157
- "step": 10500
158
  },
159
  {
160
  "epoch": 3.0,
161
- "eval_akurasi": 0.4225296701855616,
162
- "eval_f1": 0.4886535552193646,
163
- "eval_loss": 0.3799174726009369,
164
- "eval_roc_auc": 0.6730296374008262,
165
- "eval_runtime": 132.2733,
166
- "eval_samples_per_second": 115.299,
167
- "eval_steps_per_second": 7.212,
168
- "step": 10725
 
 
 
 
 
 
169
  },
170
  {
171
  "epoch": 3.08,
172
- "learning_rate": 1.5897435897435897e-05,
173
- "loss": 0.2957,
174
- "step": 11000
 
 
 
 
 
 
175
  },
176
  {
177
  "epoch": 3.22,
178
- "learning_rate": 1.5710955710955715e-05,
179
- "loss": 0.2751,
180
- "step": 11500
 
 
 
 
 
 
181
  },
182
  {
183
  "epoch": 3.36,
184
- "learning_rate": 1.5524475524475525e-05,
185
- "loss": 0.2724,
186
- "step": 12000
 
 
 
 
 
 
187
  },
188
  {
189
  "epoch": 3.5,
190
- "learning_rate": 1.533799533799534e-05,
191
- "loss": 0.2827,
192
- "step": 12500
 
 
 
 
 
 
193
  },
194
  {
195
  "epoch": 3.64,
196
- "learning_rate": 1.5151515151515153e-05,
197
- "loss": 0.2846,
198
- "step": 13000
 
 
 
 
 
 
199
  },
200
  {
201
  "epoch": 3.78,
202
- "learning_rate": 1.4965034965034965e-05,
203
- "loss": 0.2826,
204
- "step": 13500
 
 
 
 
 
 
205
  },
206
  {
207
  "epoch": 3.92,
208
- "learning_rate": 1.4778554778554779e-05,
209
- "loss": 0.2863,
210
- "step": 14000
 
 
 
 
 
 
211
  },
212
  {
213
  "epoch": 4.0,
214
- "eval_akurasi": 0.448101763818766,
215
- "eval_f1": 0.49511729307051994,
216
- "eval_loss": 0.412160724401474,
217
- "eval_roc_auc": 0.6790866172709985,
218
- "eval_runtime": 132.2949,
219
- "eval_samples_per_second": 115.28,
220
- "eval_steps_per_second": 7.211,
221
- "step": 14300
222
  },
223
  {
224
  "epoch": 4.06,
225
- "learning_rate": 1.4592074592074595e-05,
226
- "loss": 0.2652,
227
- "step": 14500
 
 
 
 
 
 
228
  },
229
  {
230
  "epoch": 4.2,
231
- "learning_rate": 1.4405594405594407e-05,
232
- "loss": 0.2333,
233
- "step": 15000
 
 
 
 
 
 
234
  },
235
  {
236
  "epoch": 4.34,
237
- "learning_rate": 1.421911421911422e-05,
238
- "loss": 0.2457,
239
- "step": 15500
 
 
 
 
 
 
240
  },
241
  {
242
  "epoch": 4.48,
243
- "learning_rate": 1.4032634032634035e-05,
244
- "loss": 0.244,
245
- "step": 16000
 
 
 
 
 
 
246
  },
247
  {
248
  "epoch": 4.62,
249
- "learning_rate": 1.3846153846153847e-05,
250
- "loss": 0.2438,
251
- "step": 16500
 
 
 
 
 
 
252
  },
253
  {
254
  "epoch": 4.76,
255
- "learning_rate": 1.365967365967366e-05,
256
- "loss": 0.2448,
257
- "step": 17000
 
 
 
 
 
 
258
  },
259
  {
260
  "epoch": 4.9,
261
- "learning_rate": 1.3473193473193473e-05,
262
- "loss": 0.2484,
263
- "step": 17500
 
 
 
 
 
 
264
  },
265
  {
266
  "epoch": 5.0,
267
- "eval_akurasi": 0.45597010032129043,
268
- "eval_f1": 0.48838265609168097,
269
- "eval_loss": 0.4510646164417267,
270
- "eval_roc_auc": 0.6768326667103797,
271
- "eval_runtime": 132.311,
272
- "eval_samples_per_second": 115.266,
273
- "eval_steps_per_second": 7.21,
274
- "step": 17875
275
- },
276
- {
277
- "epoch": 5.03,
278
- "learning_rate": 1.3286713286713288e-05,
279
- "loss": 0.2364,
280
- "step": 18000
281
  },
282
  {
283
- "epoch": 5.17,
284
- "learning_rate": 1.3100233100233102e-05,
285
- "loss": 0.2016,
286
- "step": 18500
287
  },
288
  {
289
- "epoch": 5.31,
290
- "learning_rate": 1.2913752913752915e-05,
291
- "loss": 0.2034,
292
- "step": 19000
293
  },
294
  {
295
- "epoch": 5.45,
296
- "learning_rate": 1.2727272727272728e-05,
297
- "loss": 0.2131,
298
- "step": 19500
299
  },
300
  {
301
- "epoch": 5.59,
302
- "learning_rate": 1.254079254079254e-05,
303
- "loss": 0.2076,
304
- "step": 20000
305
  },
306
  {
307
- "epoch": 5.73,
308
- "learning_rate": 1.2354312354312355e-05,
309
- "loss": 0.2032,
310
- "step": 20500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  },
312
  {
313
  "epoch": 5.87,
314
- "learning_rate": 1.216783216783217e-05,
315
- "loss": 0.2134,
316
- "step": 21000
 
 
 
 
 
 
317
  },
318
  {
319
  "epoch": 6.0,
320
- "eval_akurasi": 0.462986033702708,
321
- "eval_f1": 0.4940430572005852,
322
- "eval_loss": 0.4969989061355591,
323
- "eval_roc_auc": 0.680316044849518,
324
- "eval_runtime": 132.3664,
325
- "eval_samples_per_second": 115.218,
326
- "eval_steps_per_second": 7.207,
327
- "step": 21450
328
  }
329
  ],
330
  "logging_steps": 500,
331
- "max_steps": 53625,
332
- "num_train_epochs": 15,
333
  "save_steps": 500,
334
- "total_flos": 4.514475841140326e+16,
335
  "trial_name": null,
336
  "trial_params": null
337
  }
 
1
  {
2
+ "best_metric": 0.49651709628372404,
3
+ "best_model_checkpoint": "./indobertweet-review-rating/checkpoint-28596",
4
  "epoch": 6.0,
5
  "eval_steps": 500,
6
+ "global_step": 42894,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.07,
13
+ "learning_rate": 1.9825150370681216e-05,
14
+ "loss": 0.43,
15
  "step": 500
16
  },
17
  {
18
+ "epoch": 0.14,
19
+ "learning_rate": 1.965030074136243e-05,
20
+ "loss": 0.3942,
21
  "step": 1000
22
  },
23
  {
24
+ "epoch": 0.21,
25
+ "learning_rate": 1.9475451112043644e-05,
26
+ "loss": 0.3869,
27
  "step": 1500
28
  },
29
  {
30
+ "epoch": 0.28,
31
+ "learning_rate": 1.9300601482724858e-05,
32
+ "loss": 0.389,
33
  "step": 2000
34
  },
35
  {
36
+ "epoch": 0.35,
37
+ "learning_rate": 1.9125751853406072e-05,
38
+ "loss": 0.3822,
39
  "step": 2500
40
  },
41
  {
42
+ "epoch": 0.42,
43
+ "learning_rate": 1.8950902224087286e-05,
44
+ "loss": 0.3766,
45
  "step": 3000
46
  },
47
  {
48
+ "epoch": 0.49,
49
+ "learning_rate": 1.87760525947685e-05,
50
+ "loss": 0.382,
51
  "step": 3500
52
  },
53
+ {
54
+ "epoch": 0.56,
55
+ "learning_rate": 1.8601202965449715e-05,
56
+ "loss": 0.3849,
57
+ "step": 4000
58
+ },
59
+ {
60
+ "epoch": 0.63,
61
+ "learning_rate": 1.842635333613093e-05,
62
+ "loss": 0.3761,
63
+ "step": 4500
64
+ },
65
+ {
66
+ "epoch": 0.7,
67
+ "learning_rate": 1.8251503706812143e-05,
68
+ "loss": 0.376,
69
+ "step": 5000
70
+ },
71
+ {
72
+ "epoch": 0.77,
73
+ "learning_rate": 1.8076654077493357e-05,
74
+ "loss": 0.3789,
75
+ "step": 5500
76
+ },
77
+ {
78
+ "epoch": 0.84,
79
+ "learning_rate": 1.790180444817457e-05,
80
+ "loss": 0.3677,
81
+ "step": 6000
82
+ },
83
+ {
84
+ "epoch": 0.91,
85
+ "learning_rate": 1.7726954818855785e-05,
86
+ "loss": 0.3728,
87
+ "step": 6500
88
+ },
89
+ {
90
+ "epoch": 0.98,
91
+ "learning_rate": 1.7552105189537e-05,
92
+ "loss": 0.3748,
93
+ "step": 7000
94
+ },
95
  {
96
  "epoch": 1.0,
97
+ "eval_akurasi": 0.3444364303980067,
98
+ "eval_f1": 0.4382533261041832,
99
+ "eval_loss": 0.37478938698768616,
100
+ "eval_roc_auc": 0.6437938495836338,
101
+ "eval_runtime": 259.2098,
102
+ "eval_samples_per_second": 58.837,
103
+ "eval_steps_per_second": 7.357,
104
+ "step": 7149
105
+ },
106
+ {
107
+ "epoch": 1.05,
108
+ "learning_rate": 1.7377255560218214e-05,
109
+ "loss": 0.356,
110
+ "step": 7500
111
  },
112
  {
113
  "epoch": 1.12,
114
+ "learning_rate": 1.7202405930899428e-05,
115
+ "loss": 0.3519,
116
+ "step": 8000
117
+ },
118
+ {
119
+ "epoch": 1.19,
120
+ "learning_rate": 1.7027556301580642e-05,
121
+ "loss": 0.3475,
122
+ "step": 8500
123
  },
124
  {
125
  "epoch": 1.26,
126
+ "learning_rate": 1.6852706672261856e-05,
127
+ "loss": 0.3497,
128
+ "step": 9000
129
+ },
130
+ {
131
+ "epoch": 1.33,
132
+ "learning_rate": 1.667785704294307e-05,
133
+ "loss": 0.3467,
134
+ "step": 9500
135
  },
136
  {
137
  "epoch": 1.4,
138
+ "learning_rate": 1.6503007413624284e-05,
139
+ "loss": 0.3518,
140
+ "step": 10000
141
+ },
142
+ {
143
+ "epoch": 1.47,
144
+ "learning_rate": 1.63281577843055e-05,
145
+ "loss": 0.345,
146
+ "step": 10500
147
  },
148
  {
149
  "epoch": 1.54,
150
+ "learning_rate": 1.6153308154986713e-05,
151
+ "loss": 0.3461,
152
+ "step": 11000
153
+ },
154
+ {
155
+ "epoch": 1.61,
156
+ "learning_rate": 1.5978458525667927e-05,
157
+ "loss": 0.3439,
158
+ "step": 11500
159
  },
160
  {
161
  "epoch": 1.68,
162
+ "learning_rate": 1.580360889634914e-05,
163
+ "loss": 0.3465,
164
+ "step": 12000
165
+ },
166
+ {
167
+ "epoch": 1.75,
168
+ "learning_rate": 1.5628759267030355e-05,
169
+ "loss": 0.3478,
170
+ "step": 12500
171
  },
172
  {
173
  "epoch": 1.82,
174
+ "learning_rate": 1.545390963771157e-05,
175
+ "loss": 0.3393,
176
+ "step": 13000
177
+ },
178
+ {
179
+ "epoch": 1.89,
180
+ "learning_rate": 1.5279060008392783e-05,
181
+ "loss": 0.3432,
182
+ "step": 13500
183
  },
184
  {
185
  "epoch": 1.96,
186
+ "learning_rate": 1.5104210379073997e-05,
187
+ "loss": 0.3463,
188
+ "step": 14000
189
  },
190
  {
191
  "epoch": 2.0,
192
+ "eval_akurasi": 0.38469608550259,
193
+ "eval_f1": 0.46908843863645444,
194
+ "eval_loss": 0.36731091141700745,
195
+ "eval_roc_auc": 0.6604239066290736,
196
+ "eval_runtime": 259.0823,
197
+ "eval_samples_per_second": 58.865,
198
+ "eval_steps_per_second": 7.361,
199
+ "step": 14298
200
+ },
201
+ {
202
+ "epoch": 2.03,
203
+ "learning_rate": 1.492936074975521e-05,
204
+ "loss": 0.3333,
205
+ "step": 14500
206
  },
207
  {
208
  "epoch": 2.1,
209
+ "learning_rate": 1.4754511120436426e-05,
210
+ "loss": 0.3077,
211
+ "step": 15000
212
+ },
213
+ {
214
+ "epoch": 2.17,
215
+ "learning_rate": 1.457966149111764e-05,
216
+ "loss": 0.3109,
217
+ "step": 15500
218
  },
219
  {
220
  "epoch": 2.24,
221
+ "learning_rate": 1.4404811861798856e-05,
222
+ "loss": 0.31,
223
+ "step": 16000
224
+ },
225
+ {
226
+ "epoch": 2.31,
227
+ "learning_rate": 1.4229962232480068e-05,
228
+ "loss": 0.3054,
229
+ "step": 16500
230
  },
231
  {
232
  "epoch": 2.38,
233
+ "learning_rate": 1.4055112603161282e-05,
234
+ "loss": 0.3105,
235
+ "step": 17000
236
+ },
237
+ {
238
+ "epoch": 2.45,
239
+ "learning_rate": 1.3880262973842498e-05,
240
+ "loss": 0.3142,
241
+ "step": 17500
242
  },
243
  {
244
  "epoch": 2.52,
245
+ "learning_rate": 1.370541334452371e-05,
246
+ "loss": 0.3097,
247
+ "step": 18000
248
+ },
249
+ {
250
+ "epoch": 2.59,
251
+ "learning_rate": 1.3530563715204925e-05,
252
+ "loss": 0.3145,
253
+ "step": 18500
254
  },
255
  {
256
  "epoch": 2.66,
257
+ "learning_rate": 1.335571408588614e-05,
258
+ "loss": 0.3153,
259
+ "step": 19000
260
+ },
261
+ {
262
+ "epoch": 2.73,
263
+ "learning_rate": 1.3180864456567353e-05,
264
+ "loss": 0.3163,
265
+ "step": 19500
266
  },
267
  {
268
  "epoch": 2.8,
269
+ "learning_rate": 1.3006014827248567e-05,
270
+ "loss": 0.3044,
271
+ "step": 20000
272
+ },
273
+ {
274
+ "epoch": 2.87,
275
+ "learning_rate": 1.2831165197929783e-05,
276
+ "loss": 0.3118,
277
+ "step": 20500
278
  },
279
  {
280
  "epoch": 2.94,
281
+ "learning_rate": 1.2656315568610995e-05,
282
+ "loss": 0.3115,
283
+ "step": 21000
284
  },
285
  {
286
  "epoch": 3.0,
287
+ "eval_akurasi": 0.4235787817192315,
288
+ "eval_f1": 0.4809589398056807,
289
+ "eval_loss": 0.3863948583602905,
290
+ "eval_roc_auc": 0.6695626516294014,
291
+ "eval_runtime": 258.9039,
292
+ "eval_samples_per_second": 58.906,
293
+ "eval_steps_per_second": 7.366,
294
+ "step": 21447
295
+ },
296
+ {
297
+ "epoch": 3.01,
298
+ "learning_rate": 1.248146593929221e-05,
299
+ "loss": 0.3029,
300
+ "step": 21500
301
  },
302
  {
303
  "epoch": 3.08,
304
+ "learning_rate": 1.2306616309973425e-05,
305
+ "loss": 0.2587,
306
+ "step": 22000
307
+ },
308
+ {
309
+ "epoch": 3.15,
310
+ "learning_rate": 1.2131766680654638e-05,
311
+ "loss": 0.2592,
312
+ "step": 22500
313
  },
314
  {
315
  "epoch": 3.22,
316
+ "learning_rate": 1.1956917051335852e-05,
317
+ "loss": 0.2679,
318
+ "step": 23000
319
+ },
320
+ {
321
+ "epoch": 3.29,
322
+ "learning_rate": 1.1782067422017068e-05,
323
+ "loss": 0.2645,
324
+ "step": 23500
325
  },
326
  {
327
  "epoch": 3.36,
328
+ "learning_rate": 1.160721779269828e-05,
329
+ "loss": 0.2669,
330
+ "step": 24000
331
+ },
332
+ {
333
+ "epoch": 3.43,
334
+ "learning_rate": 1.1432368163379494e-05,
335
+ "loss": 0.2676,
336
+ "step": 24500
337
  },
338
  {
339
  "epoch": 3.5,
340
+ "learning_rate": 1.125751853406071e-05,
341
+ "loss": 0.2683,
342
+ "step": 25000
343
+ },
344
+ {
345
+ "epoch": 3.57,
346
+ "learning_rate": 1.1082668904741923e-05,
347
+ "loss": 0.272,
348
+ "step": 25500
349
  },
350
  {
351
  "epoch": 3.64,
352
+ "learning_rate": 1.0907819275423137e-05,
353
+ "loss": 0.2718,
354
+ "step": 26000
355
+ },
356
+ {
357
+ "epoch": 3.71,
358
+ "learning_rate": 1.0732969646104353e-05,
359
+ "loss": 0.2731,
360
+ "step": 26500
361
  },
362
  {
363
  "epoch": 3.78,
364
+ "learning_rate": 1.0558120016785565e-05,
365
+ "loss": 0.2641,
366
+ "step": 27000
367
+ },
368
+ {
369
+ "epoch": 3.85,
370
+ "learning_rate": 1.0383270387466779e-05,
371
+ "loss": 0.2795,
372
+ "step": 27500
373
  },
374
  {
375
  "epoch": 3.92,
376
+ "learning_rate": 1.0208420758147995e-05,
377
+ "loss": 0.2715,
378
+ "step": 28000
379
+ },
380
+ {
381
+ "epoch": 3.99,
382
+ "learning_rate": 1.0033571128829207e-05,
383
+ "loss": 0.2618,
384
+ "step": 28500
385
  },
386
  {
387
  "epoch": 4.0,
388
+ "eval_akurasi": 0.45898629598059143,
389
+ "eval_f1": 0.49651709628372404,
390
+ "eval_loss": 0.43184059858322144,
391
+ "eval_roc_auc": 0.6809307586387777,
392
+ "eval_runtime": 259.0229,
393
+ "eval_samples_per_second": 58.879,
394
+ "eval_steps_per_second": 7.362,
395
+ "step": 28596
396
  },
397
  {
398
  "epoch": 4.06,
399
+ "learning_rate": 9.858721499510422e-06,
400
+ "loss": 0.2317,
401
+ "step": 29000
402
+ },
403
+ {
404
+ "epoch": 4.13,
405
+ "learning_rate": 9.683871870191636e-06,
406
+ "loss": 0.2289,
407
+ "step": 29500
408
  },
409
  {
410
  "epoch": 4.2,
411
+ "learning_rate": 9.50902224087285e-06,
412
+ "loss": 0.2273,
413
+ "step": 30000
414
+ },
415
+ {
416
+ "epoch": 4.27,
417
+ "learning_rate": 9.334172611554064e-06,
418
+ "loss": 0.2344,
419
+ "step": 30500
420
  },
421
  {
422
  "epoch": 4.34,
423
+ "learning_rate": 9.159322982235278e-06,
424
+ "loss": 0.2256,
425
+ "step": 31000
426
+ },
427
+ {
428
+ "epoch": 4.41,
429
+ "learning_rate": 8.984473352916492e-06,
430
+ "loss": 0.2265,
431
+ "step": 31500
432
  },
433
  {
434
  "epoch": 4.48,
435
+ "learning_rate": 8.809623723597706e-06,
436
+ "loss": 0.2274,
437
+ "step": 32000
438
+ },
439
+ {
440
+ "epoch": 4.55,
441
+ "learning_rate": 8.63477409427892e-06,
442
+ "loss": 0.2315,
443
+ "step": 32500
444
  },
445
  {
446
  "epoch": 4.62,
447
+ "learning_rate": 8.459924464960135e-06,
448
+ "loss": 0.2231,
449
+ "step": 33000
450
+ },
451
+ {
452
+ "epoch": 4.69,
453
+ "learning_rate": 8.285074835641349e-06,
454
+ "loss": 0.233,
455
+ "step": 33500
456
  },
457
  {
458
  "epoch": 4.76,
459
+ "learning_rate": 8.110225206322563e-06,
460
+ "loss": 0.234,
461
+ "step": 34000
462
+ },
463
+ {
464
+ "epoch": 4.83,
465
+ "learning_rate": 7.935375577003777e-06,
466
+ "loss": 0.2237,
467
+ "step": 34500
468
  },
469
  {
470
  "epoch": 4.9,
471
+ "learning_rate": 7.760525947684991e-06,
472
+ "loss": 0.2316,
473
+ "step": 35000
474
+ },
475
+ {
476
+ "epoch": 4.97,
477
+ "learning_rate": 7.585676318366205e-06,
478
+ "loss": 0.2223,
479
+ "step": 35500
480
  },
481
  {
482
  "epoch": 5.0,
483
+ "eval_akurasi": 0.46829716084191203,
484
+ "eval_f1": 0.49530800928009977,
485
+ "eval_loss": 0.47835299372673035,
486
+ "eval_roc_auc": 0.6813979411186152,
487
+ "eval_runtime": 258.995,
488
+ "eval_samples_per_second": 58.885,
489
+ "eval_steps_per_second": 7.363,
490
+ "step": 35745
 
 
 
 
 
 
491
  },
492
  {
493
+ "epoch": 5.04,
494
+ "learning_rate": 7.41082668904742e-06,
495
+ "loss": 0.2077,
496
+ "step": 36000
497
  },
498
  {
499
+ "epoch": 5.11,
500
+ "learning_rate": 7.2359770597286336e-06,
501
+ "loss": 0.1861,
502
+ "step": 36500
503
  },
504
  {
505
+ "epoch": 5.18,
506
+ "learning_rate": 7.061127430409848e-06,
507
+ "loss": 0.1971,
508
+ "step": 37000
509
  },
510
  {
511
+ "epoch": 5.25,
512
+ "learning_rate": 6.886277801091063e-06,
513
+ "loss": 0.196,
514
+ "step": 37500
515
  },
516
  {
517
+ "epoch": 5.32,
518
+ "learning_rate": 6.711428171772276e-06,
519
+ "loss": 0.1908,
520
+ "step": 38000
521
+ },
522
+ {
523
+ "epoch": 5.39,
524
+ "learning_rate": 6.53657854245349e-06,
525
+ "loss": 0.1911,
526
+ "step": 38500
527
+ },
528
+ {
529
+ "epoch": 5.46,
530
+ "learning_rate": 6.361728913134705e-06,
531
+ "loss": 0.1903,
532
+ "step": 39000
533
+ },
534
+ {
535
+ "epoch": 5.53,
536
+ "learning_rate": 6.186879283815918e-06,
537
+ "loss": 0.1976,
538
+ "step": 39500
539
+ },
540
+ {
541
+ "epoch": 5.6,
542
+ "learning_rate": 6.012029654497133e-06,
543
+ "loss": 0.1971,
544
+ "step": 40000
545
+ },
546
+ {
547
+ "epoch": 5.67,
548
+ "learning_rate": 5.8371800251783475e-06,
549
+ "loss": 0.1928,
550
+ "step": 40500
551
+ },
552
+ {
553
+ "epoch": 5.74,
554
+ "learning_rate": 5.662330395859561e-06,
555
+ "loss": 0.1927,
556
+ "step": 41000
557
+ },
558
+ {
559
+ "epoch": 5.81,
560
+ "learning_rate": 5.487480766540776e-06,
561
+ "loss": 0.1917,
562
+ "step": 41500
563
  },
564
  {
565
  "epoch": 5.87,
566
+ "learning_rate": 5.31263113722199e-06,
567
+ "loss": 0.2022,
568
+ "step": 42000
569
+ },
570
+ {
571
+ "epoch": 5.94,
572
+ "learning_rate": 5.137781507903203e-06,
573
+ "loss": 0.1988,
574
+ "step": 42500
575
  },
576
  {
577
  "epoch": 6.0,
578
+ "eval_akurasi": 0.4731493016851354,
579
+ "eval_f1": 0.496174038362557,
580
+ "eval_loss": 0.5363304018974304,
581
+ "eval_roc_auc": 0.6824306602845714,
582
+ "eval_runtime": 259.1769,
583
+ "eval_samples_per_second": 58.844,
584
+ "eval_steps_per_second": 7.358,
585
+ "step": 42894
586
  }
587
  ],
588
  "logging_steps": 500,
589
+ "max_steps": 57192,
590
+ "num_train_epochs": 8,
591
  "save_steps": 500,
592
+ "total_flos": 6.77171376171049e+16,
593
  "trial_name": null,
594
  "trial_params": null
595
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75cfa005ff68f329bd629346f5624f4aab8e968cc9ff3ad03c353bfa81cddb41
3
  size 4091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9a439502e64b076bc34244bcf4c6cc9e5e81fb300921726e1d8aa752c17d504
3
  size 4091