akahana committed on
Commit
28da4a3
1 Parent(s): 34b582f

End of training

Browse files
README.md CHANGED
@@ -1,9 +1,24 @@
1
  ---
2
  tags:
3
  - generated_from_trainer
 
 
 
 
4
  model-index:
5
  - name: roberta-javanese
6
- results: []
 
 
 
 
 
 
 
 
 
 
 
7
  ---
8
 
9
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -11,7 +26,10 @@ should probably proofread and complete it, then remove this comment. -->
11
 
12
  # roberta-javanese
13
 
14
- This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 
 
 
15
 
16
  ## Model description
17
 
 
1
  ---
2
  tags:
3
  - generated_from_trainer
4
+ datasets:
5
+ - akahana/GlotCC-V1-jav-Latn
6
+ metrics:
7
+ - accuracy
8
  model-index:
9
  - name: roberta-javanese
10
+ results:
11
+ - task:
12
+ name: Masked Language Modeling
13
+ type: fill-mask
14
+ dataset:
15
+ name: akahana/GlotCC-V1-jav-Latn default
16
+ type: akahana/GlotCC-V1-jav-Latn
17
+ args: default
18
+ metrics:
19
+ - name: Accuracy
20
+ type: accuracy
21
+ value: 0.47418153806650404
22
  ---
23
 
24
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
26
 
27
  # roberta-javanese
28
 
29
+ This model is a fine-tuned version of [](https://huggingface.co/) on the akahana/GlotCC-V1-jav-Latn default dataset.
30
+ It achieves the following results on the evaluation set:
31
+ - Loss: 3.3326
32
+ - Accuracy: 0.4742
33
 
34
  ## Model description
35
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 15.0,
3
- "eval_accuracy": 0.4158020342790051,
4
- "eval_loss": 3.806352376937866,
5
- "eval_runtime": 29.338,
6
  "eval_samples": 4053,
7
- "eval_samples_per_second": 138.149,
8
- "eval_steps_per_second": 34.563,
9
- "perplexity": 44.98604708614023,
10
- "total_flos": 7.919584575053184e+16,
11
- "train_loss": 1.1771604976443562,
12
- "train_runtime": 4937.567,
13
  "train_samples": 80219,
14
- "train_samples_per_second": 243.7,
15
- "train_steps_per_second": 15.232
16
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.47418153806650404,
4
+ "eval_loss": 3.3325774669647217,
5
+ "eval_runtime": 29.361,
6
  "eval_samples": 4053,
7
+ "eval_samples_per_second": 138.04,
8
+ "eval_steps_per_second": 34.536,
9
+ "perplexity": 28.01044473770335,
10
+ "total_flos": 1.0559446100070912e+17,
11
+ "train_loss": 0.8717835233465437,
12
+ "train_runtime": 7516.252,
13
  "train_samples": 80219,
14
+ "train_samples_per_second": 213.455,
15
+ "train_steps_per_second": 13.342
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 15.0,
3
- "eval_accuracy": 0.4158020342790051,
4
- "eval_loss": 3.806352376937866,
5
- "eval_runtime": 29.338,
6
  "eval_samples": 4053,
7
- "eval_samples_per_second": 138.149,
8
- "eval_steps_per_second": 34.563,
9
- "perplexity": 44.98604708614023
10
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "eval_accuracy": 0.47418153806650404,
4
+ "eval_loss": 3.3325774669647217,
5
+ "eval_runtime": 29.361,
6
  "eval_samples": 4053,
7
+ "eval_samples_per_second": 138.04,
8
+ "eval_steps_per_second": 34.536,
9
+ "perplexity": 28.01044473770335
10
  }
runs/Jul15_01-09-56_bee445b9fed3/events.out.tfevents.1721013478.bee445b9fed3.1176.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a73c4ae31ef55c1f913a3859cad46ceb372a87e8c3124c1f2abb0969ec21e31
3
+ size 417
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 15.0,
3
- "total_flos": 7.919584575053184e+16,
4
- "train_loss": 1.1771604976443562,
5
- "train_runtime": 4937.567,
6
  "train_samples": 80219,
7
- "train_samples_per_second": 243.7,
8
- "train_steps_per_second": 15.232
9
  }
 
1
  {
2
+ "epoch": 20.0,
3
+ "total_flos": 1.0559446100070912e+17,
4
+ "train_loss": 0.8717835233465437,
5
+ "train_runtime": 7516.252,
6
  "train_samples": 80219,
7
+ "train_samples_per_second": 213.455,
8
+ "train_steps_per_second": 13.342
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.0,
5
  "eval_steps": 500,
6
- "global_step": 75210,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1084,12 +1084,371 @@
1084
  "train_runtime": 4937.567,
1085
  "train_samples_per_second": 243.7,
1086
  "train_steps_per_second": 15.232
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1087
  }
1088
  ],
1089
  "logging_steps": 500,
1090
- "max_steps": 75210,
1091
  "num_input_tokens_seen": 0,
1092
- "num_train_epochs": 15,
1093
  "save_steps": 500,
1094
  "stateful_callbacks": {
1095
  "TrainerControl": {
@@ -1103,7 +1462,7 @@
1103
  "attributes": {}
1104
  }
1105
  },
1106
- "total_flos": 7.919584575053184e+16,
1107
  "train_batch_size": 16,
1108
  "trial_name": null,
1109
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.0,
5
  "eval_steps": 500,
6
+ "global_step": 100280,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1084
  "train_runtime": 4937.567,
1085
  "train_samples_per_second": 243.7,
1086
  "train_steps_per_second": 15.232
1087
+ },
1088
+ {
1089
+ "epoch": 15.05783805345034,
1090
+ "grad_norm": 6.581785202026367,
1091
+ "learning_rate": 4.985540486637415e-05,
1092
+ "loss": 3.8491,
1093
+ "step": 75500
1094
+ },
1095
+ {
1096
+ "epoch": 15.157558835261268,
1097
+ "grad_norm": 6.372396469116211,
1098
+ "learning_rate": 4.960610291184683e-05,
1099
+ "loss": 3.8838,
1100
+ "step": 76000
1101
+ },
1102
+ {
1103
+ "epoch": 15.257279617072198,
1104
+ "grad_norm": 6.738864421844482,
1105
+ "learning_rate": 4.935680095731951e-05,
1106
+ "loss": 3.8834,
1107
+ "step": 76500
1108
+ },
1109
+ {
1110
+ "epoch": 15.357000398883127,
1111
+ "grad_norm": 6.700061798095703,
1112
+ "learning_rate": 4.910749900279219e-05,
1113
+ "loss": 3.8559,
1114
+ "step": 77000
1115
+ },
1116
+ {
1117
+ "epoch": 15.456721180694057,
1118
+ "grad_norm": 6.3839497566223145,
1119
+ "learning_rate": 4.8858197048264857e-05,
1120
+ "loss": 3.8275,
1121
+ "step": 77500
1122
+ },
1123
+ {
1124
+ "epoch": 15.556441962504985,
1125
+ "grad_norm": 6.165511131286621,
1126
+ "learning_rate": 4.860889509373754e-05,
1127
+ "loss": 3.803,
1128
+ "step": 78000
1129
+ },
1130
+ {
1131
+ "epoch": 15.656162744315916,
1132
+ "grad_norm": 5.800929069519043,
1133
+ "learning_rate": 4.8359593139210215e-05,
1134
+ "loss": 3.8,
1135
+ "step": 78500
1136
+ },
1137
+ {
1138
+ "epoch": 15.755883526126844,
1139
+ "grad_norm": 6.714051246643066,
1140
+ "learning_rate": 4.811029118468289e-05,
1141
+ "loss": 3.797,
1142
+ "step": 79000
1143
+ },
1144
+ {
1145
+ "epoch": 15.855604307937774,
1146
+ "grad_norm": 6.74777889251709,
1147
+ "learning_rate": 4.786098923015557e-05,
1148
+ "loss": 3.7759,
1149
+ "step": 79500
1150
+ },
1151
+ {
1152
+ "epoch": 15.955325089748703,
1153
+ "grad_norm": 6.980929374694824,
1154
+ "learning_rate": 4.761168727562824e-05,
1155
+ "loss": 3.7445,
1156
+ "step": 80000
1157
+ },
1158
+ {
1159
+ "epoch": 16.05504587155963,
1160
+ "grad_norm": 6.54088020324707,
1161
+ "learning_rate": 4.736238532110092e-05,
1162
+ "loss": 3.6805,
1163
+ "step": 80500
1164
+ },
1165
+ {
1166
+ "epoch": 16.15476665337056,
1167
+ "grad_norm": 5.999478340148926,
1168
+ "learning_rate": 4.7113083366573594e-05,
1169
+ "loss": 3.6537,
1170
+ "step": 81000
1171
+ },
1172
+ {
1173
+ "epoch": 16.254487435181492,
1174
+ "grad_norm": 6.384885311126709,
1175
+ "learning_rate": 4.686378141204627e-05,
1176
+ "loss": 3.6522,
1177
+ "step": 81500
1178
+ },
1179
+ {
1180
+ "epoch": 16.354208216992422,
1181
+ "grad_norm": 6.624803066253662,
1182
+ "learning_rate": 4.661447945751895e-05,
1183
+ "loss": 3.6302,
1184
+ "step": 82000
1185
+ },
1186
+ {
1187
+ "epoch": 16.453928998803352,
1188
+ "grad_norm": 6.454346656799316,
1189
+ "learning_rate": 4.636567610690068e-05,
1190
+ "loss": 3.6179,
1191
+ "step": 82500
1192
+ },
1193
+ {
1194
+ "epoch": 16.55364978061428,
1195
+ "grad_norm": 6.266842365264893,
1196
+ "learning_rate": 4.611637415237336e-05,
1197
+ "loss": 3.6265,
1198
+ "step": 83000
1199
+ },
1200
+ {
1201
+ "epoch": 16.65337056242521,
1202
+ "grad_norm": 6.608065128326416,
1203
+ "learning_rate": 4.5867072197846036e-05,
1204
+ "loss": 3.6105,
1205
+ "step": 83500
1206
+ },
1207
+ {
1208
+ "epoch": 16.75309134423614,
1209
+ "grad_norm": 6.4489426612854,
1210
+ "learning_rate": 4.5617770243318705e-05,
1211
+ "loss": 3.5994,
1212
+ "step": 84000
1213
+ },
1214
+ {
1215
+ "epoch": 16.85281212604707,
1216
+ "grad_norm": 6.433938503265381,
1217
+ "learning_rate": 4.536896689270044e-05,
1218
+ "loss": 3.5648,
1219
+ "step": 84500
1220
+ },
1221
+ {
1222
+ "epoch": 16.952532907857996,
1223
+ "grad_norm": 7.4558610916137695,
1224
+ "learning_rate": 4.511966493817312e-05,
1225
+ "loss": 3.5746,
1226
+ "step": 85000
1227
+ },
1228
+ {
1229
+ "epoch": 17.052253689668927,
1230
+ "grad_norm": 5.742049217224121,
1231
+ "learning_rate": 4.4870362983645795e-05,
1232
+ "loss": 3.5378,
1233
+ "step": 85500
1234
+ },
1235
+ {
1236
+ "epoch": 17.151974471479857,
1237
+ "grad_norm": 6.346868515014648,
1238
+ "learning_rate": 4.462106102911847e-05,
1239
+ "loss": 3.505,
1240
+ "step": 86000
1241
+ },
1242
+ {
1243
+ "epoch": 17.251695253290787,
1244
+ "grad_norm": 6.252668857574463,
1245
+ "learning_rate": 4.4371759074591147e-05,
1246
+ "loss": 3.4787,
1247
+ "step": 86500
1248
+ },
1249
+ {
1250
+ "epoch": 17.351416035101714,
1251
+ "grad_norm": 6.237195014953613,
1252
+ "learning_rate": 4.412245712006383e-05,
1253
+ "loss": 3.4914,
1254
+ "step": 87000
1255
+ },
1256
+ {
1257
+ "epoch": 17.451136816912644,
1258
+ "grad_norm": 7.106077194213867,
1259
+ "learning_rate": 4.3873653769445554e-05,
1260
+ "loss": 3.4641,
1261
+ "step": 87500
1262
+ },
1263
+ {
1264
+ "epoch": 17.550857598723574,
1265
+ "grad_norm": 7.160710334777832,
1266
+ "learning_rate": 4.362435181491823e-05,
1267
+ "loss": 3.4419,
1268
+ "step": 88000
1269
+ },
1270
+ {
1271
+ "epoch": 17.650578380534505,
1272
+ "grad_norm": 7.160135746002197,
1273
+ "learning_rate": 4.337504986039091e-05,
1274
+ "loss": 3.4604,
1275
+ "step": 88500
1276
+ },
1277
+ {
1278
+ "epoch": 17.75029916234543,
1279
+ "grad_norm": 6.785101890563965,
1280
+ "learning_rate": 4.312574790586358e-05,
1281
+ "loss": 3.432,
1282
+ "step": 89000
1283
+ },
1284
+ {
1285
+ "epoch": 17.85001994415636,
1286
+ "grad_norm": 5.990314960479736,
1287
+ "learning_rate": 4.287644595133626e-05,
1288
+ "loss": 3.4045,
1289
+ "step": 89500
1290
+ },
1291
+ {
1292
+ "epoch": 17.949740725967292,
1293
+ "grad_norm": 6.434844493865967,
1294
+ "learning_rate": 4.2627642600717995e-05,
1295
+ "loss": 3.4236,
1296
+ "step": 90000
1297
+ },
1298
+ {
1299
+ "epoch": 18.049461507778222,
1300
+ "grad_norm": 6.7937774658203125,
1301
+ "learning_rate": 4.2378340646190664e-05,
1302
+ "loss": 3.3902,
1303
+ "step": 90500
1304
+ },
1305
+ {
1306
+ "epoch": 18.14918228958915,
1307
+ "grad_norm": 7.1783576011657715,
1308
+ "learning_rate": 4.212903869166335e-05,
1309
+ "loss": 3.3545,
1310
+ "step": 91000
1311
+ },
1312
+ {
1313
+ "epoch": 18.24890307140008,
1314
+ "grad_norm": 6.374876022338867,
1315
+ "learning_rate": 4.187973673713602e-05,
1316
+ "loss": 3.3451,
1317
+ "step": 91500
1318
+ },
1319
+ {
1320
+ "epoch": 18.34862385321101,
1321
+ "grad_norm": 6.49647331237793,
1322
+ "learning_rate": 4.163093338651775e-05,
1323
+ "loss": 3.3452,
1324
+ "step": 92000
1325
+ },
1326
+ {
1327
+ "epoch": 18.44834463502194,
1328
+ "grad_norm": 6.785512924194336,
1329
+ "learning_rate": 4.138163143199043e-05,
1330
+ "loss": 3.3102,
1331
+ "step": 92500
1332
+ },
1333
+ {
1334
+ "epoch": 18.54806541683287,
1335
+ "grad_norm": 6.842392921447754,
1336
+ "learning_rate": 4.1132329477463106e-05,
1337
+ "loss": 3.3376,
1338
+ "step": 93000
1339
+ },
1340
+ {
1341
+ "epoch": 18.647786198643796,
1342
+ "grad_norm": 7.126637935638428,
1343
+ "learning_rate": 4.088302752293578e-05,
1344
+ "loss": 3.3249,
1345
+ "step": 93500
1346
+ },
1347
+ {
1348
+ "epoch": 18.747506980454727,
1349
+ "grad_norm": 5.808903217315674,
1350
+ "learning_rate": 4.063372556840846e-05,
1351
+ "loss": 3.2808,
1352
+ "step": 94000
1353
+ },
1354
+ {
1355
+ "epoch": 18.847227762265657,
1356
+ "grad_norm": 6.2346954345703125,
1357
+ "learning_rate": 4.0385420821699245e-05,
1358
+ "loss": 3.3189,
1359
+ "step": 94500
1360
+ },
1361
+ {
1362
+ "epoch": 18.946948544076587,
1363
+ "grad_norm": 6.60822057723999,
1364
+ "learning_rate": 4.013611886717192e-05,
1365
+ "loss": 3.3143,
1366
+ "step": 95000
1367
+ },
1368
+ {
1369
+ "epoch": 19.046669325887514,
1370
+ "grad_norm": 6.471176624298096,
1371
+ "learning_rate": 3.9886816912644597e-05,
1372
+ "loss": 3.2855,
1373
+ "step": 95500
1374
+ },
1375
+ {
1376
+ "epoch": 19.146390107698444,
1377
+ "grad_norm": 6.365059852600098,
1378
+ "learning_rate": 3.963751495811727e-05,
1379
+ "loss": 3.2616,
1380
+ "step": 96000
1381
+ },
1382
+ {
1383
+ "epoch": 19.246110889509374,
1384
+ "grad_norm": 6.250296592712402,
1385
+ "learning_rate": 3.9388213003589955e-05,
1386
+ "loss": 3.226,
1387
+ "step": 96500
1388
+ },
1389
+ {
1390
+ "epoch": 19.345831671320305,
1391
+ "grad_norm": 6.003506660461426,
1392
+ "learning_rate": 3.9138911049062624e-05,
1393
+ "loss": 3.2352,
1394
+ "step": 97000
1395
+ },
1396
+ {
1397
+ "epoch": 19.44555245313123,
1398
+ "grad_norm": 5.75541353225708,
1399
+ "learning_rate": 3.88896090945353e-05,
1400
+ "loss": 3.2395,
1401
+ "step": 97500
1402
+ },
1403
+ {
1404
+ "epoch": 19.54527323494216,
1405
+ "grad_norm": 6.684996604919434,
1406
+ "learning_rate": 3.864030714000798e-05,
1407
+ "loss": 3.2272,
1408
+ "step": 98000
1409
+ },
1410
+ {
1411
+ "epoch": 19.644994016753092,
1412
+ "grad_norm": 5.906820297241211,
1413
+ "learning_rate": 3.839100518548066e-05,
1414
+ "loss": 3.2096,
1415
+ "step": 98500
1416
+ },
1417
+ {
1418
+ "epoch": 19.744714798564022,
1419
+ "grad_norm": 6.240872383117676,
1420
+ "learning_rate": 3.814220183486238e-05,
1421
+ "loss": 3.2016,
1422
+ "step": 99000
1423
+ },
1424
+ {
1425
+ "epoch": 19.84443558037495,
1426
+ "grad_norm": 6.751197338104248,
1427
+ "learning_rate": 3.7892899880335066e-05,
1428
+ "loss": 3.2141,
1429
+ "step": 99500
1430
+ },
1431
+ {
1432
+ "epoch": 19.94415636218588,
1433
+ "grad_norm": 6.535121917724609,
1434
+ "learning_rate": 3.764359792580774e-05,
1435
+ "loss": 3.1829,
1436
+ "step": 100000
1437
+ },
1438
+ {
1439
+ "epoch": 20.0,
1440
+ "step": 100280,
1441
+ "total_flos": 1.0559446100070912e+17,
1442
+ "train_loss": 0.8717835233465437,
1443
+ "train_runtime": 7516.252,
1444
+ "train_samples_per_second": 213.455,
1445
+ "train_steps_per_second": 13.342
1446
  }
1447
  ],
1448
  "logging_steps": 500,
1449
+ "max_steps": 100280,
1450
  "num_input_tokens_seen": 0,
1451
+ "num_train_epochs": 20,
1452
  "save_steps": 500,
1453
  "stateful_callbacks": {
1454
  "TrainerControl": {
 
1462
  "attributes": {}
1463
  }
1464
  },
1465
+ "total_flos": 1.0559446100070912e+17,
1466
  "train_batch_size": 16,
1467
  "trial_name": null,
1468
  "trial_params": null