Upload folder using huggingface_hub
Browse files- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +416 -2
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9457000
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c0561e7ed4d443d1f7d722a0e016b22f4a79764883f29b0122abf03eae1c5ff1
|
3 |
size 9457000
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 18959674
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:029ab3a32b62f8cfa70402fb5a58342fc41f36f6d7f46eba10716734a8fbc3f6
|
3 |
size 18959674
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14180
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12534d7d85158ad1e24da867bd6732e982602f6d1d21ce2a4da4ae53f2b517d3
|
3 |
size 14180
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:391a1d3d730c41dd9c6567f59f3dba0d5b6ed61dbd6f102cc6921f5a5ecd6965
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 200,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1341,6 +1341,420 @@
|
|
1341 |
"eval_samples_per_second": 1.134,
|
1342 |
"eval_steps_per_second": 0.567,
|
1343 |
"step": 5800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1344 |
}
|
1345 |
],
|
1346 |
"logging_steps": 100,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.9618082618862043,
|
5 |
"eval_steps": 200,
|
6 |
+
"global_step": 7600,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1341 |
"eval_samples_per_second": 1.134,
|
1342 |
"eval_steps_per_second": 0.567,
|
1343 |
"step": 5800
|
1344 |
+
},
|
1345 |
+
{
|
1346 |
+
"epoch": 2.2992985190958692,
|
1347 |
+
"grad_norm": 0.003995438106358051,
|
1348 |
+
"learning_rate": 6.764888892618867e-05,
|
1349 |
+
"logits/chosen": -19.462810516357422,
|
1350 |
+
"logits/rejected": -18.417314529418945,
|
1351 |
+
"logps/chosen": -375.923583984375,
|
1352 |
+
"logps/rejected": -394.6671447753906,
|
1353 |
+
"loss": 0.0051,
|
1354 |
+
"rewards/accuracies": 0.9937499761581421,
|
1355 |
+
"rewards/chosen": -1.9151861667633057,
|
1356 |
+
"rewards/margins": 11.908782005310059,
|
1357 |
+
"rewards/rejected": -13.823966026306152,
|
1358 |
+
"step": 5900
|
1359 |
+
},
|
1360 |
+
{
|
1361 |
+
"epoch": 2.338269680436477,
|
1362 |
+
"grad_norm": 0.003454476362094283,
|
1363 |
+
"learning_rate": 6.064541531901838e-05,
|
1364 |
+
"logits/chosen": -19.41205406188965,
|
1365 |
+
"logits/rejected": -18.35369873046875,
|
1366 |
+
"logps/chosen": -374.3619384765625,
|
1367 |
+
"logps/rejected": -392.4114990234375,
|
1368 |
+
"loss": 0.004,
|
1369 |
+
"rewards/accuracies": 0.9962499737739563,
|
1370 |
+
"rewards/chosen": -2.0450844764709473,
|
1371 |
+
"rewards/margins": 11.8558988571167,
|
1372 |
+
"rewards/rejected": -13.900982856750488,
|
1373 |
+
"step": 6000
|
1374 |
+
},
|
1375 |
+
{
|
1376 |
+
"epoch": 2.338269680436477,
|
1377 |
+
"eval_logits/chosen": -15.477252960205078,
|
1378 |
+
"eval_logits/rejected": -15.445527076721191,
|
1379 |
+
"eval_logps/chosen": -347.4806213378906,
|
1380 |
+
"eval_logps/rejected": -386.5325927734375,
|
1381 |
+
"eval_loss": 0.00025467213708907366,
|
1382 |
+
"eval_rewards/accuracies": 1.0,
|
1383 |
+
"eval_rewards/chosen": -0.5821936130523682,
|
1384 |
+
"eval_rewards/margins": 11.38007926940918,
|
1385 |
+
"eval_rewards/rejected": -11.962273597717285,
|
1386 |
+
"eval_runtime": 183.2231,
|
1387 |
+
"eval_samples_per_second": 1.135,
|
1388 |
+
"eval_steps_per_second": 0.568,
|
1389 |
+
"step": 6000
|
1390 |
+
},
|
1391 |
+
{
|
1392 |
+
"epoch": 2.377240841777085,
|
1393 |
+
"grad_norm": 0.0025488168466836214,
|
1394 |
+
"learning_rate": 5.397431121068011e-05,
|
1395 |
+
"logits/chosen": -19.279691696166992,
|
1396 |
+
"logits/rejected": -18.215682983398438,
|
1397 |
+
"logps/chosen": -391.84844970703125,
|
1398 |
+
"logps/rejected": -398.8818359375,
|
1399 |
+
"loss": 0.003,
|
1400 |
+
"rewards/accuracies": 0.9962499737739563,
|
1401 |
+
"rewards/chosen": -2.172652006149292,
|
1402 |
+
"rewards/margins": 12.266448020935059,
|
1403 |
+
"rewards/rejected": -14.439099311828613,
|
1404 |
+
"step": 6100
|
1405 |
+
},
|
1406 |
+
{
|
1407 |
+
"epoch": 2.416212003117693,
|
1408 |
+
"grad_norm": 0.0001306094927713275,
|
1409 |
+
"learning_rate": 4.7647286228024944e-05,
|
1410 |
+
"logits/chosen": -19.264507293701172,
|
1411 |
+
"logits/rejected": -18.2991886138916,
|
1412 |
+
"logps/chosen": -368.3695373535156,
|
1413 |
+
"logps/rejected": -395.1746826171875,
|
1414 |
+
"loss": 0.0021,
|
1415 |
+
"rewards/accuracies": 0.9975000023841858,
|
1416 |
+
"rewards/chosen": -2.2215933799743652,
|
1417 |
+
"rewards/margins": 12.452881813049316,
|
1418 |
+
"rewards/rejected": -14.674474716186523,
|
1419 |
+
"step": 6200
|
1420 |
+
},
|
1421 |
+
{
|
1422 |
+
"epoch": 2.416212003117693,
|
1423 |
+
"eval_logits/chosen": -15.429259300231934,
|
1424 |
+
"eval_logits/rejected": -15.400547981262207,
|
1425 |
+
"eval_logps/chosen": -350.2945556640625,
|
1426 |
+
"eval_logps/rejected": -390.7493896484375,
|
1427 |
+
"eval_loss": 0.0002859699307009578,
|
1428 |
+
"eval_rewards/accuracies": 1.0,
|
1429 |
+
"eval_rewards/chosen": -0.8635876178741455,
|
1430 |
+
"eval_rewards/margins": 11.520365715026855,
|
1431 |
+
"eval_rewards/rejected": -12.383952140808105,
|
1432 |
+
"eval_runtime": 183.7971,
|
1433 |
+
"eval_samples_per_second": 1.132,
|
1434 |
+
"eval_steps_per_second": 0.566,
|
1435 |
+
"step": 6200
|
1436 |
+
},
|
1437 |
+
{
|
1438 |
+
"epoch": 2.455183164458301,
|
1439 |
+
"grad_norm": 0.0013745080213993788,
|
1440 |
+
"learning_rate": 4.167544604418003e-05,
|
1441 |
+
"logits/chosen": -19.316665649414062,
|
1442 |
+
"logits/rejected": -18.3441162109375,
|
1443 |
+
"logps/chosen": -378.2589416503906,
|
1444 |
+
"logps/rejected": -407.154052734375,
|
1445 |
+
"loss": 0.0012,
|
1446 |
+
"rewards/accuracies": 0.9987499713897705,
|
1447 |
+
"rewards/chosen": -2.0685226917266846,
|
1448 |
+
"rewards/margins": 12.507061958312988,
|
1449 |
+
"rewards/rejected": -14.575584411621094,
|
1450 |
+
"step": 6300
|
1451 |
+
},
|
1452 |
+
{
|
1453 |
+
"epoch": 2.4941543257989087,
|
1454 |
+
"grad_norm": 0.002951019676402211,
|
1455 |
+
"learning_rate": 3.6069272885030256e-05,
|
1456 |
+
"logits/chosen": -19.417644500732422,
|
1457 |
+
"logits/rejected": -18.45380210876465,
|
1458 |
+
"logps/chosen": -384.16253662109375,
|
1459 |
+
"logps/rejected": -404.4368896484375,
|
1460 |
+
"loss": 0.0038,
|
1461 |
+
"rewards/accuracies": 0.9975000023841858,
|
1462 |
+
"rewards/chosen": -2.470909357070923,
|
1463 |
+
"rewards/margins": 12.393091201782227,
|
1464 |
+
"rewards/rejected": -14.864001274108887,
|
1465 |
+
"step": 6400
|
1466 |
+
},
|
1467 |
+
{
|
1468 |
+
"epoch": 2.4941543257989087,
|
1469 |
+
"eval_logits/chosen": -15.429072380065918,
|
1470 |
+
"eval_logits/rejected": -15.39948844909668,
|
1471 |
+
"eval_logps/chosen": -350.1909484863281,
|
1472 |
+
"eval_logps/rejected": -391.09564208984375,
|
1473 |
+
"eval_loss": 0.00028344389284029603,
|
1474 |
+
"eval_rewards/accuracies": 1.0,
|
1475 |
+
"eval_rewards/chosen": -0.8532273769378662,
|
1476 |
+
"eval_rewards/margins": 11.565349578857422,
|
1477 |
+
"eval_rewards/rejected": -12.418577194213867,
|
1478 |
+
"eval_runtime": 183.8395,
|
1479 |
+
"eval_samples_per_second": 1.131,
|
1480 |
+
"eval_steps_per_second": 0.566,
|
1481 |
+
"step": 6400
|
1482 |
+
},
|
1483 |
+
{
|
1484 |
+
"epoch": 2.533125487139517,
|
1485 |
+
"grad_norm": 0.011729140765964985,
|
1486 |
+
"learning_rate": 3.083860713002276e-05,
|
1487 |
+
"logits/chosen": -19.238452911376953,
|
1488 |
+
"logits/rejected": -18.347043991088867,
|
1489 |
+
"logps/chosen": -370.990478515625,
|
1490 |
+
"logps/rejected": -400.6825256347656,
|
1491 |
+
"loss": 0.0039,
|
1492 |
+
"rewards/accuracies": 0.9950000047683716,
|
1493 |
+
"rewards/chosen": -2.3387069702148438,
|
1494 |
+
"rewards/margins": 12.353742599487305,
|
1495 |
+
"rewards/rejected": -14.692447662353516,
|
1496 |
+
"step": 6500
|
1497 |
+
},
|
1498 |
+
{
|
1499 |
+
"epoch": 2.572096648480125,
|
1500 |
+
"grad_norm": 0.014655795879662037,
|
1501 |
+
"learning_rate": 2.5992630039587377e-05,
|
1502 |
+
"logits/chosen": -19.383378982543945,
|
1503 |
+
"logits/rejected": -18.424985885620117,
|
1504 |
+
"logps/chosen": -380.931396484375,
|
1505 |
+
"logps/rejected": -399.5527648925781,
|
1506 |
+
"loss": 0.004,
|
1507 |
+
"rewards/accuracies": 0.9950000047683716,
|
1508 |
+
"rewards/chosen": -2.246741771697998,
|
1509 |
+
"rewards/margins": 12.052864074707031,
|
1510 |
+
"rewards/rejected": -14.299607276916504,
|
1511 |
+
"step": 6600
|
1512 |
+
},
|
1513 |
+
{
|
1514 |
+
"epoch": 2.572096648480125,
|
1515 |
+
"eval_logits/chosen": -15.413681983947754,
|
1516 |
+
"eval_logits/rejected": -15.38673210144043,
|
1517 |
+
"eval_logps/chosen": -350.96832275390625,
|
1518 |
+
"eval_logps/rejected": -392.92138671875,
|
1519 |
+
"eval_loss": 0.0002713745925575495,
|
1520 |
+
"eval_rewards/accuracies": 1.0,
|
1521 |
+
"eval_rewards/chosen": -0.9309618473052979,
|
1522 |
+
"eval_rewards/margins": 11.670186996459961,
|
1523 |
+
"eval_rewards/rejected": -12.601149559020996,
|
1524 |
+
"eval_runtime": 183.927,
|
1525 |
+
"eval_samples_per_second": 1.131,
|
1526 |
+
"eval_steps_per_second": 0.565,
|
1527 |
+
"step": 6600
|
1528 |
+
},
|
1529 |
+
{
|
1530 |
+
"epoch": 2.6110678098207325,
|
1531 |
+
"grad_norm": 0.007622725795954466,
|
1532 |
+
"learning_rate": 2.153984763949371e-05,
|
1533 |
+
"logits/chosen": -19.36250114440918,
|
1534 |
+
"logits/rejected": -18.28182029724121,
|
1535 |
+
"logps/chosen": -383.64678955078125,
|
1536 |
+
"logps/rejected": -392.4764404296875,
|
1537 |
+
"loss": 0.0051,
|
1538 |
+
"rewards/accuracies": 0.9950000047683716,
|
1539 |
+
"rewards/chosen": -2.286069631576538,
|
1540 |
+
"rewards/margins": 12.199385643005371,
|
1541 |
+
"rewards/rejected": -14.485455513000488,
|
1542 |
+
"step": 6700
|
1543 |
+
},
|
1544 |
+
{
|
1545 |
+
"epoch": 2.6500389711613406,
|
1546 |
+
"grad_norm": 0.01298923883587122,
|
1547 |
+
"learning_rate": 1.7488075790430934e-05,
|
1548 |
+
"logits/chosen": -19.342336654663086,
|
1549 |
+
"logits/rejected": -18.265600204467773,
|
1550 |
+
"logps/chosen": -386.0182800292969,
|
1551 |
+
"logps/rejected": -406.8291015625,
|
1552 |
+
"loss": 0.0025,
|
1553 |
+
"rewards/accuracies": 0.9975000023841858,
|
1554 |
+
"rewards/chosen": -2.360260009765625,
|
1555 |
+
"rewards/margins": 12.328630447387695,
|
1556 |
+
"rewards/rejected": -14.68889045715332,
|
1557 |
+
"step": 6800
|
1558 |
+
},
|
1559 |
+
{
|
1560 |
+
"epoch": 2.6500389711613406,
|
1561 |
+
"eval_logits/chosen": -15.421009063720703,
|
1562 |
+
"eval_logits/rejected": -15.389812469482422,
|
1563 |
+
"eval_logps/chosen": -350.44403076171875,
|
1564 |
+
"eval_logps/rejected": -392.24505615234375,
|
1565 |
+
"eval_loss": 0.00027188131934963167,
|
1566 |
+
"eval_rewards/accuracies": 1.0,
|
1567 |
+
"eval_rewards/chosen": -0.8785340785980225,
|
1568 |
+
"eval_rewards/margins": 11.654979705810547,
|
1569 |
+
"eval_rewards/rejected": -12.533516883850098,
|
1570 |
+
"eval_runtime": 183.8793,
|
1571 |
+
"eval_samples_per_second": 1.131,
|
1572 |
+
"eval_steps_per_second": 0.566,
|
1573 |
+
"step": 6800
|
1574 |
+
},
|
1575 |
+
{
|
1576 |
+
"epoch": 2.6890101325019486,
|
1577 |
+
"grad_norm": 0.001023565884679556,
|
1578 |
+
"learning_rate": 1.3844426469017707e-05,
|
1579 |
+
"logits/chosen": -19.42437171936035,
|
1580 |
+
"logits/rejected": -18.330951690673828,
|
1581 |
+
"logps/chosen": -389.0,
|
1582 |
+
"logps/rejected": -406.2248840332031,
|
1583 |
+
"loss": 0.0029,
|
1584 |
+
"rewards/accuracies": 0.9962499737739563,
|
1585 |
+
"rewards/chosen": -2.3574695587158203,
|
1586 |
+
"rewards/margins": 12.130472183227539,
|
1587 |
+
"rewards/rejected": -14.48794174194336,
|
1588 |
+
"step": 6900
|
1589 |
+
},
|
1590 |
+
{
|
1591 |
+
"epoch": 2.7279812938425563,
|
1592 |
+
"grad_norm": 0.030132969841361046,
|
1593 |
+
"learning_rate": 1.061529528432198e-05,
|
1594 |
+
"logits/chosen": -19.441736221313477,
|
1595 |
+
"logits/rejected": -18.430885314941406,
|
1596 |
+
"logps/chosen": -375.5260009765625,
|
1597 |
+
"logps/rejected": -398.24298095703125,
|
1598 |
+
"loss": 0.0012,
|
1599 |
+
"rewards/accuracies": 0.9987499713897705,
|
1600 |
+
"rewards/chosen": -2.37859845161438,
|
1601 |
+
"rewards/margins": 12.304988861083984,
|
1602 |
+
"rewards/rejected": -14.683588981628418,
|
1603 |
+
"step": 7000
|
1604 |
+
},
|
1605 |
+
{
|
1606 |
+
"epoch": 2.7279812938425563,
|
1607 |
+
"eval_logits/chosen": -15.412691116333008,
|
1608 |
+
"eval_logits/rejected": -15.382089614868164,
|
1609 |
+
"eval_logps/chosen": -351.04168701171875,
|
1610 |
+
"eval_logps/rejected": -393.3232421875,
|
1611 |
+
"eval_loss": 0.00028384948382154107,
|
1612 |
+
"eval_rewards/accuracies": 1.0,
|
1613 |
+
"eval_rewards/chosen": -0.9383015036582947,
|
1614 |
+
"eval_rewards/margins": 11.703031539916992,
|
1615 |
+
"eval_rewards/rejected": -12.641332626342773,
|
1616 |
+
"eval_runtime": 183.1659,
|
1617 |
+
"eval_samples_per_second": 1.136,
|
1618 |
+
"eval_steps_per_second": 0.568,
|
1619 |
+
"step": 7000
|
1620 |
+
},
|
1621 |
+
{
|
1622 |
+
"epoch": 2.7669524551831643,
|
1623 |
+
"grad_norm": 0.29376310110092163,
|
1624 |
+
"learning_rate": 7.806350251804484e-06,
|
1625 |
+
"logits/chosen": -19.37668228149414,
|
1626 |
+
"logits/rejected": -18.328815460205078,
|
1627 |
+
"logps/chosen": -368.5742492675781,
|
1628 |
+
"logps/rejected": -397.5633544921875,
|
1629 |
+
"loss": 0.003,
|
1630 |
+
"rewards/accuracies": 0.9962499737739563,
|
1631 |
+
"rewards/chosen": -2.3045263290405273,
|
1632 |
+
"rewards/margins": 12.480491638183594,
|
1633 |
+
"rewards/rejected": -14.785019874572754,
|
1634 |
+
"step": 7100
|
1635 |
+
},
|
1636 |
+
{
|
1637 |
+
"epoch": 2.8059236165237724,
|
1638 |
+
"grad_norm": 0.002321546198800206,
|
1639 |
+
"learning_rate": 5.422521844388683e-06,
|
1640 |
+
"logits/chosen": -19.359325408935547,
|
1641 |
+
"logits/rejected": -18.383474349975586,
|
1642 |
+
"logps/chosen": -373.9019470214844,
|
1643 |
+
"logps/rejected": -399.3661193847656,
|
1644 |
+
"loss": 0.0047,
|
1645 |
+
"rewards/accuracies": 0.9937499761581421,
|
1646 |
+
"rewards/chosen": -2.3699896335601807,
|
1647 |
+
"rewards/margins": 12.351306915283203,
|
1648 |
+
"rewards/rejected": -14.721295356750488,
|
1649 |
+
"step": 7200
|
1650 |
+
},
|
1651 |
+
{
|
1652 |
+
"epoch": 2.8059236165237724,
|
1653 |
+
"eval_logits/chosen": -15.414888381958008,
|
1654 |
+
"eval_logits/rejected": -15.384535789489746,
|
1655 |
+
"eval_logps/chosen": -350.7855224609375,
|
1656 |
+
"eval_logps/rejected": -393.17767333984375,
|
1657 |
+
"eval_loss": 0.00028104818193241954,
|
1658 |
+
"eval_rewards/accuracies": 1.0,
|
1659 |
+
"eval_rewards/chosen": -0.9126843214035034,
|
1660 |
+
"eval_rewards/margins": 11.714097023010254,
|
1661 |
+
"eval_rewards/rejected": -12.626781463623047,
|
1662 |
+
"eval_runtime": 183.8851,
|
1663 |
+
"eval_samples_per_second": 1.131,
|
1664 |
+
"eval_steps_per_second": 0.566,
|
1665 |
+
"step": 7200
|
1666 |
+
},
|
1667 |
+
{
|
1668 |
+
"epoch": 2.8448947778643805,
|
1669 |
+
"grad_norm": 0.027766738086938858,
|
1670 |
+
"learning_rate": 3.4679943381216438e-06,
|
1671 |
+
"logits/chosen": -19.325197219848633,
|
1672 |
+
"logits/rejected": -18.378944396972656,
|
1673 |
+
"logps/chosen": -379.8866271972656,
|
1674 |
+
"logps/rejected": -401.4820251464844,
|
1675 |
+
"loss": 0.0004,
|
1676 |
+
"rewards/accuracies": 1.0,
|
1677 |
+
"rewards/chosen": -2.4111533164978027,
|
1678 |
+
"rewards/margins": 12.260860443115234,
|
1679 |
+
"rewards/rejected": -14.672014236450195,
|
1680 |
+
"step": 7300
|
1681 |
+
},
|
1682 |
+
{
|
1683 |
+
"epoch": 2.8838659392049886,
|
1684 |
+
"grad_norm": 0.0003760048421099782,
|
1685 |
+
"learning_rate": 1.9461984676158727e-06,
|
1686 |
+
"logits/chosen": -19.34538459777832,
|
1687 |
+
"logits/rejected": -18.292407989501953,
|
1688 |
+
"logps/chosen": -386.7870788574219,
|
1689 |
+
"logps/rejected": -399.7149353027344,
|
1690 |
+
"loss": 0.003,
|
1691 |
+
"rewards/accuracies": 0.9975000023841858,
|
1692 |
+
"rewards/chosen": -2.498373508453369,
|
1693 |
+
"rewards/margins": 12.094259262084961,
|
1694 |
+
"rewards/rejected": -14.592632293701172,
|
1695 |
+
"step": 7400
|
1696 |
+
},
|
1697 |
+
{
|
1698 |
+
"epoch": 2.8838659392049886,
|
1699 |
+
"eval_logits/chosen": -15.413555145263672,
|
1700 |
+
"eval_logits/rejected": -15.383076667785645,
|
1701 |
+
"eval_logps/chosen": -350.8740539550781,
|
1702 |
+
"eval_logps/rejected": -393.42462158203125,
|
1703 |
+
"eval_loss": 0.0002813572355080396,
|
1704 |
+
"eval_rewards/accuracies": 1.0,
|
1705 |
+
"eval_rewards/chosen": -0.9215376377105713,
|
1706 |
+
"eval_rewards/margins": 11.729934692382812,
|
1707 |
+
"eval_rewards/rejected": -12.651473045349121,
|
1708 |
+
"eval_runtime": 184.2047,
|
1709 |
+
"eval_samples_per_second": 1.129,
|
1710 |
+
"eval_steps_per_second": 0.565,
|
1711 |
+
"step": 7400
|
1712 |
+
},
|
1713 |
+
{
|
1714 |
+
"epoch": 2.922837100545596,
|
1715 |
+
"grad_norm": 0.0007408323581330478,
|
1716 |
+
"learning_rate": 8.598054041644155e-07,
|
1717 |
+
"logits/chosen": -19.447223663330078,
|
1718 |
+
"logits/rejected": -18.43070411682129,
|
1719 |
+
"logps/chosen": -382.068359375,
|
1720 |
+
"logps/rejected": -402.309814453125,
|
1721 |
+
"loss": 0.002,
|
1722 |
+
"rewards/accuracies": 0.9987499713897705,
|
1723 |
+
"rewards/chosen": -2.2234978675842285,
|
1724 |
+
"rewards/margins": 12.309508323669434,
|
1725 |
+
"rewards/rejected": -14.533007621765137,
|
1726 |
+
"step": 7500
|
1727 |
+
},
|
1728 |
+
{
|
1729 |
+
"epoch": 2.9618082618862043,
|
1730 |
+
"grad_norm": 0.0027539017610251904,
|
1731 |
+
"learning_rate": 2.107220670987675e-07,
|
1732 |
+
"logits/chosen": -19.346935272216797,
|
1733 |
+
"logits/rejected": -18.29763412475586,
|
1734 |
+
"logps/chosen": -388.3053894042969,
|
1735 |
+
"logps/rejected": -401.0611572265625,
|
1736 |
+
"loss": 0.003,
|
1737 |
+
"rewards/accuracies": 0.9975000023841858,
|
1738 |
+
"rewards/chosen": -2.2870028018951416,
|
1739 |
+
"rewards/margins": 12.200346946716309,
|
1740 |
+
"rewards/rejected": -14.487349510192871,
|
1741 |
+
"step": 7600
|
1742 |
+
},
|
1743 |
+
{
|
1744 |
+
"epoch": 2.9618082618862043,
|
1745 |
+
"eval_logits/chosen": -15.41310977935791,
|
1746 |
+
"eval_logits/rejected": -15.382636070251465,
|
1747 |
+
"eval_logps/chosen": -350.8951721191406,
|
1748 |
+
"eval_logps/rejected": -393.48431396484375,
|
1749 |
+
"eval_loss": 0.0002806605480145663,
|
1750 |
+
"eval_rewards/accuracies": 1.0,
|
1751 |
+
"eval_rewards/chosen": -0.9236502051353455,
|
1752 |
+
"eval_rewards/margins": 11.733796119689941,
|
1753 |
+
"eval_rewards/rejected": -12.657448768615723,
|
1754 |
+
"eval_runtime": 183.4198,
|
1755 |
+
"eval_samples_per_second": 1.134,
|
1756 |
+
"eval_steps_per_second": 0.567,
|
1757 |
+
"step": 7600
|
1758 |
}
|
1759 |
],
|
1760 |
"logging_steps": 100,
|