Uploaded checkpoint-4000
Browse files- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +2 -2
- scheduler.pt +1 -1
- trainer_state.json +153 -3
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 119975656
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae99966885dfadca210314bf64872ce443f70308df6e4727adcc50f428ab66db
|
3 |
size 119975656
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 60477396
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2812ec63c28059aad0edb8123a9e90f5f8301e979f2372ce02fe039956e98169
|
3 |
size 60477396
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b12fc07e36413d2b0b11012030944d448c215499606c7c88123ca1e537650ca8
|
3 |
+
size 14244
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f80b0441e18382140898e5947e4bf00161c8985bfd13094069daa8dad861cc8
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -457,6 +457,156 @@
|
|
457 |
"eval_samples_per_second": 5.189,
|
458 |
"eval_steps_per_second": 5.189,
|
459 |
"step": 3000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
460 |
}
|
461 |
],
|
462 |
"logging_steps": 100,
|
@@ -464,7 +614,7 @@
|
|
464 |
"num_input_tokens_seen": 0,
|
465 |
"num_train_epochs": 2,
|
466 |
"save_steps": 1000,
|
467 |
-
"total_flos":
|
468 |
"train_batch_size": 1,
|
469 |
"trial_name": null,
|
470 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.2532309861361322,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 4000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
457 |
"eval_samples_per_second": 5.189,
|
458 |
"eval_steps_per_second": 5.189,
|
459 |
"step": 3000
|
460 |
+
},
|
461 |
+
{
|
462 |
+
"epoch": 0.97,
|
463 |
+
"grad_norm": 0.0015150770777836442,
|
464 |
+
"learning_rate": 8.444444444444446e-06,
|
465 |
+
"loss": 0.055,
|
466 |
+
"step": 3100
|
467 |
+
},
|
468 |
+
{
|
469 |
+
"epoch": 0.97,
|
470 |
+
"eval_loss": 0.020349696278572083,
|
471 |
+
"eval_runtime": 192.752,
|
472 |
+
"eval_samples_per_second": 5.188,
|
473 |
+
"eval_steps_per_second": 5.188,
|
474 |
+
"step": 3100
|
475 |
+
},
|
476 |
+
{
|
477 |
+
"epoch": 1.0,
|
478 |
+
"grad_norm": 0.8284673690795898,
|
479 |
+
"learning_rate": 8.000000000000001e-06,
|
480 |
+
"loss": 0.0424,
|
481 |
+
"step": 3200
|
482 |
+
},
|
483 |
+
{
|
484 |
+
"epoch": 1.0,
|
485 |
+
"eval_loss": 0.011587778106331825,
|
486 |
+
"eval_runtime": 192.4082,
|
487 |
+
"eval_samples_per_second": 5.197,
|
488 |
+
"eval_steps_per_second": 5.197,
|
489 |
+
"step": 3200
|
490 |
+
},
|
491 |
+
{
|
492 |
+
"epoch": 1.03,
|
493 |
+
"grad_norm": 0.00324226007796824,
|
494 |
+
"learning_rate": 7.555555555555556e-06,
|
495 |
+
"loss": 0.0232,
|
496 |
+
"step": 3300
|
497 |
+
},
|
498 |
+
{
|
499 |
+
"epoch": 1.03,
|
500 |
+
"eval_loss": 0.02541309781372547,
|
501 |
+
"eval_runtime": 192.4753,
|
502 |
+
"eval_samples_per_second": 5.195,
|
503 |
+
"eval_steps_per_second": 5.195,
|
504 |
+
"step": 3300
|
505 |
+
},
|
506 |
+
{
|
507 |
+
"epoch": 1.07,
|
508 |
+
"grad_norm": 0.0018368299352005124,
|
509 |
+
"learning_rate": 7.111111111111112e-06,
|
510 |
+
"loss": 0.0391,
|
511 |
+
"step": 3400
|
512 |
+
},
|
513 |
+
{
|
514 |
+
"epoch": 1.07,
|
515 |
+
"eval_loss": 0.019817600026726723,
|
516 |
+
"eval_runtime": 192.2908,
|
517 |
+
"eval_samples_per_second": 5.2,
|
518 |
+
"eval_steps_per_second": 5.2,
|
519 |
+
"step": 3400
|
520 |
+
},
|
521 |
+
{
|
522 |
+
"epoch": 1.1,
|
523 |
+
"grad_norm": 0.001375267980620265,
|
524 |
+
"learning_rate": 6.666666666666667e-06,
|
525 |
+
"loss": 0.029,
|
526 |
+
"step": 3500
|
527 |
+
},
|
528 |
+
{
|
529 |
+
"epoch": 1.1,
|
530 |
+
"eval_loss": 0.014760646037757397,
|
531 |
+
"eval_runtime": 192.1713,
|
532 |
+
"eval_samples_per_second": 5.204,
|
533 |
+
"eval_steps_per_second": 5.204,
|
534 |
+
"step": 3500
|
535 |
+
},
|
536 |
+
{
|
537 |
+
"epoch": 1.13,
|
538 |
+
"grad_norm": 1.2048271894454956,
|
539 |
+
"learning_rate": 6.222222222222223e-06,
|
540 |
+
"loss": 0.028,
|
541 |
+
"step": 3600
|
542 |
+
},
|
543 |
+
{
|
544 |
+
"epoch": 1.13,
|
545 |
+
"eval_loss": 0.024096647277474403,
|
546 |
+
"eval_runtime": 191.7597,
|
547 |
+
"eval_samples_per_second": 5.215,
|
548 |
+
"eval_steps_per_second": 5.215,
|
549 |
+
"step": 3600
|
550 |
+
},
|
551 |
+
{
|
552 |
+
"epoch": 1.16,
|
553 |
+
"grad_norm": 1.7010436058044434,
|
554 |
+
"learning_rate": 5.777777777777778e-06,
|
555 |
+
"loss": 0.033,
|
556 |
+
"step": 3700
|
557 |
+
},
|
558 |
+
{
|
559 |
+
"epoch": 1.16,
|
560 |
+
"eval_loss": 0.024101875722408295,
|
561 |
+
"eval_runtime": 191.6566,
|
562 |
+
"eval_samples_per_second": 5.218,
|
563 |
+
"eval_steps_per_second": 5.218,
|
564 |
+
"step": 3700
|
565 |
+
},
|
566 |
+
{
|
567 |
+
"epoch": 1.19,
|
568 |
+
"grad_norm": 0.4044632613658905,
|
569 |
+
"learning_rate": 5.333333333333334e-06,
|
570 |
+
"loss": 0.0411,
|
571 |
+
"step": 3800
|
572 |
+
},
|
573 |
+
{
|
574 |
+
"epoch": 1.19,
|
575 |
+
"eval_loss": 0.020846880972385406,
|
576 |
+
"eval_runtime": 192.1837,
|
577 |
+
"eval_samples_per_second": 5.203,
|
578 |
+
"eval_steps_per_second": 5.203,
|
579 |
+
"step": 3800
|
580 |
+
},
|
581 |
+
{
|
582 |
+
"epoch": 1.22,
|
583 |
+
"grad_norm": 0.47499576210975647,
|
584 |
+
"learning_rate": 4.888888888888889e-06,
|
585 |
+
"loss": 0.0233,
|
586 |
+
"step": 3900
|
587 |
+
},
|
588 |
+
{
|
589 |
+
"epoch": 1.22,
|
590 |
+
"eval_loss": 0.010989435017108917,
|
591 |
+
"eval_runtime": 192.4351,
|
592 |
+
"eval_samples_per_second": 5.197,
|
593 |
+
"eval_steps_per_second": 5.197,
|
594 |
+
"step": 3900
|
595 |
+
},
|
596 |
+
{
|
597 |
+
"epoch": 1.25,
|
598 |
+
"grad_norm": 0.017001571133732796,
|
599 |
+
"learning_rate": 4.444444444444444e-06,
|
600 |
+
"loss": 0.0204,
|
601 |
+
"step": 4000
|
602 |
+
},
|
603 |
+
{
|
604 |
+
"epoch": 1.25,
|
605 |
+
"eval_loss": 0.02116994932293892,
|
606 |
+
"eval_runtime": 192.8643,
|
607 |
+
"eval_samples_per_second": 5.185,
|
608 |
+
"eval_steps_per_second": 5.185,
|
609 |
+
"step": 4000
|
610 |
}
|
611 |
],
|
612 |
"logging_steps": 100,
|
|
|
614 |
"num_input_tokens_seen": 0,
|
615 |
"num_train_epochs": 2,
|
616 |
"save_steps": 1000,
|
617 |
+
"total_flos": 6.4408503975936e+16,
|
618 |
"train_batch_size": 1,
|
619 |
"trial_name": null,
|
620 |
"trial_params": null
|