Upload folder using huggingface_hub
Browse files- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +143 -3
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 83946192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6adbd6caf7bdf1842e91bc5816ccdd5448347927512e466c6ffec9628b45e389
|
3 |
size 83946192
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 168150290
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf81dd98ec9c616d55d1232f7ada73dc21309fa8c33dc3f10a7dbdc17291a944
|
3 |
size 168150290
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ef525735c3cf9ca20902b64f335b1cb98298205e4df0c6e14b2c5e5e1d7d8dd
|
3 |
size 14244
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6ff283720f8f76393837f434b89763e59f3450d2b659f4e3ad09ffbdff910d9
|
3 |
size 1064
|
trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -4634,6 +4634,146 @@
|
|
4634 |
"learning_rate": 0.00012079116908177593,
|
4635 |
"loss": 0.9201,
|
4636 |
"step": 3300
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4637 |
}
|
4638 |
],
|
4639 |
"logging_steps": 5,
|
@@ -4641,7 +4781,7 @@
|
|
4641 |
"num_input_tokens_seen": 0,
|
4642 |
"num_train_epochs": 1,
|
4643 |
"save_steps": 100,
|
4644 |
-
"total_flos": 4.
|
4645 |
"train_batch_size": 2,
|
4646 |
"trial_name": null,
|
4647 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.5048630187838741,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 3400,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
4634 |
"learning_rate": 0.00012079116908177593,
|
4635 |
"loss": 0.9201,
|
4636 |
"step": 3300
|
4637 |
+
},
|
4638 |
+
{
|
4639 |
+
"epoch": 0.49,
|
4640 |
+
"grad_norm": 0.33203125,
|
4641 |
+
"learning_rate": 0.00012053755650651166,
|
4642 |
+
"loss": 0.9375,
|
4643 |
+
"step": 3305
|
4644 |
+
},
|
4645 |
+
{
|
4646 |
+
"epoch": 0.49,
|
4647 |
+
"grad_norm": 0.337890625,
|
4648 |
+
"learning_rate": 0.00012028380594270283,
|
4649 |
+
"loss": 0.9681,
|
4650 |
+
"step": 3310
|
4651 |
+
},
|
4652 |
+
{
|
4653 |
+
"epoch": 0.49,
|
4654 |
+
"grad_norm": 0.34765625,
|
4655 |
+
"learning_rate": 0.00012002991909525873,
|
4656 |
+
"loss": 0.923,
|
4657 |
+
"step": 3315
|
4658 |
+
},
|
4659 |
+
{
|
4660 |
+
"epoch": 0.49,
|
4661 |
+
"grad_norm": 0.341796875,
|
4662 |
+
"learning_rate": 0.00011977589767000433,
|
4663 |
+
"loss": 0.9143,
|
4664 |
+
"step": 3320
|
4665 |
+
},
|
4666 |
+
{
|
4667 |
+
"epoch": 0.49,
|
4668 |
+
"grad_norm": 0.341796875,
|
4669 |
+
"learning_rate": 0.00011952174337366881,
|
4670 |
+
"loss": 0.9205,
|
4671 |
+
"step": 3325
|
4672 |
+
},
|
4673 |
+
{
|
4674 |
+
"epoch": 0.49,
|
4675 |
+
"grad_norm": 0.349609375,
|
4676 |
+
"learning_rate": 0.00011926745791387406,
|
4677 |
+
"loss": 0.9113,
|
4678 |
+
"step": 3330
|
4679 |
+
},
|
4680 |
+
{
|
4681 |
+
"epoch": 0.5,
|
4682 |
+
"grad_norm": 0.357421875,
|
4683 |
+
"learning_rate": 0.00011901304299912326,
|
4684 |
+
"loss": 0.9357,
|
4685 |
+
"step": 3335
|
4686 |
+
},
|
4687 |
+
{
|
4688 |
+
"epoch": 0.5,
|
4689 |
+
"grad_norm": 0.349609375,
|
4690 |
+
"learning_rate": 0.00011875850033878939,
|
4691 |
+
"loss": 0.9552,
|
4692 |
+
"step": 3340
|
4693 |
+
},
|
4694 |
+
{
|
4695 |
+
"epoch": 0.5,
|
4696 |
+
"grad_norm": 0.32421875,
|
4697 |
+
"learning_rate": 0.00011850383164310371,
|
4698 |
+
"loss": 0.9234,
|
4699 |
+
"step": 3345
|
4700 |
+
},
|
4701 |
+
{
|
4702 |
+
"epoch": 0.5,
|
4703 |
+
"grad_norm": 0.33203125,
|
4704 |
+
"learning_rate": 0.00011824903862314427,
|
4705 |
+
"loss": 0.9085,
|
4706 |
+
"step": 3350
|
4707 |
+
},
|
4708 |
+
{
|
4709 |
+
"epoch": 0.5,
|
4710 |
+
"grad_norm": 0.337890625,
|
4711 |
+
"learning_rate": 0.00011799412299082448,
|
4712 |
+
"loss": 0.9226,
|
4713 |
+
"step": 3355
|
4714 |
+
},
|
4715 |
+
{
|
4716 |
+
"epoch": 0.5,
|
4717 |
+
"grad_norm": 0.341796875,
|
4718 |
+
"learning_rate": 0.00011773908645888152,
|
4719 |
+
"loss": 0.9107,
|
4720 |
+
"step": 3360
|
4721 |
+
},
|
4722 |
+
{
|
4723 |
+
"epoch": 0.5,
|
4724 |
+
"grad_norm": 0.349609375,
|
4725 |
+
"learning_rate": 0.00011748393074086497,
|
4726 |
+
"loss": 0.9402,
|
4727 |
+
"step": 3365
|
4728 |
+
},
|
4729 |
+
{
|
4730 |
+
"epoch": 0.5,
|
4731 |
+
"grad_norm": 0.341796875,
|
4732 |
+
"learning_rate": 0.00011722865755112504,
|
4733 |
+
"loss": 0.9419,
|
4734 |
+
"step": 3370
|
4735 |
+
},
|
4736 |
+
{
|
4737 |
+
"epoch": 0.5,
|
4738 |
+
"grad_norm": 0.359375,
|
4739 |
+
"learning_rate": 0.00011697326860480133,
|
4740 |
+
"loss": 0.9146,
|
4741 |
+
"step": 3375
|
4742 |
+
},
|
4743 |
+
{
|
4744 |
+
"epoch": 0.5,
|
4745 |
+
"grad_norm": 0.34375,
|
4746 |
+
"learning_rate": 0.00011671776561781123,
|
4747 |
+
"loss": 0.8996,
|
4748 |
+
"step": 3380
|
4749 |
+
},
|
4750 |
+
{
|
4751 |
+
"epoch": 0.5,
|
4752 |
+
"grad_norm": 0.341796875,
|
4753 |
+
"learning_rate": 0.00011646215030683818,
|
4754 |
+
"loss": 0.9089,
|
4755 |
+
"step": 3385
|
4756 |
+
},
|
4757 |
+
{
|
4758 |
+
"epoch": 0.5,
|
4759 |
+
"grad_norm": 0.34765625,
|
4760 |
+
"learning_rate": 0.0001162064243893205,
|
4761 |
+
"loss": 0.9257,
|
4762 |
+
"step": 3390
|
4763 |
+
},
|
4764 |
+
{
|
4765 |
+
"epoch": 0.5,
|
4766 |
+
"grad_norm": 0.333984375,
|
4767 |
+
"learning_rate": 0.00011595058958343952,
|
4768 |
+
"loss": 0.924,
|
4769 |
+
"step": 3395
|
4770 |
+
},
|
4771 |
+
{
|
4772 |
+
"epoch": 0.5,
|
4773 |
+
"grad_norm": 0.34375,
|
4774 |
+
"learning_rate": 0.00011569464760810825,
|
4775 |
+
"loss": 0.9558,
|
4776 |
+
"step": 3400
|
4777 |
}
|
4778 |
],
|
4779 |
"logging_steps": 5,
|
|
|
4781 |
"num_input_tokens_seen": 0,
|
4782 |
"num_train_epochs": 1,
|
4783 |
"save_steps": 100,
|
4784 |
+
"total_flos": 4.781280773300814e+18,
|
4785 |
"train_batch_size": 2,
|
4786 |
"trial_name": null,
|
4787 |
"trial_params": null
|