diff --git a/trainer_outputs/checkpoint-1000/README.md b/trainer_outputs/checkpoint-1000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c --- /dev/null +++ b/trainer_outputs/checkpoint-1000/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: QuantizationMethod.BITS_AND_BYTES +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.4.0.dev0 diff --git a/trainer_outputs/checkpoint-1000/adapter_config.json b/trainer_outputs/checkpoint-1000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5151aebe3c16ec286f715ae36817f8ff2eff5704 --- /dev/null +++ b/trainer_outputs/checkpoint-1000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/trainer_outputs/checkpoint-1000/adapter_model.bin b/trainer_outputs/checkpoint-1000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..569f6af637a050b21c0c5b14c7899f36ca06095e --- /dev/null +++ b/trainer_outputs/checkpoint-1000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82799ca1a45fe916290346f1b9e0f1feddd4e86d9382374bfddf9b91c9ce63cf +size 25234701 diff --git a/trainer_outputs/checkpoint-1000/optimizer.pt b/trainer_outputs/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c64c717aff5ad80c9e3e60c9182e05633c2e975a --- /dev/null +++ b/trainer_outputs/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66889dd2cdecfb4f607a362a8d178c7677c707a9675e7d809725b0e44b0465e5 +size 50492421 diff --git a/trainer_outputs/checkpoint-1000/rng_state.pth b/trainer_outputs/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e98ce33fbadce90ab71e8f943501111ad75f3209 --- /dev/null +++ b/trainer_outputs/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f089defdca994fd4a77c942eddae1f8040469d99290aacaa2b7dd2f1c723771a +size 14575 diff --git a/trainer_outputs/checkpoint-1000/scheduler.pt b/trainer_outputs/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4aa15717fb7621800c1ff1c0d975a92ceab5372 --- /dev/null +++ b/trainer_outputs/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b66e5da39344e1f07ca8f668859ea57276c22f61d58e9090e115dea625966b1 +size 627 diff --git a/trainer_outputs/checkpoint-1000/trainer_state.json b/trainer_outputs/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..80696ad6621c0c3cd331bfe7df32a519b7856fbb --- /dev/null +++ b/trainer_outputs/checkpoint-1000/trainer_state.json @@ -0,0 +1,319 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.2312272385687034, + "eval_steps": 500, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 1.0185, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 8e-05, + "loss": 0.8649, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012, + "loss": 0.7784, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016, + "loss": 0.7386, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002, + "loss": 0.7037, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001990530303030303, + "loss": 0.7019, + "step": 120 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001981060606060606, + "loss": 0.7117, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019715909090909094, + "loss": 0.672, + "step": 160 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019621212121212123, + "loss": 0.664, + "step": 180 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019526515151515152, + "loss": 0.6666, + "step": 200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001943181818181818, + "loss": 0.6685, + "step": 220 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019337121212121213, + "loss": 0.6788, + "step": 240 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019242424242424245, + "loss": 0.6673, + "step": 260 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019147727272727274, + "loss": 0.6628, + "step": 280 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019053030303030303, + "loss": 0.6643, + "step": 300 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018958333333333332, + "loss": 0.6607, + "step": 320 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018863636363636364, + "loss": 0.6706, + "step": 340 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018768939393939396, + "loss": 0.6709, + "step": 360 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018674242424242425, + "loss": 0.6616, + "step": 380 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018579545454545454, + "loss": 0.6566, + "step": 400 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018484848484848484, + "loss": 0.6513, + "step": 420 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018390151515151518, + "loss": 0.6797, + "step": 440 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018295454545454547, + "loss": 0.6599, + "step": 460 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018200757575757577, + "loss": 0.6561, + "step": 480 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018106060606060606, + "loss": 0.662, + "step": 500 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018011363636363638, + "loss": 0.6629, + "step": 520 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001791666666666667, + "loss": 0.6475, + "step": 540 + }, + { + "epoch": 0.13, + "learning_rate": 0.00017821969696969699, + "loss": 0.6607, + "step": 560 + }, + { + "epoch": 0.13, + "learning_rate": 0.00017727272727272728, + "loss": 0.6512, + "step": 580 + }, + { + "epoch": 0.14, + "learning_rate": 0.00017632575757575757, + "loss": 0.6484, + "step": 600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001753787878787879, + "loss": 0.6403, + "step": 620 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001744318181818182, + "loss": 0.6537, + "step": 640 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001734848484848485, + "loss": 0.6516, + "step": 660 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001725378787878788, + "loss": 0.6577, + "step": 680 + }, + { + "epoch": 0.16, + "learning_rate": 0.00017159090909090908, + "loss": 0.6374, + "step": 700 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001706439393939394, + "loss": 0.6551, + "step": 720 + }, + { + "epoch": 0.17, + "learning_rate": 0.00016969696969696972, + "loss": 0.6388, + "step": 740 + }, + { + "epoch": 0.18, + "learning_rate": 0.00016875, + "loss": 0.64, + "step": 760 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001678030303030303, + "loss": 0.6579, + "step": 780 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001668560606060606, + "loss": 0.6525, + "step": 800 + }, + { + "epoch": 0.19, + "learning_rate": 0.00016590909090909094, + "loss": 0.6261, + "step": 820 + }, + { + "epoch": 0.19, + "learning_rate": 0.00016496212121212123, + "loss": 0.6351, + "step": 840 + }, + { + "epoch": 0.2, + "learning_rate": 0.00016401515151515152, + "loss": 0.6537, + "step": 860 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001630681818181818, + "loss": 0.6448, + "step": 880 + }, + { + "epoch": 0.21, + "learning_rate": 0.00016212121212121213, + "loss": 0.638, + "step": 900 + }, + { + "epoch": 0.21, + "learning_rate": 0.00016117424242424245, + "loss": 0.6503, + "step": 920 + }, + { + "epoch": 0.22, + "learning_rate": 0.00016022727272727274, + "loss": 0.6378, + "step": 940 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015928030303030303, + "loss": 0.643, + "step": 960 + }, + { + "epoch": 0.23, + "learning_rate": 0.00015833333333333332, + "loss": 0.6235, + "step": 980 + }, + { + "epoch": 0.23, + "learning_rate": 0.00015738636363636364, + "loss": 0.647, + "step": 1000 + } + ], + "logging_steps": 20, + "max_steps": 4324, + "num_train_epochs": 1, + "save_steps": 500, + "total_flos": 1.585613047974052e+17, + "trial_name": null, + "trial_params": null +} diff --git a/trainer_outputs/checkpoint-1000/training_args.bin b/trainer_outputs/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ee6008563f39382fbd86c959b2567ac52f8f8ada --- /dev/null +++ b/trainer_outputs/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7228d256901ac224db1b4d229c9c22d5c5a0db9d57b13da26daf55eb856deda8 +size 4027 diff --git a/trainer_outputs/checkpoint-1500/README.md b/trainer_outputs/checkpoint-1500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c --- /dev/null +++ b/trainer_outputs/checkpoint-1500/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: QuantizationMethod.BITS_AND_BYTES +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.4.0.dev0 diff --git a/trainer_outputs/checkpoint-1500/adapter_config.json b/trainer_outputs/checkpoint-1500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5151aebe3c16ec286f715ae36817f8ff2eff5704 --- /dev/null +++ b/trainer_outputs/checkpoint-1500/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/trainer_outputs/checkpoint-1500/adapter_model.bin b/trainer_outputs/checkpoint-1500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..bb47daebcde6802fe3f4dd06b3e1470f47bda6a1 --- /dev/null +++ b/trainer_outputs/checkpoint-1500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f29b3f38bf25dcaff8e7b129981d4b9c2bee90a0f37f1f989c8d2d617403ffe6 +size 25234701 diff --git a/trainer_outputs/checkpoint-1500/optimizer.pt b/trainer_outputs/checkpoint-1500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d8ab03292db258fd230272ef83b6f5d6748b003 --- /dev/null +++ b/trainer_outputs/checkpoint-1500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61af784ec13831fee055258efb45cf0e65c5b2c4e37ec22fc7233cfa9bd8201 +size 50492421 diff --git a/trainer_outputs/checkpoint-1500/rng_state.pth b/trainer_outputs/checkpoint-1500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..78a3a827cccdca530c25ab9f81a98d6c1df83493 --- /dev/null +++ b/trainer_outputs/checkpoint-1500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b82c3ec8adf3edbc776662f56f6ace906d215005bbb5145d4decfc9fcf8884dc +size 14575 diff --git a/trainer_outputs/checkpoint-1500/scheduler.pt b/trainer_outputs/checkpoint-1500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7233d6b95b56017739bd62480b72872680c9c311 --- /dev/null +++ b/trainer_outputs/checkpoint-1500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d95b496b8b0cc781be268c6cc8cc1122584ff984d38077c6c85705da817465a +size 627 diff --git a/trainer_outputs/checkpoint-1500/trainer_state.json b/trainer_outputs/checkpoint-1500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0a151c5ac9d811cbf6a94e0db513c2cd3ddc5d84 --- /dev/null +++ b/trainer_outputs/checkpoint-1500/trainer_state.json @@ -0,0 +1,469 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3468408578530551, + "eval_steps": 500, + "global_step": 1500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 1.0185, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 8e-05, + "loss": 0.8649, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012, + "loss": 0.7784, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016, + "loss": 0.7386, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002, + "loss": 0.7037, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001990530303030303, + "loss": 0.7019, + "step": 120 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001981060606060606, + "loss": 0.7117, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019715909090909094, + "loss": 0.672, + "step": 160 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019621212121212123, + "loss": 0.664, + "step": 180 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019526515151515152, + "loss": 0.6666, + "step": 200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001943181818181818, + "loss": 0.6685, + "step": 220 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019337121212121213, + "loss": 0.6788, + "step": 240 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019242424242424245, + "loss": 0.6673, + "step": 260 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019147727272727274, + "loss": 0.6628, + "step": 280 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019053030303030303, + "loss": 0.6643, + "step": 300 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018958333333333332, + "loss": 0.6607, + "step": 320 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018863636363636364, + "loss": 0.6706, + "step": 340 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018768939393939396, + "loss": 0.6709, + "step": 360 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018674242424242425, + "loss": 0.6616, + "step": 380 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018579545454545454, + "loss": 0.6566, + "step": 400 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018484848484848484, + "loss": 0.6513, + "step": 420 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018390151515151518, + "loss": 0.6797, + "step": 440 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018295454545454547, + "loss": 0.6599, + "step": 460 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018200757575757577, + "loss": 0.6561, + "step": 480 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018106060606060606, + "loss": 0.662, + "step": 500 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018011363636363638, + "loss": 0.6629, + "step": 520 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001791666666666667, + "loss": 0.6475, + "step": 540 + }, + { + "epoch": 0.13, + "learning_rate": 0.00017821969696969699, + "loss": 0.6607, + "step": 560 + }, + { + "epoch": 0.13, + "learning_rate": 0.00017727272727272728, + "loss": 0.6512, + "step": 580 + }, + { + "epoch": 0.14, + "learning_rate": 0.00017632575757575757, + "loss": 0.6484, + "step": 600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001753787878787879, + "loss": 0.6403, + "step": 620 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001744318181818182, + "loss": 0.6537, + "step": 640 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001734848484848485, + "loss": 0.6516, + "step": 660 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001725378787878788, + "loss": 0.6577, + "step": 680 + }, + { + "epoch": 0.16, + "learning_rate": 0.00017159090909090908, + "loss": 0.6374, + "step": 700 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001706439393939394, + "loss": 0.6551, + "step": 720 + }, + { + "epoch": 0.17, + "learning_rate": 0.00016969696969696972, + "loss": 0.6388, + "step": 740 + }, + { + "epoch": 0.18, + "learning_rate": 0.00016875, + "loss": 0.64, + "step": 760 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001678030303030303, + "loss": 0.6579, + "step": 780 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001668560606060606, + "loss": 0.6525, + "step": 800 + }, + { + "epoch": 0.19, + "learning_rate": 0.00016590909090909094, + "loss": 0.6261, + "step": 820 + }, + { + "epoch": 0.19, + "learning_rate": 0.00016496212121212123, + "loss": 0.6351, + "step": 840 + }, + { + "epoch": 0.2, + "learning_rate": 0.00016401515151515152, + "loss": 0.6537, + "step": 860 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001630681818181818, + "loss": 0.6448, + "step": 880 + }, + { + "epoch": 0.21, + "learning_rate": 0.00016212121212121213, + "loss": 0.638, + "step": 900 + }, + { + "epoch": 0.21, + "learning_rate": 0.00016117424242424245, + "loss": 0.6503, + "step": 920 + }, + { + "epoch": 0.22, + "learning_rate": 0.00016022727272727274, + "loss": 0.6378, + "step": 940 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015928030303030303, + "loss": 0.643, + "step": 960 + }, + { + "epoch": 0.23, + "learning_rate": 0.00015833333333333332, + "loss": 0.6235, + "step": 980 + }, + { + "epoch": 0.23, + "learning_rate": 0.00015738636363636364, + "loss": 0.647, + "step": 1000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00015643939393939396, + "loss": 0.6408, + "step": 1020 + }, + { + "epoch": 0.24, + "learning_rate": 0.00015549242424242425, + "loss": 0.6391, + "step": 1040 + }, + { + "epoch": 0.25, + "learning_rate": 0.00015454545454545454, + "loss": 0.6356, + "step": 1060 + }, + { + "epoch": 0.25, + "learning_rate": 0.00015359848484848484, + "loss": 0.6317, + "step": 1080 + }, + { + "epoch": 0.25, + "learning_rate": 0.00015265151515151515, + "loss": 0.6413, + "step": 1100 + }, + { + "epoch": 0.26, + "learning_rate": 0.00015170454545454547, + "loss": 0.6338, + "step": 1120 + }, + { + "epoch": 0.26, + "learning_rate": 0.00015075757575757576, + "loss": 0.6422, + "step": 1140 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014981060606060606, + "loss": 0.6442, + "step": 1160 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014886363636363635, + "loss": 0.6523, + "step": 1180 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001479166666666667, + "loss": 0.6349, + "step": 1200 + }, + { + "epoch": 0.28, + "learning_rate": 0.00014696969696969698, + "loss": 0.6389, + "step": 1220 + }, + { + "epoch": 0.29, + "learning_rate": 0.00014602272727272728, + "loss": 0.6468, + "step": 1240 + }, + { + "epoch": 0.29, + "learning_rate": 0.00014507575757575757, + "loss": 0.6431, + "step": 1260 + }, + { + "epoch": 0.3, + "learning_rate": 0.00014412878787878789, + "loss": 0.6287, + "step": 1280 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001431818181818182, + "loss": 0.6438, + "step": 1300 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001422348484848485, + "loss": 0.6274, + "step": 1320 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001412878787878788, + "loss": 0.6286, + "step": 1340 + }, + { + "epoch": 0.31, + "learning_rate": 0.00014034090909090908, + "loss": 0.6401, + "step": 1360 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001393939393939394, + "loss": 0.6472, + "step": 1380 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013844696969696972, + "loss": 0.6458, + "step": 1400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001375, + "loss": 0.6117, + "step": 1420 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001365530303030303, + "loss": 0.6271, + "step": 1440 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001356060606060606, + "loss": 0.6287, + "step": 1460 + }, + { + "epoch": 0.34, + "learning_rate": 0.00013465909090909094, + "loss": 0.6336, + "step": 1480 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013371212121212123, + "loss": 0.6404, + "step": 1500 + } + ], + "logging_steps": 20, + "max_steps": 4324, + "num_train_epochs": 1, + "save_steps": 500, + "total_flos": 2.3869669769642803e+17, + "trial_name": null, + "trial_params": null +} diff --git a/trainer_outputs/checkpoint-1500/training_args.bin b/trainer_outputs/checkpoint-1500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ee6008563f39382fbd86c959b2567ac52f8f8ada --- /dev/null +++ b/trainer_outputs/checkpoint-1500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7228d256901ac224db1b4d229c9c22d5c5a0db9d57b13da26daf55eb856deda8 +size 4027 diff --git a/trainer_outputs/checkpoint-2000/README.md b/trainer_outputs/checkpoint-2000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c --- /dev/null +++ b/trainer_outputs/checkpoint-2000/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: QuantizationMethod.BITS_AND_BYTES +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.4.0.dev0 diff --git a/trainer_outputs/checkpoint-2000/adapter_config.json b/trainer_outputs/checkpoint-2000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5151aebe3c16ec286f715ae36817f8ff2eff5704 --- /dev/null +++ b/trainer_outputs/checkpoint-2000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/trainer_outputs/checkpoint-2000/adapter_model.bin b/trainer_outputs/checkpoint-2000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..24d34a6c7d039c4a19bf9b8e6869562c5a3db116 --- /dev/null +++ b/trainer_outputs/checkpoint-2000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5f023896d3bce7e5c49eda104abd6b27e11486aaaecedbc4eae110aedb5f4a3 +size 25234701 diff --git a/trainer_outputs/checkpoint-2000/optimizer.pt b/trainer_outputs/checkpoint-2000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a22606fa93de95051210eff05d591a9fa32d3ff --- /dev/null +++ b/trainer_outputs/checkpoint-2000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a039b3641a121c8176a70d33b35f5e3be4162fbedc0ec7ccece47cacb8a5911 +size 50492421 diff --git a/trainer_outputs/checkpoint-2000/rng_state.pth b/trainer_outputs/checkpoint-2000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7e12bfd190b8315e1d5086b3a0c9aca2cd5b7737 --- /dev/null +++ b/trainer_outputs/checkpoint-2000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8688f4574528b0dd0a74d53cc2c94a63206527c586044a61ce3ac53363dbbe16 +size 14575 diff --git a/trainer_outputs/checkpoint-2000/scheduler.pt b/trainer_outputs/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..11c48d2d13892a0c00dd5b15de868fc31687afc3 --- /dev/null +++ b/trainer_outputs/checkpoint-2000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14717a8990d65aecab81f167499716a720c74df7ac5531a158cd13cf1a7bb0c4 +size 627 diff --git a/trainer_outputs/checkpoint-2000/trainer_state.json b/trainer_outputs/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c485f4735fe3f13cd2019747f820e2339b31bbed --- /dev/null +++ b/trainer_outputs/checkpoint-2000/trainer_state.json @@ -0,0 +1,619 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.4624544771374068, + "eval_steps": 500, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 1.0185, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 8e-05, + "loss": 0.8649, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012, + "loss": 0.7784, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016, + "loss": 0.7386, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002, + "loss": 0.7037, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001990530303030303, + "loss": 0.7019, + "step": 120 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001981060606060606, + "loss": 0.7117, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019715909090909094, + "loss": 0.672, + "step": 160 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019621212121212123, + "loss": 0.664, + "step": 180 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019526515151515152, + "loss": 0.6666, + "step": 200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001943181818181818, + "loss": 0.6685, + "step": 220 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019337121212121213, + "loss": 0.6788, + "step": 240 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019242424242424245, + "loss": 0.6673, + "step": 260 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019147727272727274, + "loss": 0.6628, + "step": 280 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019053030303030303, + "loss": 0.6643, + "step": 300 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018958333333333332, + "loss": 0.6607, + "step": 320 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018863636363636364, + "loss": 0.6706, + "step": 340 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018768939393939396, + "loss": 0.6709, + "step": 360 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018674242424242425, + "loss": 0.6616, + "step": 380 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018579545454545454, + "loss": 0.6566, + "step": 400 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018484848484848484, + "loss": 0.6513, + "step": 420 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018390151515151518, + "loss": 0.6797, + "step": 440 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018295454545454547, + "loss": 0.6599, + "step": 460 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018200757575757577, + "loss": 0.6561, + "step": 480 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018106060606060606, + "loss": 0.662, + "step": 500 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018011363636363638, + "loss": 0.6629, + "step": 520 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001791666666666667, + "loss": 0.6475, + "step": 540 + }, + { + "epoch": 0.13, + "learning_rate": 0.00017821969696969699, + "loss": 0.6607, + "step": 560 + }, + { + "epoch": 0.13, + "learning_rate": 0.00017727272727272728, + "loss": 0.6512, + "step": 580 + }, + { + "epoch": 0.14, + "learning_rate": 0.00017632575757575757, + "loss": 0.6484, + "step": 600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001753787878787879, + "loss": 0.6403, + "step": 620 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001744318181818182, + "loss": 0.6537, + "step": 640 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001734848484848485, + "loss": 0.6516, + "step": 660 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001725378787878788, + "loss": 0.6577, + "step": 680 + }, + { + "epoch": 0.16, + "learning_rate": 0.00017159090909090908, + "loss": 0.6374, + "step": 700 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001706439393939394, + "loss": 0.6551, + "step": 720 + }, + { + "epoch": 0.17, + "learning_rate": 0.00016969696969696972, + "loss": 0.6388, + "step": 740 + }, + { + "epoch": 0.18, + "learning_rate": 0.00016875, + "loss": 0.64, + "step": 760 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001678030303030303, + "loss": 0.6579, + "step": 780 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001668560606060606, + "loss": 0.6525, + "step": 800 + }, + { + "epoch": 0.19, + "learning_rate": 0.00016590909090909094, + "loss": 0.6261, + "step": 820 + }, + { + "epoch": 0.19, + "learning_rate": 0.00016496212121212123, + "loss": 0.6351, + "step": 840 + }, + { + "epoch": 0.2, + "learning_rate": 0.00016401515151515152, + "loss": 0.6537, + "step": 860 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001630681818181818, + "loss": 0.6448, + "step": 880 + }, + { + "epoch": 0.21, + "learning_rate": 0.00016212121212121213, + "loss": 0.638, + "step": 900 + }, + { + "epoch": 0.21, + "learning_rate": 0.00016117424242424245, + "loss": 0.6503, + "step": 920 + }, + { + "epoch": 0.22, + "learning_rate": 0.00016022727272727274, + "loss": 0.6378, + "step": 940 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015928030303030303, + "loss": 0.643, + "step": 960 + }, + { + "epoch": 0.23, + "learning_rate": 0.00015833333333333332, + "loss": 0.6235, + "step": 980 + }, + { + "epoch": 0.23, + "learning_rate": 0.00015738636363636364, + "loss": 0.647, + "step": 1000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00015643939393939396, + "loss": 0.6408, + "step": 1020 + }, + { + "epoch": 0.24, + "learning_rate": 0.00015549242424242425, + "loss": 0.6391, + "step": 1040 + }, + { + "epoch": 0.25, + "learning_rate": 0.00015454545454545454, + "loss": 0.6356, + "step": 1060 + }, + { + "epoch": 0.25, + "learning_rate": 0.00015359848484848484, + "loss": 0.6317, + "step": 1080 + }, + { + "epoch": 0.25, + "learning_rate": 0.00015265151515151515, + "loss": 0.6413, + "step": 1100 + }, + { + "epoch": 0.26, + "learning_rate": 0.00015170454545454547, + "loss": 0.6338, + "step": 1120 + }, + { + "epoch": 0.26, + "learning_rate": 0.00015075757575757576, + "loss": 0.6422, + "step": 1140 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014981060606060606, + "loss": 0.6442, + "step": 1160 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014886363636363635, + "loss": 0.6523, + "step": 1180 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001479166666666667, + "loss": 0.6349, + "step": 1200 + }, + { + "epoch": 0.28, + "learning_rate": 0.00014696969696969698, + "loss": 0.6389, + "step": 1220 + }, + { + "epoch": 0.29, + "learning_rate": 0.00014602272727272728, + "loss": 0.6468, + "step": 1240 + }, + { + "epoch": 0.29, + "learning_rate": 0.00014507575757575757, + "loss": 0.6431, + "step": 1260 + }, + { + "epoch": 0.3, + "learning_rate": 0.00014412878787878789, + "loss": 0.6287, + "step": 1280 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001431818181818182, + "loss": 0.6438, + "step": 1300 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001422348484848485, + "loss": 0.6274, + "step": 1320 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001412878787878788, + "loss": 0.6286, + "step": 1340 + }, + { + "epoch": 0.31, + "learning_rate": 0.00014034090909090908, + "loss": 0.6401, + "step": 1360 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001393939393939394, + "loss": 0.6472, + "step": 1380 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013844696969696972, + "loss": 0.6458, + "step": 1400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001375, + "loss": 0.6117, + "step": 1420 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001365530303030303, + "loss": 0.6271, + "step": 1440 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001356060606060606, + "loss": 0.6287, + "step": 1460 + }, + { + "epoch": 0.34, + "learning_rate": 0.00013465909090909094, + "loss": 0.6336, + "step": 1480 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013371212121212123, + "loss": 0.6404, + "step": 1500 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013276515151515152, + "loss": 0.6312, + "step": 1520 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001318181818181818, + "loss": 0.6168, + "step": 1540 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013087121212121213, + "loss": 0.6272, + "step": 1560 + }, + { + "epoch": 0.37, + "learning_rate": 0.00012992424242424245, + "loss": 0.6477, + "step": 1580 + }, + { + "epoch": 0.37, + "learning_rate": 0.00012897727272727274, + "loss": 0.6477, + "step": 1600 + }, + { + "epoch": 0.37, + "learning_rate": 0.00012803030303030303, + "loss": 0.6227, + "step": 1620 + }, + { + "epoch": 0.38, + "learning_rate": 0.00012708333333333332, + "loss": 0.6224, + "step": 1640 + }, + { + "epoch": 0.38, + "learning_rate": 0.00012613636363636364, + "loss": 0.6315, + "step": 1660 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012518939393939396, + "loss": 0.631, + "step": 1680 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012424242424242425, + "loss": 0.6285, + "step": 1700 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012329545454545454, + "loss": 0.6359, + "step": 1720 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012234848484848484, + "loss": 0.6282, + "step": 1740 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012140151515151517, + "loss": 0.6196, + "step": 1760 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012045454545454546, + "loss": 0.6346, + "step": 1780 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011950757575757576, + "loss": 0.6323, + "step": 1800 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011856060606060606, + "loss": 0.6108, + "step": 1820 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011761363636363636, + "loss": 0.6324, + "step": 1840 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011666666666666668, + "loss": 0.618, + "step": 1860 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011571969696969698, + "loss": 0.6099, + "step": 1880 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011477272727272728, + "loss": 0.6251, + "step": 1900 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011382575757575758, + "loss": 0.6209, + "step": 1920 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001128787878787879, + "loss": 0.6218, + "step": 1940 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011193181818181819, + "loss": 0.6299, + "step": 1960 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001109848484848485, + "loss": 0.6211, + "step": 1980 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011003787878787879, + "loss": 0.6072, + "step": 2000 + } + ], + "logging_steps": 20, + "max_steps": 4324, + "num_train_epochs": 1, + "save_steps": 500, + "total_flos": 3.17991699908395e+17, + "trial_name": null, + "trial_params": null +} diff --git a/trainer_outputs/checkpoint-2000/training_args.bin b/trainer_outputs/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ee6008563f39382fbd86c959b2567ac52f8f8ada --- /dev/null +++ b/trainer_outputs/checkpoint-2000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7228d256901ac224db1b4d229c9c22d5c5a0db9d57b13da26daf55eb856deda8 +size 4027 diff --git a/trainer_outputs/checkpoint-2500/README.md b/trainer_outputs/checkpoint-2500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c --- /dev/null +++ b/trainer_outputs/checkpoint-2500/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: QuantizationMethod.BITS_AND_BYTES +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.4.0.dev0 diff --git a/trainer_outputs/checkpoint-2500/adapter_config.json b/trainer_outputs/checkpoint-2500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5151aebe3c16ec286f715ae36817f8ff2eff5704 --- /dev/null +++ b/trainer_outputs/checkpoint-2500/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/trainer_outputs/checkpoint-2500/adapter_model.bin b/trainer_outputs/checkpoint-2500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8e4ff0d2cf6441c87f1e68eb3f36a7551197962 --- /dev/null +++ b/trainer_outputs/checkpoint-2500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:232c8f03d31a9291d76c358dcd1d1ec35c9e353649649655c3cb339d78705648 +size 25234701 diff --git a/trainer_outputs/checkpoint-2500/optimizer.pt b/trainer_outputs/checkpoint-2500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1aae12cb602a6a5d067b2cc948c3529678198fa6 --- /dev/null +++ b/trainer_outputs/checkpoint-2500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea86e060244a7a635f0bdd53220d40319af85bb4f19b7de97088630fe91b44fb +size 50492421 diff --git a/trainer_outputs/checkpoint-2500/rng_state.pth b/trainer_outputs/checkpoint-2500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d86c810eb3d7abd029511f83122abc37a54a64f1 --- /dev/null +++ b/trainer_outputs/checkpoint-2500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cac5163dde6699de99afde41cd4831c7918e26095b1b779e0b6fe701f6bd9f5 +size 14575 diff --git a/trainer_outputs/checkpoint-2500/scheduler.pt b/trainer_outputs/checkpoint-2500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a8c3d7ba89dfa19cbf7ef8a2bee6918194b4f4e --- /dev/null +++ b/trainer_outputs/checkpoint-2500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b98e94e36151b9adbe76956f48af083681c1527c598d294d1c8d293199a3ccb +size 627 diff --git a/trainer_outputs/checkpoint-2500/trainer_state.json b/trainer_outputs/checkpoint-2500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4bde617c9eccfce16c175a33f5b1bc325850f654 --- /dev/null +++ b/trainer_outputs/checkpoint-2500/trainer_state.json @@ -0,0 +1,769 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5780680964217585, + "eval_steps": 500, + "global_step": 2500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 1.0185, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 8e-05, + "loss": 0.8649, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012, + "loss": 0.7784, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016, + "loss": 0.7386, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002, + "loss": 0.7037, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001990530303030303, + "loss": 0.7019, + "step": 120 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001981060606060606, + "loss": 0.7117, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019715909090909094, + "loss": 0.672, + "step": 160 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019621212121212123, + "loss": 0.664, + "step": 180 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019526515151515152, + "loss": 0.6666, + "step": 200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001943181818181818, + "loss": 0.6685, + "step": 220 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019337121212121213, + "loss": 0.6788, + "step": 240 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019242424242424245, + "loss": 0.6673, + "step": 260 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019147727272727274, + "loss": 0.6628, + "step": 280 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019053030303030303, + "loss": 0.6643, + "step": 300 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018958333333333332, + "loss": 0.6607, + "step": 320 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018863636363636364, + "loss": 0.6706, + "step": 340 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018768939393939396, + "loss": 0.6709, + "step": 360 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018674242424242425, + "loss": 0.6616, + "step": 380 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018579545454545454, + "loss": 0.6566, + "step": 400 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018484848484848484, + "loss": 0.6513, + "step": 420 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018390151515151518, + "loss": 0.6797, + "step": 440 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018295454545454547, + "loss": 0.6599, + "step": 460 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018200757575757577, + "loss": 0.6561, + "step": 480 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018106060606060606, + "loss": 0.662, + "step": 500 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018011363636363638, + "loss": 0.6629, + "step": 520 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001791666666666667, + "loss": 0.6475, + "step": 540 + }, + { + "epoch": 0.13, + "learning_rate": 0.00017821969696969699, + "loss": 0.6607, + "step": 560 + }, + { + "epoch": 0.13, + "learning_rate": 0.00017727272727272728, + "loss": 0.6512, + "step": 580 + }, + { + "epoch": 0.14, + "learning_rate": 0.00017632575757575757, + "loss": 0.6484, + "step": 600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001753787878787879, + "loss": 0.6403, + "step": 620 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001744318181818182, + "loss": 0.6537, + "step": 640 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001734848484848485, + "loss": 0.6516, + "step": 660 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001725378787878788, + "loss": 0.6577, + "step": 680 + }, + { + "epoch": 0.16, + "learning_rate": 0.00017159090909090908, + "loss": 0.6374, + "step": 700 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001706439393939394, + "loss": 0.6551, + "step": 720 + }, + { + "epoch": 0.17, + "learning_rate": 0.00016969696969696972, + "loss": 0.6388, + "step": 740 + }, + { + "epoch": 0.18, + "learning_rate": 0.00016875, + "loss": 0.64, + "step": 760 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001678030303030303, + "loss": 0.6579, + "step": 780 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001668560606060606, + "loss": 0.6525, + "step": 800 + }, + { + "epoch": 0.19, + "learning_rate": 0.00016590909090909094, + "loss": 0.6261, + "step": 820 + }, + { + "epoch": 0.19, + "learning_rate": 0.00016496212121212123, + "loss": 0.6351, + "step": 840 + }, + { + "epoch": 0.2, + "learning_rate": 0.00016401515151515152, + "loss": 0.6537, + "step": 860 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001630681818181818, + "loss": 0.6448, + "step": 880 + }, + { + "epoch": 0.21, + "learning_rate": 0.00016212121212121213, + "loss": 0.638, + "step": 900 + }, + { + "epoch": 0.21, + "learning_rate": 0.00016117424242424245, + "loss": 0.6503, + "step": 920 + }, + { + "epoch": 0.22, + "learning_rate": 0.00016022727272727274, + "loss": 0.6378, + "step": 940 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015928030303030303, + "loss": 0.643, + "step": 960 + }, + { + "epoch": 0.23, + "learning_rate": 0.00015833333333333332, + "loss": 0.6235, + "step": 980 + }, + { + "epoch": 0.23, + "learning_rate": 0.00015738636363636364, + "loss": 0.647, + "step": 1000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00015643939393939396, + "loss": 0.6408, + "step": 1020 + }, + { + "epoch": 0.24, + "learning_rate": 0.00015549242424242425, + "loss": 0.6391, + "step": 1040 + }, + { + "epoch": 0.25, + "learning_rate": 0.00015454545454545454, + "loss": 0.6356, + "step": 1060 + }, + { + "epoch": 0.25, + "learning_rate": 0.00015359848484848484, + "loss": 0.6317, + "step": 1080 + }, + { + "epoch": 0.25, + "learning_rate": 0.00015265151515151515, + "loss": 0.6413, + "step": 1100 + }, + { + "epoch": 0.26, + "learning_rate": 0.00015170454545454547, + "loss": 0.6338, + "step": 1120 + }, + { + "epoch": 0.26, + "learning_rate": 0.00015075757575757576, + "loss": 0.6422, + "step": 1140 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014981060606060606, + "loss": 0.6442, + "step": 1160 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014886363636363635, + "loss": 0.6523, + "step": 1180 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001479166666666667, + "loss": 0.6349, + "step": 1200 + }, + { + "epoch": 0.28, + "learning_rate": 0.00014696969696969698, + "loss": 0.6389, + "step": 1220 + }, + { + "epoch": 0.29, + "learning_rate": 0.00014602272727272728, + "loss": 0.6468, + "step": 1240 + }, + { + "epoch": 0.29, + "learning_rate": 0.00014507575757575757, + "loss": 0.6431, + "step": 1260 + }, + { + "epoch": 0.3, + "learning_rate": 0.00014412878787878789, + "loss": 0.6287, + "step": 1280 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001431818181818182, + "loss": 0.6438, + "step": 1300 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001422348484848485, + "loss": 0.6274, + "step": 1320 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001412878787878788, + "loss": 0.6286, + "step": 1340 + }, + { + "epoch": 0.31, + "learning_rate": 0.00014034090909090908, + "loss": 0.6401, + "step": 1360 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001393939393939394, + "loss": 0.6472, + "step": 1380 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013844696969696972, + "loss": 0.6458, + "step": 1400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001375, + "loss": 0.6117, + "step": 1420 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001365530303030303, + "loss": 0.6271, + "step": 1440 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001356060606060606, + "loss": 0.6287, + "step": 1460 + }, + { + "epoch": 0.34, + "learning_rate": 0.00013465909090909094, + "loss": 0.6336, + "step": 1480 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013371212121212123, + "loss": 0.6404, + "step": 1500 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013276515151515152, + "loss": 0.6312, + "step": 1520 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001318181818181818, + "loss": 0.6168, + "step": 1540 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013087121212121213, + "loss": 0.6272, + "step": 1560 + }, + { + "epoch": 0.37, + "learning_rate": 0.00012992424242424245, + "loss": 0.6477, + "step": 1580 + }, + { + "epoch": 0.37, + "learning_rate": 0.00012897727272727274, + "loss": 0.6477, + "step": 1600 + }, + { + "epoch": 0.37, + "learning_rate": 0.00012803030303030303, + "loss": 0.6227, + "step": 1620 + }, + { + "epoch": 0.38, + "learning_rate": 0.00012708333333333332, + "loss": 0.6224, + "step": 1640 + }, + { + "epoch": 0.38, + "learning_rate": 0.00012613636363636364, + "loss": 0.6315, + "step": 1660 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012518939393939396, + "loss": 0.631, + "step": 1680 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012424242424242425, + "loss": 0.6285, + "step": 1700 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012329545454545454, + "loss": 0.6359, + "step": 1720 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012234848484848484, + "loss": 0.6282, + "step": 1740 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012140151515151517, + "loss": 0.6196, + "step": 1760 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012045454545454546, + "loss": 0.6346, + "step": 1780 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011950757575757576, + "loss": 0.6323, + "step": 1800 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011856060606060606, + "loss": 0.6108, + "step": 1820 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011761363636363636, + "loss": 0.6324, + "step": 1840 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011666666666666668, + "loss": 0.618, + "step": 1860 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011571969696969698, + "loss": 0.6099, + "step": 1880 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011477272727272728, + "loss": 0.6251, + "step": 1900 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011382575757575758, + "loss": 0.6209, + "step": 1920 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001128787878787879, + "loss": 0.6218, + "step": 1940 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011193181818181819, + "loss": 0.6299, + "step": 1960 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001109848484848485, + "loss": 0.6211, + "step": 1980 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011003787878787879, + "loss": 0.6072, + "step": 2000 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010909090909090909, + "loss": 0.6264, + "step": 2020 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010814393939393941, + "loss": 0.6248, + "step": 2040 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001071969696969697, + "loss": 0.6125, + "step": 2060 + }, + { + "epoch": 0.48, + "learning_rate": 0.00010625000000000001, + "loss": 0.6294, + "step": 2080 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001053030303030303, + "loss": 0.6193, + "step": 2100 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001043560606060606, + "loss": 0.6293, + "step": 2120 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010340909090909092, + "loss": 0.629, + "step": 2140 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010246212121212121, + "loss": 0.6353, + "step": 2160 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010151515151515152, + "loss": 0.6268, + "step": 2180 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010056818181818181, + "loss": 0.6256, + "step": 2200 + }, + { + "epoch": 0.51, + "learning_rate": 9.962121212121213e-05, + "loss": 0.6404, + "step": 2220 + }, + { + "epoch": 0.52, + "learning_rate": 9.867424242424242e-05, + "loss": 0.6347, + "step": 2240 + }, + { + "epoch": 0.52, + "learning_rate": 9.772727272727274e-05, + "loss": 0.6317, + "step": 2260 + }, + { + "epoch": 0.53, + "learning_rate": 9.678030303030303e-05, + "loss": 0.6335, + "step": 2280 + }, + { + "epoch": 0.53, + "learning_rate": 9.583333333333334e-05, + "loss": 0.6239, + "step": 2300 + }, + { + "epoch": 0.54, + "learning_rate": 9.488636363636364e-05, + "loss": 0.6302, + "step": 2320 + }, + { + "epoch": 0.54, + "learning_rate": 9.393939393939395e-05, + "loss": 0.6182, + "step": 2340 + }, + { + "epoch": 0.55, + "learning_rate": 9.299242424242425e-05, + "loss": 0.6219, + "step": 2360 + }, + { + "epoch": 0.55, + "learning_rate": 9.204545454545454e-05, + "loss": 0.6229, + "step": 2380 + }, + { + "epoch": 0.55, + "learning_rate": 9.109848484848486e-05, + "loss": 0.6413, + "step": 2400 + }, + { + "epoch": 0.56, + "learning_rate": 9.015151515151515e-05, + "loss": 0.6237, + "step": 2420 + }, + { + "epoch": 0.56, + "learning_rate": 8.920454545454546e-05, + "loss": 0.6397, + "step": 2440 + }, + { + "epoch": 0.57, + "learning_rate": 8.825757575757576e-05, + "loss": 0.6259, + "step": 2460 + }, + { + "epoch": 0.57, + "learning_rate": 8.731060606060605e-05, + "loss": 0.634, + "step": 2480 + }, + { + "epoch": 0.58, + "learning_rate": 8.636363636363637e-05, + "loss": 0.622, + "step": 2500 + } + ], + "logging_steps": 20, + "max_steps": 4324, + "num_train_epochs": 1, + "save_steps": 500, + "total_flos": 3.972710256835707e+17, + "trial_name": null, + "trial_params": null +} diff --git a/trainer_outputs/checkpoint-2500/training_args.bin b/trainer_outputs/checkpoint-2500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ee6008563f39382fbd86c959b2567ac52f8f8ada --- /dev/null +++ b/trainer_outputs/checkpoint-2500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7228d256901ac224db1b4d229c9c22d5c5a0db9d57b13da26daf55eb856deda8 +size 4027 diff --git a/trainer_outputs/checkpoint-3000/README.md b/trainer_outputs/checkpoint-3000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c --- /dev/null +++ b/trainer_outputs/checkpoint-3000/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: QuantizationMethod.BITS_AND_BYTES +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.4.0.dev0 diff --git a/trainer_outputs/checkpoint-3000/adapter_config.json b/trainer_outputs/checkpoint-3000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5151aebe3c16ec286f715ae36817f8ff2eff5704 --- /dev/null +++ b/trainer_outputs/checkpoint-3000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/trainer_outputs/checkpoint-3000/adapter_model.bin b/trainer_outputs/checkpoint-3000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a6b3c66fadf2834906e25ca4eb26dc829d0b13c6 --- /dev/null +++ b/trainer_outputs/checkpoint-3000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e858acac1298d05fd061f5db236358558d7da5467a6f587dfdabc156b10f1007 +size 25234701 diff --git a/trainer_outputs/checkpoint-3000/optimizer.pt b/trainer_outputs/checkpoint-3000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8bc9999764970052ad1ee30e7c864b54fb18f46 --- /dev/null +++ b/trainer_outputs/checkpoint-3000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cd8f4f87fafc48f5c57b8c135f3123244df4a6638af1f5dbafdfd8ab9f8d4f2 +size 50492421 diff --git a/trainer_outputs/checkpoint-3000/rng_state.pth b/trainer_outputs/checkpoint-3000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..62b4b4e0d330cb97572f11857dd79eb329f7ed7d --- /dev/null +++ b/trainer_outputs/checkpoint-3000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a2c4d029b8b38ce03f11c146ff3d4b63707e299b27e04bbebd26959eb6cf2de +size 14575 diff --git a/trainer_outputs/checkpoint-3000/scheduler.pt b/trainer_outputs/checkpoint-3000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..87a2048e0df2cf46157356a4d056c27e43f72159 --- /dev/null +++ b/trainer_outputs/checkpoint-3000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cff36dfd5c81bbf72d17b2bd6a4adff33fa030d7597ff652e5db60dce81e415 +size 627 diff --git a/trainer_outputs/checkpoint-3000/trainer_state.json b/trainer_outputs/checkpoint-3000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b837ac22f9399d7a99c5b4f11ed8f0469c422b13 --- /dev/null +++ b/trainer_outputs/checkpoint-3000/trainer_state.json @@ -0,0 +1,919 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6936817157061101, + "eval_steps": 500, + "global_step": 3000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 1.0185, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 8e-05, + "loss": 0.8649, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012, + "loss": 0.7784, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016, + "loss": 0.7386, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002, + "loss": 0.7037, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001990530303030303, + "loss": 0.7019, + "step": 120 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001981060606060606, + "loss": 0.7117, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019715909090909094, + "loss": 0.672, + "step": 160 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019621212121212123, + "loss": 0.664, + "step": 180 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019526515151515152, + "loss": 0.6666, + "step": 200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001943181818181818, + "loss": 0.6685, + "step": 220 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019337121212121213, + "loss": 0.6788, + "step": 240 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019242424242424245, + "loss": 0.6673, + "step": 260 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019147727272727274, + "loss": 0.6628, + "step": 280 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019053030303030303, + "loss": 0.6643, + "step": 300 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018958333333333332, + "loss": 0.6607, + "step": 320 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018863636363636364, + "loss": 0.6706, + "step": 340 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018768939393939396, + "loss": 0.6709, + "step": 360 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018674242424242425, + "loss": 0.6616, + "step": 380 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018579545454545454, + "loss": 0.6566, + "step": 400 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018484848484848484, + "loss": 0.6513, + "step": 420 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018390151515151518, + "loss": 0.6797, + "step": 440 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018295454545454547, + "loss": 0.6599, + "step": 460 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018200757575757577, + "loss": 0.6561, + "step": 480 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018106060606060606, + "loss": 0.662, + "step": 500 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018011363636363638, + "loss": 0.6629, + "step": 520 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001791666666666667, + "loss": 0.6475, + "step": 540 + }, + { + "epoch": 0.13, + "learning_rate": 0.00017821969696969699, + "loss": 0.6607, + "step": 560 + }, + { + "epoch": 0.13, + "learning_rate": 0.00017727272727272728, + "loss": 0.6512, + "step": 580 + }, + { + "epoch": 0.14, + "learning_rate": 0.00017632575757575757, + "loss": 0.6484, + "step": 600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001753787878787879, + "loss": 0.6403, + "step": 620 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001744318181818182, + "loss": 0.6537, + "step": 640 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001734848484848485, + "loss": 0.6516, + "step": 660 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001725378787878788, + "loss": 0.6577, + "step": 680 + }, + { + "epoch": 0.16, + "learning_rate": 0.00017159090909090908, + "loss": 0.6374, + "step": 700 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001706439393939394, + "loss": 0.6551, + "step": 720 + }, + { + "epoch": 0.17, + "learning_rate": 0.00016969696969696972, + "loss": 0.6388, + "step": 740 + }, + { + "epoch": 0.18, + "learning_rate": 0.00016875, + "loss": 0.64, + "step": 760 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001678030303030303, + "loss": 0.6579, + "step": 780 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001668560606060606, + "loss": 0.6525, + "step": 800 + }, + { + "epoch": 0.19, + "learning_rate": 0.00016590909090909094, + "loss": 0.6261, + "step": 820 + }, + { + "epoch": 0.19, + "learning_rate": 0.00016496212121212123, + "loss": 0.6351, + "step": 840 + }, + { + "epoch": 0.2, + "learning_rate": 0.00016401515151515152, + "loss": 0.6537, + "step": 860 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001630681818181818, + "loss": 0.6448, + "step": 880 + }, + { + "epoch": 0.21, + "learning_rate": 0.00016212121212121213, + "loss": 0.638, + "step": 900 + }, + { + "epoch": 0.21, + "learning_rate": 0.00016117424242424245, + "loss": 0.6503, + "step": 920 + }, + { + "epoch": 0.22, + "learning_rate": 0.00016022727272727274, + "loss": 0.6378, + "step": 940 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015928030303030303, + "loss": 0.643, + "step": 960 + }, + { + "epoch": 0.23, + "learning_rate": 0.00015833333333333332, + "loss": 0.6235, + "step": 980 + }, + { + "epoch": 0.23, + "learning_rate": 0.00015738636363636364, + "loss": 0.647, + "step": 1000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00015643939393939396, + "loss": 0.6408, + "step": 1020 + }, + { + "epoch": 0.24, + "learning_rate": 0.00015549242424242425, + "loss": 0.6391, + "step": 1040 + }, + { + "epoch": 0.25, + "learning_rate": 0.00015454545454545454, + "loss": 0.6356, + "step": 1060 + }, + { + "epoch": 0.25, + "learning_rate": 0.00015359848484848484, + "loss": 0.6317, + "step": 1080 + }, + { + "epoch": 0.25, + "learning_rate": 0.00015265151515151515, + "loss": 0.6413, + "step": 1100 + }, + { + "epoch": 0.26, + "learning_rate": 0.00015170454545454547, + "loss": 0.6338, + "step": 1120 + }, + { + "epoch": 0.26, + "learning_rate": 0.00015075757575757576, + "loss": 0.6422, + "step": 1140 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014981060606060606, + "loss": 0.6442, + "step": 1160 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014886363636363635, + "loss": 0.6523, + "step": 1180 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001479166666666667, + "loss": 0.6349, + "step": 1200 + }, + { + "epoch": 0.28, + "learning_rate": 0.00014696969696969698, + "loss": 0.6389, + "step": 1220 + }, + { + "epoch": 0.29, + "learning_rate": 0.00014602272727272728, + "loss": 0.6468, + "step": 1240 + }, + { + "epoch": 0.29, + "learning_rate": 0.00014507575757575757, + "loss": 0.6431, + "step": 1260 + }, + { + "epoch": 0.3, + "learning_rate": 0.00014412878787878789, + "loss": 0.6287, + "step": 1280 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001431818181818182, + "loss": 0.6438, + "step": 1300 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001422348484848485, + "loss": 0.6274, + "step": 1320 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001412878787878788, + "loss": 0.6286, + "step": 1340 + }, + { + "epoch": 0.31, + "learning_rate": 0.00014034090909090908, + "loss": 0.6401, + "step": 1360 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001393939393939394, + "loss": 0.6472, + "step": 1380 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013844696969696972, + "loss": 0.6458, + "step": 1400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001375, + "loss": 0.6117, + "step": 1420 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001365530303030303, + "loss": 0.6271, + "step": 1440 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001356060606060606, + "loss": 0.6287, + "step": 1460 + }, + { + "epoch": 0.34, + "learning_rate": 0.00013465909090909094, + "loss": 0.6336, + "step": 1480 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013371212121212123, + "loss": 0.6404, + "step": 1500 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013276515151515152, + "loss": 0.6312, + "step": 1520 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001318181818181818, + "loss": 0.6168, + "step": 1540 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013087121212121213, + "loss": 0.6272, + "step": 1560 + }, + { + "epoch": 0.37, + "learning_rate": 0.00012992424242424245, + "loss": 0.6477, + "step": 1580 + }, + { + "epoch": 0.37, + "learning_rate": 0.00012897727272727274, + "loss": 0.6477, + "step": 1600 + }, + { + "epoch": 0.37, + "learning_rate": 0.00012803030303030303, + "loss": 0.6227, + "step": 1620 + }, + { + "epoch": 0.38, + "learning_rate": 0.00012708333333333332, + "loss": 0.6224, + "step": 1640 + }, + { + "epoch": 0.38, + "learning_rate": 0.00012613636363636364, + "loss": 0.6315, + "step": 1660 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012518939393939396, + "loss": 0.631, + "step": 1680 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012424242424242425, + "loss": 0.6285, + "step": 1700 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012329545454545454, + "loss": 0.6359, + "step": 1720 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012234848484848484, + "loss": 0.6282, + "step": 1740 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012140151515151517, + "loss": 0.6196, + "step": 1760 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012045454545454546, + "loss": 0.6346, + "step": 1780 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011950757575757576, + "loss": 0.6323, + "step": 1800 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011856060606060606, + "loss": 0.6108, + "step": 1820 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011761363636363636, + "loss": 0.6324, + "step": 1840 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011666666666666668, + "loss": 0.618, + "step": 1860 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011571969696969698, + "loss": 0.6099, + "step": 1880 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011477272727272728, + "loss": 0.6251, + "step": 1900 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011382575757575758, + "loss": 0.6209, + "step": 1920 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001128787878787879, + "loss": 0.6218, + "step": 1940 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011193181818181819, + "loss": 0.6299, + "step": 1960 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001109848484848485, + "loss": 0.6211, + "step": 1980 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011003787878787879, + "loss": 0.6072, + "step": 2000 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010909090909090909, + "loss": 0.6264, + "step": 2020 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010814393939393941, + "loss": 0.6248, + "step": 2040 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001071969696969697, + "loss": 0.6125, + "step": 2060 + }, + { + "epoch": 0.48, + "learning_rate": 0.00010625000000000001, + "loss": 0.6294, + "step": 2080 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001053030303030303, + "loss": 0.6193, + "step": 2100 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001043560606060606, + "loss": 0.6293, + "step": 2120 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010340909090909092, + "loss": 0.629, + "step": 2140 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010246212121212121, + "loss": 0.6353, + "step": 2160 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010151515151515152, + "loss": 0.6268, + "step": 2180 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010056818181818181, + "loss": 0.6256, + "step": 2200 + }, + { + "epoch": 0.51, + "learning_rate": 9.962121212121213e-05, + "loss": 0.6404, + "step": 2220 + }, + { + "epoch": 0.52, + "learning_rate": 9.867424242424242e-05, + "loss": 0.6347, + "step": 2240 + }, + { + "epoch": 0.52, + "learning_rate": 9.772727272727274e-05, + "loss": 0.6317, + "step": 2260 + }, + { + "epoch": 0.53, + "learning_rate": 9.678030303030303e-05, + "loss": 0.6335, + "step": 2280 + }, + { + "epoch": 0.53, + "learning_rate": 9.583333333333334e-05, + "loss": 0.6239, + "step": 2300 + }, + { + "epoch": 0.54, + "learning_rate": 9.488636363636364e-05, + "loss": 0.6302, + "step": 2320 + }, + { + "epoch": 0.54, + "learning_rate": 9.393939393939395e-05, + "loss": 0.6182, + "step": 2340 + }, + { + "epoch": 0.55, + "learning_rate": 9.299242424242425e-05, + "loss": 0.6219, + "step": 2360 + }, + { + "epoch": 0.55, + "learning_rate": 9.204545454545454e-05, + "loss": 0.6229, + "step": 2380 + }, + { + "epoch": 0.55, + "learning_rate": 9.109848484848486e-05, + "loss": 0.6413, + "step": 2400 + }, + { + "epoch": 0.56, + "learning_rate": 9.015151515151515e-05, + "loss": 0.6237, + "step": 2420 + }, + { + "epoch": 0.56, + "learning_rate": 8.920454545454546e-05, + "loss": 0.6397, + "step": 2440 + }, + { + "epoch": 0.57, + "learning_rate": 8.825757575757576e-05, + "loss": 0.6259, + "step": 2460 + }, + { + "epoch": 0.57, + "learning_rate": 8.731060606060605e-05, + "loss": 0.634, + "step": 2480 + }, + { + "epoch": 0.58, + "learning_rate": 8.636363636363637e-05, + "loss": 0.622, + "step": 2500 + }, + { + "epoch": 0.58, + "learning_rate": 8.541666666666666e-05, + "loss": 0.6279, + "step": 2520 + }, + { + "epoch": 0.59, + "learning_rate": 8.446969696969697e-05, + "loss": 0.6306, + "step": 2540 + }, + { + "epoch": 0.59, + "learning_rate": 8.352272727272727e-05, + "loss": 0.6288, + "step": 2560 + }, + { + "epoch": 0.6, + "learning_rate": 8.257575757575758e-05, + "loss": 0.6297, + "step": 2580 + }, + { + "epoch": 0.6, + "learning_rate": 8.162878787878789e-05, + "loss": 0.6119, + "step": 2600 + }, + { + "epoch": 0.61, + "learning_rate": 8.068181818181818e-05, + "loss": 0.6227, + "step": 2620 + }, + { + "epoch": 0.61, + "learning_rate": 7.97348484848485e-05, + "loss": 0.6317, + "step": 2640 + }, + { + "epoch": 0.62, + "learning_rate": 7.878787878787879e-05, + "loss": 0.619, + "step": 2660 + }, + { + "epoch": 0.62, + "learning_rate": 7.784090909090909e-05, + "loss": 0.6156, + "step": 2680 + }, + { + "epoch": 0.62, + "learning_rate": 7.68939393939394e-05, + "loss": 0.6116, + "step": 2700 + }, + { + "epoch": 0.63, + "learning_rate": 7.59469696969697e-05, + "loss": 0.6009, + "step": 2720 + }, + { + "epoch": 0.63, + "learning_rate": 7.500000000000001e-05, + "loss": 0.6226, + "step": 2740 + }, + { + "epoch": 0.64, + "learning_rate": 7.40530303030303e-05, + "loss": 0.6246, + "step": 2760 + }, + { + "epoch": 0.64, + "learning_rate": 7.310606060606062e-05, + "loss": 0.6035, + "step": 2780 + }, + { + "epoch": 0.65, + "learning_rate": 7.215909090909091e-05, + "loss": 0.6059, + "step": 2800 + }, + { + "epoch": 0.65, + "learning_rate": 7.121212121212121e-05, + "loss": 0.6387, + "step": 2820 + }, + { + "epoch": 0.66, + "learning_rate": 7.026515151515152e-05, + "loss": 0.6314, + "step": 2840 + }, + { + "epoch": 0.66, + "learning_rate": 6.931818181818182e-05, + "loss": 0.6246, + "step": 2860 + }, + { + "epoch": 0.67, + "learning_rate": 6.837121212121213e-05, + "loss": 0.6448, + "step": 2880 + }, + { + "epoch": 0.67, + "learning_rate": 6.742424242424242e-05, + "loss": 0.5997, + "step": 2900 + }, + { + "epoch": 0.68, + "learning_rate": 6.647727272727274e-05, + "loss": 0.6238, + "step": 2920 + }, + { + "epoch": 0.68, + "learning_rate": 6.553030303030303e-05, + "loss": 0.614, + "step": 2940 + }, + { + "epoch": 0.68, + "learning_rate": 6.458333333333334e-05, + "loss": 0.6162, + "step": 2960 + }, + { + "epoch": 0.69, + "learning_rate": 6.363636363636364e-05, + "loss": 0.6087, + "step": 2980 + }, + { + "epoch": 0.69, + "learning_rate": 6.268939393939395e-05, + "loss": 0.6215, + "step": 3000 + } + ], + "logging_steps": 20, + "max_steps": 4324, + "num_train_epochs": 1, + "save_steps": 500, + "total_flos": 4.765794561611612e+17, + "trial_name": null, + "trial_params": null +} diff --git a/trainer_outputs/checkpoint-3000/training_args.bin b/trainer_outputs/checkpoint-3000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ee6008563f39382fbd86c959b2567ac52f8f8ada --- /dev/null +++ b/trainer_outputs/checkpoint-3000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7228d256901ac224db1b4d229c9c22d5c5a0db9d57b13da26daf55eb856deda8 +size 4027 diff --git a/trainer_outputs/checkpoint-3500/README.md b/trainer_outputs/checkpoint-3500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c --- /dev/null +++ b/trainer_outputs/checkpoint-3500/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: QuantizationMethod.BITS_AND_BYTES +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.4.0.dev0 diff --git a/trainer_outputs/checkpoint-3500/adapter_config.json b/trainer_outputs/checkpoint-3500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5151aebe3c16ec286f715ae36817f8ff2eff5704 --- /dev/null +++ b/trainer_outputs/checkpoint-3500/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/trainer_outputs/checkpoint-3500/adapter_model.bin b/trainer_outputs/checkpoint-3500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8e9a55813d8274f2d4c8ab3250c9fd070bcaa7fe --- /dev/null +++ b/trainer_outputs/checkpoint-3500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8aa6a4b12739df4688b250dd42d8865ac502c5a0dd524517df8835286483fe6 +size 25234701 diff --git a/trainer_outputs/checkpoint-3500/optimizer.pt b/trainer_outputs/checkpoint-3500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..942374167beab6ca115425b5e0860ed706917006 --- /dev/null +++ b/trainer_outputs/checkpoint-3500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bd31c79477c8bb20a9e4fb58d06bfaa1153405e0d8f485f44dbeab8a05279f9 +size 50492421 diff --git a/trainer_outputs/checkpoint-3500/rng_state.pth b/trainer_outputs/checkpoint-3500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4a24e22dde2d238db5814033dbd7f6053fa43c48 --- /dev/null +++ b/trainer_outputs/checkpoint-3500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0d2c9d6c9d4ebaf9f40dab1df0a6bc899fcb38fbb02ef45403f0219e1031b81 +size 14575 diff --git a/trainer_outputs/checkpoint-3500/scheduler.pt b/trainer_outputs/checkpoint-3500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e82ffe2b50bb3cee955983a57689f9766714b48 --- /dev/null +++ b/trainer_outputs/checkpoint-3500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2396934a3bf1f6f3d1949f841a8f1c1aa502806ee333ffb75a170b9c1a5fbbe +size 627 diff --git a/trainer_outputs/checkpoint-3500/trainer_state.json b/trainer_outputs/checkpoint-3500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0fc4f0ba2db492695983687df5a141729eb10f3d --- /dev/null +++ b/trainer_outputs/checkpoint-3500/trainer_state.json @@ -0,0 +1,1069 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8092953349904619, + "eval_steps": 500, + "global_step": 3500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 1.0185, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 8e-05, + "loss": 0.8649, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012, + "loss": 0.7784, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016, + "loss": 0.7386, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002, + "loss": 0.7037, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001990530303030303, + "loss": 0.7019, + "step": 120 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001981060606060606, + "loss": 0.7117, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019715909090909094, + "loss": 0.672, + "step": 160 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019621212121212123, + "loss": 0.664, + "step": 180 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019526515151515152, + "loss": 0.6666, + "step": 200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001943181818181818, + "loss": 0.6685, + "step": 220 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019337121212121213, + "loss": 0.6788, + "step": 240 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019242424242424245, + "loss": 0.6673, + "step": 260 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019147727272727274, + "loss": 0.6628, + "step": 280 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019053030303030303, + "loss": 0.6643, + "step": 300 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018958333333333332, + "loss": 0.6607, + "step": 320 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018863636363636364, + "loss": 0.6706, + "step": 340 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018768939393939396, + "loss": 0.6709, + "step": 360 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018674242424242425, + "loss": 0.6616, + "step": 380 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018579545454545454, + "loss": 0.6566, + "step": 400 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018484848484848484, + "loss": 0.6513, + "step": 420 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018390151515151518, + "loss": 0.6797, + "step": 440 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018295454545454547, + "loss": 0.6599, + "step": 460 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018200757575757577, + "loss": 0.6561, + "step": 480 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018106060606060606, + "loss": 0.662, + "step": 500 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018011363636363638, + "loss": 0.6629, + "step": 520 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001791666666666667, + "loss": 0.6475, + "step": 540 + }, + { + "epoch": 0.13, + "learning_rate": 0.00017821969696969699, + "loss": 0.6607, + "step": 560 + }, + { + "epoch": 0.13, + "learning_rate": 0.00017727272727272728, + "loss": 0.6512, + "step": 580 + }, + { + "epoch": 0.14, + "learning_rate": 0.00017632575757575757, + "loss": 0.6484, + "step": 600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001753787878787879, + "loss": 0.6403, + "step": 620 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001744318181818182, + "loss": 0.6537, + "step": 640 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001734848484848485, + "loss": 0.6516, + "step": 660 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001725378787878788, + "loss": 0.6577, + "step": 680 + }, + { + "epoch": 0.16, + "learning_rate": 0.00017159090909090908, + "loss": 0.6374, + "step": 700 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001706439393939394, + "loss": 0.6551, + "step": 720 + }, + { + "epoch": 0.17, + "learning_rate": 0.00016969696969696972, + "loss": 0.6388, + "step": 740 + }, + { + "epoch": 0.18, + "learning_rate": 0.00016875, + "loss": 0.64, + "step": 760 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001678030303030303, + "loss": 0.6579, + "step": 780 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001668560606060606, + "loss": 0.6525, + "step": 800 + }, + { + "epoch": 0.19, + "learning_rate": 0.00016590909090909094, + "loss": 0.6261, + "step": 820 + }, + { + "epoch": 0.19, + "learning_rate": 0.00016496212121212123, + "loss": 0.6351, + "step": 840 + }, + { + "epoch": 0.2, + "learning_rate": 0.00016401515151515152, + "loss": 0.6537, + "step": 860 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001630681818181818, + "loss": 0.6448, + "step": 880 + }, + { + "epoch": 0.21, + "learning_rate": 0.00016212121212121213, + "loss": 0.638, + "step": 900 + }, + { + "epoch": 0.21, + "learning_rate": 0.00016117424242424245, + "loss": 0.6503, + "step": 920 + }, + { + "epoch": 0.22, + "learning_rate": 0.00016022727272727274, + "loss": 0.6378, + "step": 940 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015928030303030303, + "loss": 0.643, + "step": 960 + }, + { + "epoch": 0.23, + "learning_rate": 0.00015833333333333332, + "loss": 0.6235, + "step": 980 + }, + { + "epoch": 0.23, + "learning_rate": 0.00015738636363636364, + "loss": 0.647, + "step": 1000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00015643939393939396, + "loss": 0.6408, + "step": 1020 + }, + { + "epoch": 0.24, + "learning_rate": 0.00015549242424242425, + "loss": 0.6391, + "step": 1040 + }, + { + "epoch": 0.25, + "learning_rate": 0.00015454545454545454, + "loss": 0.6356, + "step": 1060 + }, + { + "epoch": 0.25, + "learning_rate": 0.00015359848484848484, + "loss": 0.6317, + "step": 1080 + }, + { + "epoch": 0.25, + "learning_rate": 0.00015265151515151515, + "loss": 0.6413, + "step": 1100 + }, + { + "epoch": 0.26, + "learning_rate": 0.00015170454545454547, + "loss": 0.6338, + "step": 1120 + }, + { + "epoch": 0.26, + "learning_rate": 0.00015075757575757576, + "loss": 0.6422, + "step": 1140 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014981060606060606, + "loss": 0.6442, + "step": 1160 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014886363636363635, + "loss": 0.6523, + "step": 1180 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001479166666666667, + "loss": 0.6349, + "step": 1200 + }, + { + "epoch": 0.28, + "learning_rate": 0.00014696969696969698, + "loss": 0.6389, + "step": 1220 + }, + { + "epoch": 0.29, + "learning_rate": 0.00014602272727272728, + "loss": 0.6468, + "step": 1240 + }, + { + "epoch": 0.29, + "learning_rate": 0.00014507575757575757, + "loss": 0.6431, + "step": 1260 + }, + { + "epoch": 0.3, + "learning_rate": 0.00014412878787878789, + "loss": 0.6287, + "step": 1280 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001431818181818182, + "loss": 0.6438, + "step": 1300 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001422348484848485, + "loss": 0.6274, + "step": 1320 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001412878787878788, + "loss": 0.6286, + "step": 1340 + }, + { + "epoch": 0.31, + "learning_rate": 0.00014034090909090908, + "loss": 0.6401, + "step": 1360 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001393939393939394, + "loss": 0.6472, + "step": 1380 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013844696969696972, + "loss": 0.6458, + "step": 1400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001375, + "loss": 0.6117, + "step": 1420 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001365530303030303, + "loss": 0.6271, + "step": 1440 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001356060606060606, + "loss": 0.6287, + "step": 1460 + }, + { + "epoch": 0.34, + "learning_rate": 0.00013465909090909094, + "loss": 0.6336, + "step": 1480 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013371212121212123, + "loss": 0.6404, + "step": 1500 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013276515151515152, + "loss": 0.6312, + "step": 1520 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001318181818181818, + "loss": 0.6168, + "step": 1540 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013087121212121213, + "loss": 0.6272, + "step": 1560 + }, + { + "epoch": 0.37, + "learning_rate": 0.00012992424242424245, + "loss": 0.6477, + "step": 1580 + }, + { + "epoch": 0.37, + "learning_rate": 0.00012897727272727274, + "loss": 0.6477, + "step": 1600 + }, + { + "epoch": 0.37, + "learning_rate": 0.00012803030303030303, + "loss": 0.6227, + "step": 1620 + }, + { + "epoch": 0.38, + "learning_rate": 0.00012708333333333332, + "loss": 0.6224, + "step": 1640 + }, + { + "epoch": 0.38, + "learning_rate": 0.00012613636363636364, + "loss": 0.6315, + "step": 1660 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012518939393939396, + "loss": 0.631, + "step": 1680 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012424242424242425, + "loss": 0.6285, + "step": 1700 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012329545454545454, + "loss": 0.6359, + "step": 1720 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012234848484848484, + "loss": 0.6282, + "step": 1740 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012140151515151517, + "loss": 0.6196, + "step": 1760 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012045454545454546, + "loss": 0.6346, + "step": 1780 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011950757575757576, + "loss": 0.6323, + "step": 1800 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011856060606060606, + "loss": 0.6108, + "step": 1820 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011761363636363636, + "loss": 0.6324, + "step": 1840 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011666666666666668, + "loss": 0.618, + "step": 1860 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011571969696969698, + "loss": 0.6099, + "step": 1880 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011477272727272728, + "loss": 0.6251, + "step": 1900 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011382575757575758, + "loss": 0.6209, + "step": 1920 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001128787878787879, + "loss": 0.6218, + "step": 1940 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011193181818181819, + "loss": 0.6299, + "step": 1960 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001109848484848485, + "loss": 0.6211, + "step": 1980 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011003787878787879, + "loss": 0.6072, + "step": 2000 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010909090909090909, + "loss": 0.6264, + "step": 2020 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010814393939393941, + "loss": 0.6248, + "step": 2040 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001071969696969697, + "loss": 0.6125, + "step": 2060 + }, + { + "epoch": 0.48, + "learning_rate": 0.00010625000000000001, + "loss": 0.6294, + "step": 2080 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001053030303030303, + "loss": 0.6193, + "step": 2100 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001043560606060606, + "loss": 0.6293, + "step": 2120 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010340909090909092, + "loss": 0.629, + "step": 2140 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010246212121212121, + "loss": 0.6353, + "step": 2160 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010151515151515152, + "loss": 0.6268, + "step": 2180 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010056818181818181, + "loss": 0.6256, + "step": 2200 + }, + { + "epoch": 0.51, + "learning_rate": 9.962121212121213e-05, + "loss": 0.6404, + "step": 2220 + }, + { + "epoch": 0.52, + "learning_rate": 9.867424242424242e-05, + "loss": 0.6347, + "step": 2240 + }, + { + "epoch": 0.52, + "learning_rate": 9.772727272727274e-05, + "loss": 0.6317, + "step": 2260 + }, + { + "epoch": 0.53, + "learning_rate": 9.678030303030303e-05, + "loss": 0.6335, + "step": 2280 + }, + { + "epoch": 0.53, + "learning_rate": 9.583333333333334e-05, + "loss": 0.6239, + "step": 2300 + }, + { + "epoch": 0.54, + "learning_rate": 9.488636363636364e-05, + "loss": 0.6302, + "step": 2320 + }, + { + "epoch": 0.54, + "learning_rate": 9.393939393939395e-05, + "loss": 0.6182, + "step": 2340 + }, + { + "epoch": 0.55, + "learning_rate": 9.299242424242425e-05, + "loss": 0.6219, + "step": 2360 + }, + { + "epoch": 0.55, + "learning_rate": 9.204545454545454e-05, + "loss": 0.6229, + "step": 2380 + }, + { + "epoch": 0.55, + "learning_rate": 9.109848484848486e-05, + "loss": 0.6413, + "step": 2400 + }, + { + "epoch": 0.56, + "learning_rate": 9.015151515151515e-05, + "loss": 0.6237, + "step": 2420 + }, + { + "epoch": 0.56, + "learning_rate": 8.920454545454546e-05, + "loss": 0.6397, + "step": 2440 + }, + { + "epoch": 0.57, + "learning_rate": 8.825757575757576e-05, + "loss": 0.6259, + "step": 2460 + }, + { + "epoch": 0.57, + "learning_rate": 8.731060606060605e-05, + "loss": 0.634, + "step": 2480 + }, + { + "epoch": 0.58, + "learning_rate": 8.636363636363637e-05, + "loss": 0.622, + "step": 2500 + }, + { + "epoch": 0.58, + "learning_rate": 8.541666666666666e-05, + "loss": 0.6279, + "step": 2520 + }, + { + "epoch": 0.59, + "learning_rate": 8.446969696969697e-05, + "loss": 0.6306, + "step": 2540 + }, + { + "epoch": 0.59, + "learning_rate": 8.352272727272727e-05, + "loss": 0.6288, + "step": 2560 + }, + { + "epoch": 0.6, + "learning_rate": 8.257575757575758e-05, + "loss": 0.6297, + "step": 2580 + }, + { + "epoch": 0.6, + "learning_rate": 8.162878787878789e-05, + "loss": 0.6119, + "step": 2600 + }, + { + "epoch": 0.61, + "learning_rate": 8.068181818181818e-05, + "loss": 0.6227, + "step": 2620 + }, + { + "epoch": 0.61, + "learning_rate": 7.97348484848485e-05, + "loss": 0.6317, + "step": 2640 + }, + { + "epoch": 0.62, + "learning_rate": 7.878787878787879e-05, + "loss": 0.619, + "step": 2660 + }, + { + "epoch": 0.62, + "learning_rate": 7.784090909090909e-05, + "loss": 0.6156, + "step": 2680 + }, + { + "epoch": 0.62, + "learning_rate": 7.68939393939394e-05, + "loss": 0.6116, + "step": 2700 + }, + { + "epoch": 0.63, + "learning_rate": 7.59469696969697e-05, + "loss": 0.6009, + "step": 2720 + }, + { + "epoch": 0.63, + "learning_rate": 7.500000000000001e-05, + "loss": 0.6226, + "step": 2740 + }, + { + "epoch": 0.64, + "learning_rate": 7.40530303030303e-05, + "loss": 0.6246, + "step": 2760 + }, + { + "epoch": 0.64, + "learning_rate": 7.310606060606062e-05, + "loss": 0.6035, + "step": 2780 + }, + { + "epoch": 0.65, + "learning_rate": 7.215909090909091e-05, + "loss": 0.6059, + "step": 2800 + }, + { + "epoch": 0.65, + "learning_rate": 7.121212121212121e-05, + "loss": 0.6387, + "step": 2820 + }, + { + "epoch": 0.66, + "learning_rate": 7.026515151515152e-05, + "loss": 0.6314, + "step": 2840 + }, + { + "epoch": 0.66, + "learning_rate": 6.931818181818182e-05, + "loss": 0.6246, + "step": 2860 + }, + { + "epoch": 0.67, + "learning_rate": 6.837121212121213e-05, + "loss": 0.6448, + "step": 2880 + }, + { + "epoch": 0.67, + "learning_rate": 6.742424242424242e-05, + "loss": 0.5997, + "step": 2900 + }, + { + "epoch": 0.68, + "learning_rate": 6.647727272727274e-05, + "loss": 0.6238, + "step": 2920 + }, + { + "epoch": 0.68, + "learning_rate": 6.553030303030303e-05, + "loss": 0.614, + "step": 2940 + }, + { + "epoch": 0.68, + "learning_rate": 6.458333333333334e-05, + "loss": 0.6162, + "step": 2960 + }, + { + "epoch": 0.69, + "learning_rate": 6.363636363636364e-05, + "loss": 0.6087, + "step": 2980 + }, + { + "epoch": 0.69, + "learning_rate": 6.268939393939395e-05, + "loss": 0.6215, + "step": 3000 + }, + { + "epoch": 0.7, + "learning_rate": 6.174242424242425e-05, + "loss": 0.6022, + "step": 3020 + }, + { + "epoch": 0.7, + "learning_rate": 6.079545454545454e-05, + "loss": 0.6294, + "step": 3040 + }, + { + "epoch": 0.71, + "learning_rate": 5.9848484848484854e-05, + "loss": 0.6187, + "step": 3060 + }, + { + "epoch": 0.71, + "learning_rate": 5.890151515151515e-05, + "loss": 0.6194, + "step": 3080 + }, + { + "epoch": 0.72, + "learning_rate": 5.7954545454545464e-05, + "loss": 0.6109, + "step": 3100 + }, + { + "epoch": 0.72, + "learning_rate": 5.700757575757576e-05, + "loss": 0.638, + "step": 3120 + }, + { + "epoch": 0.73, + "learning_rate": 5.606060606060606e-05, + "loss": 0.6156, + "step": 3140 + }, + { + "epoch": 0.73, + "learning_rate": 5.5113636363636366e-05, + "loss": 0.6209, + "step": 3160 + }, + { + "epoch": 0.74, + "learning_rate": 5.4166666666666664e-05, + "loss": 0.6068, + "step": 3180 + }, + { + "epoch": 0.74, + "learning_rate": 5.3219696969696976e-05, + "loss": 0.6161, + "step": 3200 + }, + { + "epoch": 0.74, + "learning_rate": 5.2272727272727274e-05, + "loss": 0.6354, + "step": 3220 + }, + { + "epoch": 0.75, + "learning_rate": 5.132575757575758e-05, + "loss": 0.6317, + "step": 3240 + }, + { + "epoch": 0.75, + "learning_rate": 5.037878787878788e-05, + "loss": 0.6227, + "step": 3260 + }, + { + "epoch": 0.76, + "learning_rate": 4.943181818181818e-05, + "loss": 0.6176, + "step": 3280 + }, + { + "epoch": 0.76, + "learning_rate": 4.848484848484849e-05, + "loss": 0.6274, + "step": 3300 + }, + { + "epoch": 0.77, + "learning_rate": 4.753787878787879e-05, + "loss": 0.6159, + "step": 3320 + }, + { + "epoch": 0.77, + "learning_rate": 4.659090909090909e-05, + "loss": 0.6093, + "step": 3340 + }, + { + "epoch": 0.78, + "learning_rate": 4.5643939393939396e-05, + "loss": 0.6166, + "step": 3360 + }, + { + "epoch": 0.78, + "learning_rate": 4.46969696969697e-05, + "loss": 0.6046, + "step": 3380 + }, + { + "epoch": 0.79, + "learning_rate": 4.375e-05, + "loss": 0.6297, + "step": 3400 + }, + { + "epoch": 0.79, + "learning_rate": 4.2803030303030305e-05, + "loss": 0.6106, + "step": 3420 + }, + { + "epoch": 0.8, + "learning_rate": 4.185606060606061e-05, + "loss": 0.6064, + "step": 3440 + }, + { + "epoch": 0.8, + "learning_rate": 4.0909090909090915e-05, + "loss": 0.6138, + "step": 3460 + }, + { + "epoch": 0.8, + "learning_rate": 3.996212121212121e-05, + "loss": 0.6102, + "step": 3480 + }, + { + "epoch": 0.81, + "learning_rate": 3.901515151515152e-05, + "loss": 0.61, + "step": 3500 + } + ], + "logging_steps": 20, + "max_steps": 4324, + "num_train_epochs": 1, + "save_steps": 500, + "total_flos": 5.559685980090532e+17, + "trial_name": null, + "trial_params": null +} diff --git a/trainer_outputs/checkpoint-3500/training_args.bin b/trainer_outputs/checkpoint-3500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ee6008563f39382fbd86c959b2567ac52f8f8ada --- /dev/null +++ b/trainer_outputs/checkpoint-3500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7228d256901ac224db1b4d229c9c22d5c5a0db9d57b13da26daf55eb856deda8 +size 4027 diff --git a/trainer_outputs/checkpoint-4000/README.md b/trainer_outputs/checkpoint-4000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c --- /dev/null +++ b/trainer_outputs/checkpoint-4000/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: QuantizationMethod.BITS_AND_BYTES +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.4.0.dev0 diff --git a/trainer_outputs/checkpoint-4000/adapter_config.json b/trainer_outputs/checkpoint-4000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5151aebe3c16ec286f715ae36817f8ff2eff5704 --- /dev/null +++ b/trainer_outputs/checkpoint-4000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/trainer_outputs/checkpoint-4000/adapter_model.bin b/trainer_outputs/checkpoint-4000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e156c3ffc0106b7a082a7b8178ac5860399e6f3 --- /dev/null +++ b/trainer_outputs/checkpoint-4000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48c6d51eb794dfe57ee059b28e3bd3bb1711d4faa89d06cec354aa026e7cd0d7 +size 25234701 diff --git a/trainer_outputs/checkpoint-4000/optimizer.pt b/trainer_outputs/checkpoint-4000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..425ba8343c5232cbd4eef33e831df866fdee3c3c --- /dev/null +++ b/trainer_outputs/checkpoint-4000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f32de9ac4a724b8c6edea2506811b6ec16562aba5a38757017259c29021b0a1 +size 50492421 diff --git a/trainer_outputs/checkpoint-4000/rng_state.pth b/trainer_outputs/checkpoint-4000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..85dd03480b9ea531ef836354752f701bf6fc10e5 --- /dev/null +++ b/trainer_outputs/checkpoint-4000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b6d0da58141ede52cd106b7e677c6f0afd8d7f49ca76a66afdc98e141138ea6 +size 14575 diff --git a/trainer_outputs/checkpoint-4000/scheduler.pt b/trainer_outputs/checkpoint-4000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf1e03025fec44f6d4658f61c6cb81f2eae0c6ab --- /dev/null +++ b/trainer_outputs/checkpoint-4000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26814e85b6c783a7cc9b312a7ad4f455737fbde929dc3d422aa5331d52bb7fd6 +size 627 diff --git a/trainer_outputs/checkpoint-4000/trainer_state.json b/trainer_outputs/checkpoint-4000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e94c08045c80959d6eb54dbf067b8af631f35f4b --- /dev/null +++ b/trainer_outputs/checkpoint-4000/trainer_state.json @@ -0,0 +1,1219 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9249089542748136, + "eval_steps": 500, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 1.0185, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 8e-05, + "loss": 0.8649, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012, + "loss": 0.7784, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016, + "loss": 0.7386, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002, + "loss": 0.7037, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001990530303030303, + "loss": 0.7019, + "step": 120 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001981060606060606, + "loss": 0.7117, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019715909090909094, + "loss": 0.672, + "step": 160 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019621212121212123, + "loss": 0.664, + "step": 180 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019526515151515152, + "loss": 0.6666, + "step": 200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001943181818181818, + "loss": 0.6685, + "step": 220 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019337121212121213, + "loss": 0.6788, + "step": 240 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019242424242424245, + "loss": 0.6673, + "step": 260 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019147727272727274, + "loss": 0.6628, + "step": 280 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019053030303030303, + "loss": 0.6643, + "step": 300 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018958333333333332, + "loss": 0.6607, + "step": 320 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018863636363636364, + "loss": 0.6706, + "step": 340 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018768939393939396, + "loss": 0.6709, + "step": 360 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018674242424242425, + "loss": 0.6616, + "step": 380 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018579545454545454, + "loss": 0.6566, + "step": 400 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018484848484848484, + "loss": 0.6513, + "step": 420 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018390151515151518, + "loss": 0.6797, + "step": 440 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018295454545454547, + "loss": 0.6599, + "step": 460 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018200757575757577, + "loss": 0.6561, + "step": 480 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018106060606060606, + "loss": 0.662, + "step": 500 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018011363636363638, + "loss": 0.6629, + "step": 520 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001791666666666667, + "loss": 0.6475, + "step": 540 + }, + { + "epoch": 0.13, + "learning_rate": 0.00017821969696969699, + "loss": 0.6607, + "step": 560 + }, + { + "epoch": 0.13, + "learning_rate": 0.00017727272727272728, + "loss": 0.6512, + "step": 580 + }, + { + "epoch": 0.14, + "learning_rate": 0.00017632575757575757, + "loss": 0.6484, + "step": 600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001753787878787879, + "loss": 0.6403, + "step": 620 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001744318181818182, + "loss": 0.6537, + "step": 640 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001734848484848485, + "loss": 0.6516, + "step": 660 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001725378787878788, + "loss": 0.6577, + "step": 680 + }, + { + "epoch": 0.16, + "learning_rate": 0.00017159090909090908, + "loss": 0.6374, + "step": 700 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001706439393939394, + "loss": 0.6551, + "step": 720 + }, + { + "epoch": 0.17, + "learning_rate": 0.00016969696969696972, + "loss": 0.6388, + "step": 740 + }, + { + "epoch": 0.18, + "learning_rate": 0.00016875, + "loss": 0.64, + "step": 760 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001678030303030303, + "loss": 0.6579, + "step": 780 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001668560606060606, + "loss": 0.6525, + "step": 800 + }, + { + "epoch": 0.19, + "learning_rate": 0.00016590909090909094, + "loss": 0.6261, + "step": 820 + }, + { + "epoch": 0.19, + "learning_rate": 0.00016496212121212123, + "loss": 0.6351, + "step": 840 + }, + { + "epoch": 0.2, + "learning_rate": 0.00016401515151515152, + "loss": 0.6537, + "step": 860 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001630681818181818, + "loss": 0.6448, + "step": 880 + }, + { + "epoch": 0.21, + "learning_rate": 0.00016212121212121213, + "loss": 0.638, + "step": 900 + }, + { + "epoch": 0.21, + "learning_rate": 0.00016117424242424245, + "loss": 0.6503, + "step": 920 + }, + { + "epoch": 0.22, + "learning_rate": 0.00016022727272727274, + "loss": 0.6378, + "step": 940 + }, + { + "epoch": 0.22, + "learning_rate": 0.00015928030303030303, + "loss": 0.643, + "step": 960 + }, + { + "epoch": 0.23, + "learning_rate": 0.00015833333333333332, + "loss": 0.6235, + "step": 980 + }, + { + "epoch": 0.23, + "learning_rate": 0.00015738636363636364, + "loss": 0.647, + "step": 1000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00015643939393939396, + "loss": 0.6408, + "step": 1020 + }, + { + "epoch": 0.24, + "learning_rate": 0.00015549242424242425, + "loss": 0.6391, + "step": 1040 + }, + { + "epoch": 0.25, + "learning_rate": 0.00015454545454545454, + "loss": 0.6356, + "step": 1060 + }, + { + "epoch": 0.25, + "learning_rate": 0.00015359848484848484, + "loss": 0.6317, + "step": 1080 + }, + { + "epoch": 0.25, + "learning_rate": 0.00015265151515151515, + "loss": 0.6413, + "step": 1100 + }, + { + "epoch": 0.26, + "learning_rate": 0.00015170454545454547, + "loss": 0.6338, + "step": 1120 + }, + { + "epoch": 0.26, + "learning_rate": 0.00015075757575757576, + "loss": 0.6422, + "step": 1140 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014981060606060606, + "loss": 0.6442, + "step": 1160 + }, + { + "epoch": 0.27, + "learning_rate": 0.00014886363636363635, + "loss": 0.6523, + "step": 1180 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001479166666666667, + "loss": 0.6349, + "step": 1200 + }, + { + "epoch": 0.28, + "learning_rate": 0.00014696969696969698, + "loss": 0.6389, + "step": 1220 + }, + { + "epoch": 0.29, + "learning_rate": 0.00014602272727272728, + "loss": 0.6468, + "step": 1240 + }, + { + "epoch": 0.29, + "learning_rate": 0.00014507575757575757, + "loss": 0.6431, + "step": 1260 + }, + { + "epoch": 0.3, + "learning_rate": 0.00014412878787878789, + "loss": 0.6287, + "step": 1280 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001431818181818182, + "loss": 0.6438, + "step": 1300 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001422348484848485, + "loss": 0.6274, + "step": 1320 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001412878787878788, + "loss": 0.6286, + "step": 1340 + }, + { + "epoch": 0.31, + "learning_rate": 0.00014034090909090908, + "loss": 0.6401, + "step": 1360 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001393939393939394, + "loss": 0.6472, + "step": 1380 + }, + { + "epoch": 0.32, + "learning_rate": 0.00013844696969696972, + "loss": 0.6458, + "step": 1400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001375, + "loss": 0.6117, + "step": 1420 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001365530303030303, + "loss": 0.6271, + "step": 1440 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001356060606060606, + "loss": 0.6287, + "step": 1460 + }, + { + "epoch": 0.34, + "learning_rate": 0.00013465909090909094, + "loss": 0.6336, + "step": 1480 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013371212121212123, + "loss": 0.6404, + "step": 1500 + }, + { + "epoch": 0.35, + "learning_rate": 0.00013276515151515152, + "loss": 0.6312, + "step": 1520 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001318181818181818, + "loss": 0.6168, + "step": 1540 + }, + { + "epoch": 0.36, + "learning_rate": 0.00013087121212121213, + "loss": 0.6272, + "step": 1560 + }, + { + "epoch": 0.37, + "learning_rate": 0.00012992424242424245, + "loss": 0.6477, + "step": 1580 + }, + { + "epoch": 0.37, + "learning_rate": 0.00012897727272727274, + "loss": 0.6477, + "step": 1600 + }, + { + "epoch": 0.37, + "learning_rate": 0.00012803030303030303, + "loss": 0.6227, + "step": 1620 + }, + { + "epoch": 0.38, + "learning_rate": 0.00012708333333333332, + "loss": 0.6224, + "step": 1640 + }, + { + "epoch": 0.38, + "learning_rate": 0.00012613636363636364, + "loss": 0.6315, + "step": 1660 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012518939393939396, + "loss": 0.631, + "step": 1680 + }, + { + "epoch": 0.39, + "learning_rate": 0.00012424242424242425, + "loss": 0.6285, + "step": 1700 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012329545454545454, + "loss": 0.6359, + "step": 1720 + }, + { + "epoch": 0.4, + "learning_rate": 0.00012234848484848484, + "loss": 0.6282, + "step": 1740 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012140151515151517, + "loss": 0.6196, + "step": 1760 + }, + { + "epoch": 0.41, + "learning_rate": 0.00012045454545454546, + "loss": 0.6346, + "step": 1780 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011950757575757576, + "loss": 0.6323, + "step": 1800 + }, + { + "epoch": 0.42, + "learning_rate": 0.00011856060606060606, + "loss": 0.6108, + "step": 1820 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011761363636363636, + "loss": 0.6324, + "step": 1840 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011666666666666668, + "loss": 0.618, + "step": 1860 + }, + { + "epoch": 0.43, + "learning_rate": 0.00011571969696969698, + "loss": 0.6099, + "step": 1880 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011477272727272728, + "loss": 0.6251, + "step": 1900 + }, + { + "epoch": 0.44, + "learning_rate": 0.00011382575757575758, + "loss": 0.6209, + "step": 1920 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001128787878787879, + "loss": 0.6218, + "step": 1940 + }, + { + "epoch": 0.45, + "learning_rate": 0.00011193181818181819, + "loss": 0.6299, + "step": 1960 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001109848484848485, + "loss": 0.6211, + "step": 1980 + }, + { + "epoch": 0.46, + "learning_rate": 0.00011003787878787879, + "loss": 0.6072, + "step": 2000 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010909090909090909, + "loss": 0.6264, + "step": 2020 + }, + { + "epoch": 0.47, + "learning_rate": 0.00010814393939393941, + "loss": 0.6248, + "step": 2040 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001071969696969697, + "loss": 0.6125, + "step": 2060 + }, + { + "epoch": 0.48, + "learning_rate": 0.00010625000000000001, + "loss": 0.6294, + "step": 2080 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001053030303030303, + "loss": 0.6193, + "step": 2100 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001043560606060606, + "loss": 0.6293, + "step": 2120 + }, + { + "epoch": 0.49, + "learning_rate": 0.00010340909090909092, + "loss": 0.629, + "step": 2140 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010246212121212121, + "loss": 0.6353, + "step": 2160 + }, + { + "epoch": 0.5, + "learning_rate": 0.00010151515151515152, + "loss": 0.6268, + "step": 2180 + }, + { + "epoch": 0.51, + "learning_rate": 0.00010056818181818181, + "loss": 0.6256, + "step": 2200 + }, + { + "epoch": 0.51, + "learning_rate": 9.962121212121213e-05, + "loss": 0.6404, + "step": 2220 + }, + { + "epoch": 0.52, + "learning_rate": 9.867424242424242e-05, + "loss": 0.6347, + "step": 2240 + }, + { + "epoch": 0.52, + "learning_rate": 9.772727272727274e-05, + "loss": 0.6317, + "step": 2260 + }, + { + "epoch": 0.53, + "learning_rate": 9.678030303030303e-05, + "loss": 0.6335, + "step": 2280 + }, + { + "epoch": 0.53, + "learning_rate": 9.583333333333334e-05, + "loss": 0.6239, + "step": 2300 + }, + { + "epoch": 0.54, + "learning_rate": 9.488636363636364e-05, + "loss": 0.6302, + "step": 2320 + }, + { + "epoch": 0.54, + "learning_rate": 9.393939393939395e-05, + "loss": 0.6182, + "step": 2340 + }, + { + "epoch": 0.55, + "learning_rate": 9.299242424242425e-05, + "loss": 0.6219, + "step": 2360 + }, + { + "epoch": 0.55, + "learning_rate": 9.204545454545454e-05, + "loss": 0.6229, + "step": 2380 + }, + { + "epoch": 0.55, + "learning_rate": 9.109848484848486e-05, + "loss": 0.6413, + "step": 2400 + }, + { + "epoch": 0.56, + "learning_rate": 9.015151515151515e-05, + "loss": 0.6237, + "step": 2420 + }, + { + "epoch": 0.56, + "learning_rate": 8.920454545454546e-05, + "loss": 0.6397, + "step": 2440 + }, + { + "epoch": 0.57, + "learning_rate": 8.825757575757576e-05, + "loss": 0.6259, + "step": 2460 + }, + { + "epoch": 0.57, + "learning_rate": 8.731060606060605e-05, + "loss": 0.634, + "step": 2480 + }, + { + "epoch": 0.58, + "learning_rate": 8.636363636363637e-05, + "loss": 0.622, + "step": 2500 + }, + { + "epoch": 0.58, + "learning_rate": 8.541666666666666e-05, + "loss": 0.6279, + "step": 2520 + }, + { + "epoch": 0.59, + "learning_rate": 8.446969696969697e-05, + "loss": 0.6306, + "step": 2540 + }, + { + "epoch": 0.59, + "learning_rate": 8.352272727272727e-05, + "loss": 0.6288, + "step": 2560 + }, + { + "epoch": 0.6, + "learning_rate": 8.257575757575758e-05, + "loss": 0.6297, + "step": 2580 + }, + { + "epoch": 0.6, + "learning_rate": 8.162878787878789e-05, + "loss": 0.6119, + "step": 2600 + }, + { + "epoch": 0.61, + "learning_rate": 8.068181818181818e-05, + "loss": 0.6227, + "step": 2620 + }, + { + "epoch": 0.61, + "learning_rate": 7.97348484848485e-05, + "loss": 0.6317, + "step": 2640 + }, + { + "epoch": 0.62, + "learning_rate": 7.878787878787879e-05, + "loss": 0.619, + "step": 2660 + }, + { + "epoch": 0.62, + "learning_rate": 7.784090909090909e-05, + "loss": 0.6156, + "step": 2680 + }, + { + "epoch": 0.62, + "learning_rate": 7.68939393939394e-05, + "loss": 0.6116, + "step": 2700 + }, + { + "epoch": 0.63, + "learning_rate": 7.59469696969697e-05, + "loss": 0.6009, + "step": 2720 + }, + { + "epoch": 0.63, + "learning_rate": 7.500000000000001e-05, + "loss": 0.6226, + "step": 2740 + }, + { + "epoch": 0.64, + "learning_rate": 7.40530303030303e-05, + "loss": 0.6246, + "step": 2760 + }, + { + "epoch": 0.64, + "learning_rate": 7.310606060606062e-05, + "loss": 0.6035, + "step": 2780 + }, + { + "epoch": 0.65, + "learning_rate": 7.215909090909091e-05, + "loss": 0.6059, + "step": 2800 + }, + { + "epoch": 0.65, + "learning_rate": 7.121212121212121e-05, + "loss": 0.6387, + "step": 2820 + }, + { + "epoch": 0.66, + "learning_rate": 7.026515151515152e-05, + "loss": 0.6314, + "step": 2840 + }, + { + "epoch": 0.66, + "learning_rate": 6.931818181818182e-05, + "loss": 0.6246, + "step": 2860 + }, + { + "epoch": 0.67, + "learning_rate": 6.837121212121213e-05, + "loss": 0.6448, + "step": 2880 + }, + { + "epoch": 0.67, + "learning_rate": 6.742424242424242e-05, + "loss": 0.5997, + "step": 2900 + }, + { + "epoch": 0.68, + "learning_rate": 6.647727272727274e-05, + "loss": 0.6238, + "step": 2920 + }, + { + "epoch": 0.68, + "learning_rate": 6.553030303030303e-05, + "loss": 0.614, + "step": 2940 + }, + { + "epoch": 0.68, + "learning_rate": 6.458333333333334e-05, + "loss": 0.6162, + "step": 2960 + }, + { + "epoch": 0.69, + "learning_rate": 6.363636363636364e-05, + "loss": 0.6087, + "step": 2980 + }, + { + "epoch": 0.69, + "learning_rate": 6.268939393939395e-05, + "loss": 0.6215, + "step": 3000 + }, + { + "epoch": 0.7, + "learning_rate": 6.174242424242425e-05, + "loss": 0.6022, + "step": 3020 + }, + { + "epoch": 0.7, + "learning_rate": 6.079545454545454e-05, + "loss": 0.6294, + "step": 3040 + }, + { + "epoch": 0.71, + "learning_rate": 5.9848484848484854e-05, + "loss": 0.6187, + "step": 3060 + }, + { + "epoch": 0.71, + "learning_rate": 5.890151515151515e-05, + "loss": 0.6194, + "step": 3080 + }, + { + "epoch": 0.72, + "learning_rate": 5.7954545454545464e-05, + "loss": 0.6109, + "step": 3100 + }, + { + "epoch": 0.72, + "learning_rate": 5.700757575757576e-05, + "loss": 0.638, + "step": 3120 + }, + { + "epoch": 0.73, + "learning_rate": 5.606060606060606e-05, + "loss": 0.6156, + "step": 3140 + }, + { + "epoch": 0.73, + "learning_rate": 5.5113636363636366e-05, + "loss": 0.6209, + "step": 3160 + }, + { + "epoch": 0.74, + "learning_rate": 5.4166666666666664e-05, + "loss": 0.6068, + "step": 3180 + }, + { + "epoch": 0.74, + "learning_rate": 5.3219696969696976e-05, + "loss": 0.6161, + "step": 3200 + }, + { + "epoch": 0.74, + "learning_rate": 5.2272727272727274e-05, + "loss": 0.6354, + "step": 3220 + }, + { + "epoch": 0.75, + "learning_rate": 5.132575757575758e-05, + "loss": 0.6317, + "step": 3240 + }, + { + "epoch": 0.75, + "learning_rate": 5.037878787878788e-05, + "loss": 0.6227, + "step": 3260 + }, + { + "epoch": 0.76, + "learning_rate": 4.943181818181818e-05, + "loss": 0.6176, + "step": 3280 + }, + { + "epoch": 0.76, + "learning_rate": 4.848484848484849e-05, + "loss": 0.6274, + "step": 3300 + }, + { + "epoch": 0.77, + "learning_rate": 4.753787878787879e-05, + "loss": 0.6159, + "step": 3320 + }, + { + "epoch": 0.77, + "learning_rate": 4.659090909090909e-05, + "loss": 0.6093, + "step": 3340 + }, + { + "epoch": 0.78, + "learning_rate": 4.5643939393939396e-05, + "loss": 0.6166, + "step": 3360 + }, + { + "epoch": 0.78, + "learning_rate": 4.46969696969697e-05, + "loss": 0.6046, + "step": 3380 + }, + { + "epoch": 0.79, + "learning_rate": 4.375e-05, + "loss": 0.6297, + "step": 3400 + }, + { + "epoch": 0.79, + "learning_rate": 4.2803030303030305e-05, + "loss": 0.6106, + "step": 3420 + }, + { + "epoch": 0.8, + "learning_rate": 4.185606060606061e-05, + "loss": 0.6064, + "step": 3440 + }, + { + "epoch": 0.8, + "learning_rate": 4.0909090909090915e-05, + "loss": 0.6138, + "step": 3460 + }, + { + "epoch": 0.8, + "learning_rate": 3.996212121212121e-05, + "loss": 0.6102, + "step": 3480 + }, + { + "epoch": 0.81, + "learning_rate": 3.901515151515152e-05, + "loss": 0.61, + "step": 3500 + }, + { + "epoch": 0.81, + "learning_rate": 3.8068181818181816e-05, + "loss": 0.6107, + "step": 3520 + }, + { + "epoch": 0.82, + "learning_rate": 3.712121212121212e-05, + "loss": 0.6086, + "step": 3540 + }, + { + "epoch": 0.82, + "learning_rate": 3.6174242424242427e-05, + "loss": 0.6163, + "step": 3560 + }, + { + "epoch": 0.83, + "learning_rate": 3.522727272727273e-05, + "loss": 0.636, + "step": 3580 + }, + { + "epoch": 0.83, + "learning_rate": 3.428030303030303e-05, + "loss": 0.6237, + "step": 3600 + }, + { + "epoch": 0.84, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.6187, + "step": 3620 + }, + { + "epoch": 0.84, + "learning_rate": 3.238636363636364e-05, + "loss": 0.6195, + "step": 3640 + }, + { + "epoch": 0.85, + "learning_rate": 3.143939393939394e-05, + "loss": 0.6333, + "step": 3660 + }, + { + "epoch": 0.85, + "learning_rate": 3.0492424242424243e-05, + "loss": 0.613, + "step": 3680 + }, + { + "epoch": 0.86, + "learning_rate": 2.954545454545455e-05, + "loss": 0.6452, + "step": 3700 + }, + { + "epoch": 0.86, + "learning_rate": 2.8598484848484853e-05, + "loss": 0.6202, + "step": 3720 + }, + { + "epoch": 0.86, + "learning_rate": 2.7651515151515152e-05, + "loss": 0.6159, + "step": 3740 + }, + { + "epoch": 0.87, + "learning_rate": 2.6704545454545453e-05, + "loss": 0.6231, + "step": 3760 + }, + { + "epoch": 0.87, + "learning_rate": 2.575757575757576e-05, + "loss": 0.6287, + "step": 3780 + }, + { + "epoch": 0.88, + "learning_rate": 2.4810606060606064e-05, + "loss": 0.6168, + "step": 3800 + }, + { + "epoch": 0.88, + "learning_rate": 2.3863636363636365e-05, + "loss": 0.6125, + "step": 3820 + }, + { + "epoch": 0.89, + "learning_rate": 2.2916666666666667e-05, + "loss": 0.6122, + "step": 3840 + }, + { + "epoch": 0.89, + "learning_rate": 2.1969696969696972e-05, + "loss": 0.6087, + "step": 3860 + }, + { + "epoch": 0.9, + "learning_rate": 2.1022727272727274e-05, + "loss": 0.6207, + "step": 3880 + }, + { + "epoch": 0.9, + "learning_rate": 2.0075757575757575e-05, + "loss": 0.6175, + "step": 3900 + }, + { + "epoch": 0.91, + "learning_rate": 1.912878787878788e-05, + "loss": 0.6029, + "step": 3920 + }, + { + "epoch": 0.91, + "learning_rate": 1.8181818181818182e-05, + "loss": 0.6183, + "step": 3940 + }, + { + "epoch": 0.92, + "learning_rate": 1.7234848484848487e-05, + "loss": 0.6214, + "step": 3960 + }, + { + "epoch": 0.92, + "learning_rate": 1.628787878787879e-05, + "loss": 0.6314, + "step": 3980 + }, + { + "epoch": 0.92, + "learning_rate": 1.534090909090909e-05, + "loss": 0.6215, + "step": 4000 + } + ], + "logging_steps": 20, + "max_steps": 4324, + "num_train_epochs": 1, + "save_steps": 500, + "total_flos": 6.358088323636838e+17, + "trial_name": null, + "trial_params": null +} diff --git a/trainer_outputs/checkpoint-4000/training_args.bin b/trainer_outputs/checkpoint-4000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ee6008563f39382fbd86c959b2567ac52f8f8ada --- /dev/null +++ b/trainer_outputs/checkpoint-4000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7228d256901ac224db1b4d229c9c22d5c5a0db9d57b13da26daf55eb856deda8 +size 4027 diff --git a/trainer_outputs/checkpoint-500/README.md b/trainer_outputs/checkpoint-500/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fec5ec0d6e1de5cf587a28ce0b5ba00ffc4e973c --- /dev/null +++ b/trainer_outputs/checkpoint-500/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: QuantizationMethod.BITS_AND_BYTES +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.4.0.dev0 diff --git a/trainer_outputs/checkpoint-500/adapter_config.json b/trainer_outputs/checkpoint-500/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5151aebe3c16ec286f715ae36817f8ff2eff5704 --- /dev/null +++ b/trainer_outputs/checkpoint-500/adapter_config.json @@ -0,0 +1,21 @@ +{ + "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "k_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/trainer_outputs/checkpoint-500/adapter_model.bin b/trainer_outputs/checkpoint-500/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..2121ec305a3730dbdb810d52c7fbb2f3aa2f9be2 --- /dev/null +++ b/trainer_outputs/checkpoint-500/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:119f80ccf1deac80ece5fb8b70c831cbd735beb5114fb79ea94330b71ae12b2d +size 25234701 diff --git a/trainer_outputs/checkpoint-500/optimizer.pt b/trainer_outputs/checkpoint-500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..532f1ea05b35ac585a596ce19c2f3fb6e30c5de7 --- /dev/null +++ b/trainer_outputs/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbeead82cd6252ec154decd3b65ce2e2b366b354cf2e375ca863d76e17adae14 +size 50492421 diff --git a/trainer_outputs/checkpoint-500/rng_state.pth b/trainer_outputs/checkpoint-500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..921a43db1b18028d104b0a1d2663edfa20b7e683 --- /dev/null +++ b/trainer_outputs/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dcb9a5148ff55033957e0b4acb2bb5a6dc0999ebe59804a69eddc99745bacf6 +size 14575 diff --git a/trainer_outputs/checkpoint-500/scheduler.pt b/trainer_outputs/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5c5b9d89172d6ca41851368e64c2412d736e215 --- /dev/null +++ b/trainer_outputs/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50bf88e092fe80b0bb59e6abb8cf03180cb12601ffb051ae278bc6b895953e58 +size 627 diff --git a/trainer_outputs/checkpoint-500/trainer_state.json b/trainer_outputs/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..27469b46eed1d225c121be4cf32e0cd5e7f718a1 --- /dev/null +++ b/trainer_outputs/checkpoint-500/trainer_state.json @@ -0,0 +1,169 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.1156136192843517, + "eval_steps": 500, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4e-05, + "loss": 1.0185, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 8e-05, + "loss": 0.8649, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 0.00012, + "loss": 0.7784, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 0.00016, + "loss": 0.7386, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002, + "loss": 0.7037, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001990530303030303, + "loss": 0.7019, + "step": 120 + }, + { + "epoch": 0.03, + "learning_rate": 0.0001981060606060606, + "loss": 0.7117, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019715909090909094, + "loss": 0.672, + "step": 160 + }, + { + "epoch": 0.04, + "learning_rate": 0.00019621212121212123, + "loss": 0.664, + "step": 180 + }, + { + "epoch": 0.05, + "learning_rate": 0.00019526515151515152, + "loss": 0.6666, + "step": 200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0001943181818181818, + "loss": 0.6685, + "step": 220 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019337121212121213, + "loss": 0.6788, + "step": 240 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019242424242424245, + "loss": 0.6673, + "step": 260 + }, + { + "epoch": 0.06, + "learning_rate": 0.00019147727272727274, + "loss": 0.6628, + "step": 280 + }, + { + "epoch": 0.07, + "learning_rate": 0.00019053030303030303, + "loss": 0.6643, + "step": 300 + }, + { + "epoch": 0.07, + "learning_rate": 0.00018958333333333332, + "loss": 0.6607, + "step": 320 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018863636363636364, + "loss": 0.6706, + "step": 340 + }, + { + "epoch": 0.08, + "learning_rate": 0.00018768939393939396, + "loss": 0.6709, + "step": 360 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018674242424242425, + "loss": 0.6616, + "step": 380 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018579545454545454, + "loss": 0.6566, + "step": 400 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018484848484848484, + "loss": 0.6513, + "step": 420 + }, + { + "epoch": 0.1, + "learning_rate": 0.00018390151515151518, + "loss": 0.6797, + "step": 440 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018295454545454547, + "loss": 0.6599, + "step": 460 + }, + { + "epoch": 0.11, + "learning_rate": 0.00018200757575757577, + "loss": 0.6561, + "step": 480 + }, + { + "epoch": 0.12, + "learning_rate": 0.00018106060606060606, + "loss": 0.662, + "step": 500 + } + ], + "logging_steps": 20, + "max_steps": 4324, + "num_train_epochs": 1, + "save_steps": 500, + "total_flos": 7.92890273426473e+16, + "trial_name": null, + "trial_params": null +} diff --git a/trainer_outputs/checkpoint-500/training_args.bin b/trainer_outputs/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..ee6008563f39382fbd86c959b2567ac52f8f8ada --- /dev/null +++ b/trainer_outputs/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7228d256901ac224db1b4d229c9c22d5c5a0db9d57b13da26daf55eb856deda8 +size 4027