{
  "micro_batch_size": 1,
  "gradient_accumulation_steps": 1,
  "num_train_epochs": 10,
  "learning_rate": 0.0001,
  "cutoff_len": 512,
  "val_set_size": 4974,
  "lora_r": 16,
  "lora_alpha": 16,
  "lora_dropout": 0.05,
  "lora_target_modules": [
    "q_proj",
    "v_proj",
    "o_proj",
    "k_proj"
  ],
  "lora_modules_to_save": [],
  "train_on_inputs": true,
  "group_by_length": false,
  "load_in_8bit": false,
  "fp16": true,
  "bf16": false,
  "gradient_checkpointing": false,
  "save_steps": 10000,
  "save_total_limit": 100,
  "logging_steps": 100,
  "additional_training_arguments": null,
  "additional_lora_config": null
}
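
For reference, here is a minimal sketch of how these fields could be wired into a training script, assuming the Hugging Face peft and transformers libraries are used; the file name train_config.json, the output_dir value, and the task_type are illustrative assumptions, not part of the config above:

import json

from peft import LoraConfig
from transformers import TrainingArguments

# Load the hyperparameter file shown above.
# "train_config.json" is an assumed path for illustration.
with open("train_config.json") as f:
    cfg = json.load(f)

# Map the LoRA-specific fields onto peft's LoraConfig.
lora_config = LoraConfig(
    r=cfg["lora_r"],
    lora_alpha=cfg["lora_alpha"],
    lora_dropout=cfg["lora_dropout"],
    target_modules=cfg["lora_target_modules"],
    modules_to_save=cfg["lora_modules_to_save"] or None,
    task_type="CAUSAL_LM",  # assumed task type for a causal LM fine-tune
)

# Map the generic training fields onto transformers' TrainingArguments;
# micro_batch_size corresponds to the per-device batch size.
training_args = TrainingArguments(
    output_dir="./output",  # assumed; not part of the config file
    per_device_train_batch_size=cfg["micro_batch_size"],
    gradient_accumulation_steps=cfg["gradient_accumulation_steps"],
    num_train_epochs=cfg["num_train_epochs"],
    learning_rate=cfg["learning_rate"],
    fp16=cfg["fp16"],
    bf16=cfg["bf16"],
    gradient_checkpointing=cfg["gradient_checkpointing"],
    group_by_length=cfg["group_by_length"],
    save_steps=cfg["save_steps"],
    save_total_limit=cfg["save_total_limit"],
    logging_steps=cfg["logging_steps"],
)

Note that with micro_batch_size and gradient_accumulation_steps both set to 1, the effective batch size is 1. The remaining fields (cutoff_len, val_set_size, train_on_inputs, load_in_8bit, and the additional_* overrides) are not TrainingArguments parameters; they would be consumed by the tokenization, dataset-splitting, and model-loading steps of the training pipeline.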