{
    "trainer": {
        "evaluation_strategy": "steps",
        "per_device_train_batch_size": 4,
        "per_device_eval_batch_size": 4,
        "gradient_accumulation_steps": 32,
        "eval_steps": 150,
        "save_steps": 150,
        "logging_steps": 5,
        "learning_rate": 0.0003,
        "num_train_epochs": 3,
        "lr_scheduler_type": "cosine",
        "warmup_steps": 100,
        "fp16": true,
        "bf16": false,
        "torch_compile": false,
        "optim": "adamw_torch"
    },
    "lora": {
        "r": 16,
        "lora_alpha": 16,
        "lora_dropout": 0.05,
        "bias": "none",
        "target_modules": ["q_proj", "v_proj"],
        "task_type": "CAUSAL_LM"
    },
    "load_in_8bit": true,
    "only_target_loss": false,
    "model_name": "models/llama-13b-hf",
    "model_type": "causal",
    "template_category": "causal_newlines",
    "max_source_tokens_count": 512,
    "max_target_tokens_count": 832
}
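
For reference, here is a minimal sketch of how the "trainer" and "lora" blocks of a config like this could be consumed with Hugging Face transformers and peft. The file name `config.json` and the `output_dir` value are assumptions, not part of the config; the keys in "trainer" map directly onto `TrainingArguments` fields and the keys in "lora" onto `LoraConfig` fields. The remaining top-level keys (`only_target_loss`, `model_type`, `template_category`, `max_source_tokens_count`, `max_target_tokens_count`) would be handled by the training script's own data pipeline, so this sketch leaves them out.

```python
# Minimal sketch, assuming the config above is saved as config.json.
import json

from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoModelForCausalLM, TrainingArguments

with open("config.json") as f:
    config = json.load(f)

# Load the base model in 8-bit, as requested by "load_in_8bit".
model = AutoModelForCausalLM.from_pretrained(
    config["model_name"],
    load_in_8bit=config["load_in_8bit"],
    device_map="auto",
)
# Prepare the quantized model for training (casts norms, enables grads).
model = prepare_model_for_kbit_training(model)

# Attach LoRA adapters to q_proj/v_proj only; the "lora" block maps
# one-to-one onto LoraConfig keyword arguments.
lora_config = LoraConfig(**config["lora"])
model = get_peft_model(model, lora_config)

# The "trainer" block maps one-to-one onto TrainingArguments fields;
# output_dir is an assumed value not present in the config.
training_args = TrainingArguments(output_dir="output", **config["trainer"])
```

Note the effective batch size here is `per_device_train_batch_size * gradient_accumulation_steps = 128` per device, which is why `eval_steps`/`save_steps` can be as low as 150 for a 3-epoch run.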