{
  "BASE_MODEL": "microsoft/Phi-3.5-mini-instruct",
  "SEQ_LENGTH": 512,
  "MAX_STEPS": 250,
  "BATCH_SIZE": 4,
  "GR_ACC_STEPS": 4,
  "LR": 0.0002,
  "LR_SCHEDULER_TYPE": "cosine",
  "OPTIMIZER": "adamw_torch",
  "WEIGHT_DECAY": 0.001,
  "WARMUP_RATIO": 0.05,
  "EVAL_FREQ": 10,
  "SAVE_FREQ": 20,
  "SAVE_LIMIT": 2,
  "LOG_FREQ": 1,
  "BF16": true,
  "FP16": false,
  "FIM_RATE": 0.5,
  "FIM_SPM_RAT": 0.5,
  "LORA_R": 16,
  "LORA_ALPHA": 48,
  "LORA_DROPOUT": 0.0,
  "LORA_TARGET_MODULES": "all-linear",
  "USE_NESTED_QUANT": true,
  "BNB_4BIT_COMPUTE_DTYPE": "bfloat16",
  "load_in_8bit": true,
  "SEED": 0,
  "EARLY_STOP_PATIENCE": 3,
  "EARLY_STOP_THRESHOLD": 0.01
}