|
{ |
|
"best_metric": 0.6586877107620239, |
|
"best_model_checkpoint": "saves/Gemma-7B/lora/train_1/checkpoint-100", |
|
"epoch": 0.21875854525567404, |
|
"eval_steps": 10, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010937927262783703, |
|
"grad_norm": 16.269868850708008, |
|
"learning_rate": 0.00029999015487222375, |
|
"loss": 3.1586, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010937927262783703, |
|
"eval_loss": 0.7906885147094727, |
|
"eval_runtime": 170.0021, |
|
"eval_samples_per_second": 27.717, |
|
"eval_steps_per_second": 0.871, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.021875854525567406, |
|
"grad_norm": 1.8665393590927124, |
|
"learning_rate": 0.00029996062078124905, |
|
"loss": 0.6717, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.021875854525567406, |
|
"eval_loss": 0.7608615159988403, |
|
"eval_runtime": 168.7111, |
|
"eval_samples_per_second": 27.929, |
|
"eval_steps_per_second": 0.877, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03281378178835111, |
|
"grad_norm": 2.046764373779297, |
|
"learning_rate": 0.0002999114016039678, |
|
"loss": 0.5741, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03281378178835111, |
|
"eval_loss": 0.7403788566589355, |
|
"eval_runtime": 169.3669, |
|
"eval_samples_per_second": 27.821, |
|
"eval_steps_per_second": 0.874, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04375170905113481, |
|
"grad_norm": 0.8763797879219055, |
|
"learning_rate": 0.00029984250380130117, |
|
"loss": 0.5809, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.04375170905113481, |
|
"eval_loss": 0.7739446759223938, |
|
"eval_runtime": 169.4581, |
|
"eval_samples_per_second": 27.806, |
|
"eval_steps_per_second": 0.873, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05468963631391851, |
|
"grad_norm": 1.452701449394226, |
|
"learning_rate": 0.0002997539364173515, |
|
"loss": 0.5313, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05468963631391851, |
|
"eval_loss": 0.7001951932907104, |
|
"eval_runtime": 169.6448, |
|
"eval_samples_per_second": 27.776, |
|
"eval_steps_per_second": 0.872, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06562756357670221, |
|
"grad_norm": 2.0603904724121094, |
|
"learning_rate": 0.00029964571107821494, |
|
"loss": 0.4879, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.06562756357670221, |
|
"eval_loss": 0.7159304022789001, |
|
"eval_runtime": 169.4832, |
|
"eval_samples_per_second": 27.802, |
|
"eval_steps_per_second": 0.873, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07656549083948591, |
|
"grad_norm": 1.048985242843628, |
|
"learning_rate": 0.00029951784199045534, |
|
"loss": 0.4665, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.07656549083948591, |
|
"eval_loss": 0.7063425779342651, |
|
"eval_runtime": 169.1768, |
|
"eval_samples_per_second": 27.853, |
|
"eval_steps_per_second": 0.875, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08750341810226962, |
|
"grad_norm": 0.8847436904907227, |
|
"learning_rate": 0.0002993703459392396, |
|
"loss": 0.4509, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.08750341810226962, |
|
"eval_loss": 0.6991614103317261, |
|
"eval_runtime": 169.2261, |
|
"eval_samples_per_second": 27.844, |
|
"eval_steps_per_second": 0.875, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09844134536505332, |
|
"grad_norm": 1.09526789188385, |
|
"learning_rate": 0.00029920324228613376, |
|
"loss": 0.4542, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.09844134536505332, |
|
"eval_loss": 0.6915357708930969, |
|
"eval_runtime": 169.6804, |
|
"eval_samples_per_second": 27.77, |
|
"eval_steps_per_second": 0.872, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10937927262783702, |
|
"grad_norm": 0.9263470768928528, |
|
"learning_rate": 0.0002990165529665622, |
|
"loss": 0.4188, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10937927262783702, |
|
"eval_loss": 0.6586877107620239, |
|
"eval_runtime": 169.6821, |
|
"eval_samples_per_second": 27.77, |
|
"eval_steps_per_second": 0.872, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12031719989062073, |
|
"grad_norm": 1.2452396154403687, |
|
"learning_rate": 0.0002988103024869277, |
|
"loss": 0.4131, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12031719989062073, |
|
"eval_loss": 0.6637363433837891, |
|
"eval_runtime": 169.6991, |
|
"eval_samples_per_second": 27.767, |
|
"eval_steps_per_second": 0.872, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13125512715340443, |
|
"grad_norm": 0.8261873722076416, |
|
"learning_rate": 0.00029858451792139453, |
|
"loss": 0.4137, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.13125512715340443, |
|
"eval_loss": 0.6902170181274414, |
|
"eval_runtime": 169.6732, |
|
"eval_samples_per_second": 27.771, |
|
"eval_steps_per_second": 0.872, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14219305441618812, |
|
"grad_norm": 0.6665583848953247, |
|
"learning_rate": 0.0002983392289083346, |
|
"loss": 0.4087, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14219305441618812, |
|
"eval_loss": 0.6948944330215454, |
|
"eval_runtime": 169.5204, |
|
"eval_samples_per_second": 27.796, |
|
"eval_steps_per_second": 0.873, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15313098167897182, |
|
"grad_norm": 0.841665506362915, |
|
"learning_rate": 0.0002980744676464371, |
|
"loss": 0.3968, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15313098167897182, |
|
"eval_loss": 0.6712561845779419, |
|
"eval_runtime": 169.6453, |
|
"eval_samples_per_second": 27.776, |
|
"eval_steps_per_second": 0.872, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16406890894175555, |
|
"grad_norm": 1.1116673946380615, |
|
"learning_rate": 0.0002977902688904813, |
|
"loss": 0.4048, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16406890894175555, |
|
"eval_loss": 0.6878468990325928, |
|
"eval_runtime": 169.3026, |
|
"eval_samples_per_second": 27.832, |
|
"eval_steps_per_second": 0.874, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17500683620453925, |
|
"grad_norm": 0.7878606915473938, |
|
"learning_rate": 0.00029748666994677467, |
|
"loss": 0.3953, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17500683620453925, |
|
"eval_loss": 0.6906653642654419, |
|
"eval_runtime": 168.3921, |
|
"eval_samples_per_second": 27.982, |
|
"eval_steps_per_second": 0.879, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18594476346732294, |
|
"grad_norm": 0.9028112888336182, |
|
"learning_rate": 0.00029716371066825593, |
|
"loss": 0.3873, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.18594476346732294, |
|
"eval_loss": 0.6937726140022278, |
|
"eval_runtime": 169.1341, |
|
"eval_samples_per_second": 27.86, |
|
"eval_steps_per_second": 0.875, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19688269073010664, |
|
"grad_norm": 0.797646701335907, |
|
"learning_rate": 0.0002968214334492632, |
|
"loss": 0.3821, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.19688269073010664, |
|
"eval_loss": 0.684823751449585, |
|
"eval_runtime": 169.2007, |
|
"eval_samples_per_second": 27.849, |
|
"eval_steps_per_second": 0.875, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.20782061799289034, |
|
"grad_norm": 0.7943875193595886, |
|
"learning_rate": 0.00029645988321996917, |
|
"loss": 0.394, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.20782061799289034, |
|
"eval_loss": 0.7038875222206116, |
|
"eval_runtime": 169.1579, |
|
"eval_samples_per_second": 27.856, |
|
"eval_steps_per_second": 0.875, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21875854525567404, |
|
"grad_norm": 0.797392725944519, |
|
"learning_rate": 0.00029607910744048336, |
|
"loss": 0.3893, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21875854525567404, |
|
"eval_loss": 0.6831381916999817, |
|
"eval_runtime": 169.2814, |
|
"eval_samples_per_second": 27.835, |
|
"eval_steps_per_second": 0.874, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21875854525567404, |
|
"step": 200, |
|
"total_flos": 3.6702510649442304e+17, |
|
"train_loss": 0.5889949607849121, |
|
"train_runtime": 10495.9143, |
|
"train_samples_per_second": 66.895, |
|
"train_steps_per_second": 0.261 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2742, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.6702510649442304e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|