{ "best_metric": 0.5208442211151123, "best_model_checkpoint": "./classifier-posterior-glare-removal/checkpoint-500", "epoch": 10.0, "eval_steps": 50, "global_step": 620, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.16129032258064516, "grad_norm": 1.2819656133651733, "learning_rate": 0.0001967741935483871, "loss": 0.7167, "step": 10 }, { "epoch": 0.3225806451612903, "grad_norm": 0.1421721875667572, "learning_rate": 0.00019354838709677422, "loss": 0.6756, "step": 20 }, { "epoch": 0.4838709677419355, "grad_norm": 0.4423729181289673, "learning_rate": 0.0001903225806451613, "loss": 0.6615, "step": 30 }, { "epoch": 0.6451612903225806, "grad_norm": 0.745327353477478, "learning_rate": 0.0001870967741935484, "loss": 0.6261, "step": 40 }, { "epoch": 0.8064516129032258, "grad_norm": 1.5607924461364746, "learning_rate": 0.00018387096774193548, "loss": 0.626, "step": 50 }, { "epoch": 0.8064516129032258, "eval_accuracy": 0.7581863979848866, "eval_loss": 0.5621981024742126, "eval_runtime": 7.1421, "eval_samples_per_second": 111.172, "eval_steps_per_second": 14.002, "step": 50 }, { "epoch": 0.967741935483871, "grad_norm": 6.628453731536865, "learning_rate": 0.00018064516129032257, "loss": 0.571, "step": 60 }, { "epoch": 1.129032258064516, "grad_norm": 1.2903436422348022, "learning_rate": 0.0001774193548387097, "loss": 0.5387, "step": 70 }, { "epoch": 1.2903225806451613, "grad_norm": 2.7928802967071533, "learning_rate": 0.00017419354838709678, "loss": 0.456, "step": 80 }, { "epoch": 1.4516129032258065, "grad_norm": 2.351067066192627, "learning_rate": 0.0001709677419354839, "loss": 0.4139, "step": 90 }, { "epoch": 1.6129032258064515, "grad_norm": 1.6322267055511475, "learning_rate": 0.00016774193548387098, "loss": 0.4848, "step": 100 }, { "epoch": 1.6129032258064515, "eval_accuracy": 0.6675062972292192, "eval_loss": 0.5951594114303589, "eval_runtime": 6.6482, "eval_samples_per_second": 119.43, "eval_steps_per_second": 15.042, "step": 100 }, { "epoch": 1.7741935483870968, "grad_norm": 1.1790844202041626, "learning_rate": 0.00016451612903225807, "loss": 0.4845, "step": 110 }, { "epoch": 1.935483870967742, "grad_norm": 1.562549114227295, "learning_rate": 0.00016129032258064516, "loss": 0.4279, "step": 120 }, { "epoch": 2.096774193548387, "grad_norm": 1.2606903314590454, "learning_rate": 0.00015806451612903225, "loss": 0.5123, "step": 130 }, { "epoch": 2.258064516129032, "grad_norm": 0.8262944221496582, "learning_rate": 0.00015483870967741937, "loss": 0.2132, "step": 140 }, { "epoch": 2.4193548387096775, "grad_norm": 2.910503387451172, "learning_rate": 0.00015161290322580646, "loss": 0.2195, "step": 150 }, { "epoch": 2.4193548387096775, "eval_accuracy": 0.8324937027707808, "eval_loss": 0.5258287191390991, "eval_runtime": 7.4851, "eval_samples_per_second": 106.078, "eval_steps_per_second": 13.36, "step": 150 }, { "epoch": 2.5806451612903225, "grad_norm": 13.587343215942383, "learning_rate": 0.00014838709677419355, "loss": 0.4568, "step": 160 }, { "epoch": 2.741935483870968, "grad_norm": 2.3012983798980713, "learning_rate": 0.00014516129032258066, "loss": 0.3642, "step": 170 }, { "epoch": 2.903225806451613, "grad_norm": 1.3221534490585327, "learning_rate": 0.00014193548387096775, "loss": 0.281, "step": 180 }, { "epoch": 3.064516129032258, "grad_norm": 1.1407649517059326, "learning_rate": 0.00013870967741935487, "loss": 0.256, "step": 190 }, { "epoch": 3.225806451612903, "grad_norm": 2.6736562252044678, "learning_rate": 0.00013548387096774193, "loss": 0.1967, "step": 200 }, { "epoch": 3.225806451612903, "eval_accuracy": 0.7959697732997482, "eval_loss": 0.5910664796829224, "eval_runtime": 6.3913, "eval_samples_per_second": 124.231, "eval_steps_per_second": 15.646, "step": 200 }, { "epoch": 3.3870967741935485, "grad_norm": 0.46241849660873413, "learning_rate": 0.00013225806451612905, "loss": 0.2156, "step": 210 }, { "epoch": 3.5483870967741935, "grad_norm": 0.2461618185043335, "learning_rate": 0.00012903225806451613, "loss": 0.1093, "step": 220 }, { "epoch": 3.709677419354839, "grad_norm": 2.9271249771118164, "learning_rate": 0.00012580645161290322, "loss": 0.2542, "step": 230 }, { "epoch": 3.870967741935484, "grad_norm": 9.045219421386719, "learning_rate": 0.00012258064516129034, "loss": 0.195, "step": 240 }, { "epoch": 4.032258064516129, "grad_norm": 0.458954393863678, "learning_rate": 0.00011935483870967743, "loss": 0.2945, "step": 250 }, { "epoch": 4.032258064516129, "eval_accuracy": 0.8299748110831234, "eval_loss": 0.496555358171463, "eval_runtime": 8.349, "eval_samples_per_second": 95.101, "eval_steps_per_second": 11.977, "step": 250 }, { "epoch": 4.193548387096774, "grad_norm": 3.6777045726776123, "learning_rate": 0.00011612903225806453, "loss": 0.2313, "step": 260 }, { "epoch": 4.354838709677419, "grad_norm": 1.4112117290496826, "learning_rate": 0.00011290322580645163, "loss": 0.2117, "step": 270 }, { "epoch": 4.516129032258064, "grad_norm": 0.2903359532356262, "learning_rate": 0.00010967741935483871, "loss": 0.1558, "step": 280 }, { "epoch": 4.67741935483871, "grad_norm": 2.1955249309539795, "learning_rate": 0.0001064516129032258, "loss": 0.1586, "step": 290 }, { "epoch": 4.838709677419355, "grad_norm": 3.383190155029297, "learning_rate": 0.0001032258064516129, "loss": 0.1866, "step": 300 }, { "epoch": 4.838709677419355, "eval_accuracy": 0.8350125944584383, "eval_loss": 0.5221933722496033, "eval_runtime": 6.3093, "eval_samples_per_second": 125.846, "eval_steps_per_second": 15.85, "step": 300 }, { "epoch": 5.0, "grad_norm": 0.7602125406265259, "learning_rate": 0.0001, "loss": 0.1878, "step": 310 }, { "epoch": 5.161290322580645, "grad_norm": 0.2806147038936615, "learning_rate": 9.677419354838711e-05, "loss": 0.1729, "step": 320 }, { "epoch": 5.32258064516129, "grad_norm": 0.43375176191329956, "learning_rate": 9.35483870967742e-05, "loss": 0.1427, "step": 330 }, { "epoch": 5.483870967741936, "grad_norm": 2.8998281955718994, "learning_rate": 9.032258064516129e-05, "loss": 0.1513, "step": 340 }, { "epoch": 5.645161290322581, "grad_norm": 1.7477742433547974, "learning_rate": 8.709677419354839e-05, "loss": 0.1211, "step": 350 }, { "epoch": 5.645161290322581, "eval_accuracy": 0.8425692695214105, "eval_loss": 0.5328292846679688, "eval_runtime": 6.3819, "eval_samples_per_second": 124.415, "eval_steps_per_second": 15.669, "step": 350 }, { "epoch": 5.806451612903226, "grad_norm": 0.9376145601272583, "learning_rate": 8.387096774193549e-05, "loss": 0.1373, "step": 360 }, { "epoch": 5.967741935483871, "grad_norm": 1.7760857343673706, "learning_rate": 8.064516129032258e-05, "loss": 0.1163, "step": 370 }, { "epoch": 6.129032258064516, "grad_norm": 0.532788097858429, "learning_rate": 7.741935483870968e-05, "loss": 0.0937, "step": 380 }, { "epoch": 6.290322580645161, "grad_norm": 0.17270487546920776, "learning_rate": 7.419354838709677e-05, "loss": 0.0575, "step": 390 }, { "epoch": 6.451612903225806, "grad_norm": 2.3847575187683105, "learning_rate": 7.096774193548388e-05, "loss": 0.1666, "step": 400 }, { "epoch": 6.451612903225806, "eval_accuracy": 0.8425692695214105, "eval_loss": 0.5544695854187012, "eval_runtime": 6.8246, "eval_samples_per_second": 116.344, "eval_steps_per_second": 14.653, "step": 400 }, { "epoch": 6.612903225806452, "grad_norm": 1.6830391883850098, "learning_rate": 6.774193548387096e-05, "loss": 0.1241, "step": 410 }, { "epoch": 6.774193548387097, "grad_norm": 0.27970582246780396, "learning_rate": 6.451612903225807e-05, "loss": 0.1276, "step": 420 }, { "epoch": 6.935483870967742, "grad_norm": 2.2229976654052734, "learning_rate": 6.129032258064517e-05, "loss": 0.1297, "step": 430 }, { "epoch": 7.096774193548387, "grad_norm": 0.15630574524402618, "learning_rate": 5.8064516129032266e-05, "loss": 0.0689, "step": 440 }, { "epoch": 7.258064516129032, "grad_norm": 0.14564351737499237, "learning_rate": 5.4838709677419355e-05, "loss": 0.0737, "step": 450 }, { "epoch": 7.258064516129032, "eval_accuracy": 0.8526448362720404, "eval_loss": 0.5326716899871826, "eval_runtime": 6.3044, "eval_samples_per_second": 125.944, "eval_steps_per_second": 15.862, "step": 450 }, { "epoch": 7.419354838709677, "grad_norm": 0.1766778826713562, "learning_rate": 5.161290322580645e-05, "loss": 0.0491, "step": 460 }, { "epoch": 7.580645161290323, "grad_norm": 0.08773469924926758, "learning_rate": 4.8387096774193554e-05, "loss": 0.1044, "step": 470 }, { "epoch": 7.741935483870968, "grad_norm": 0.3740708827972412, "learning_rate": 4.516129032258064e-05, "loss": 0.0872, "step": 480 }, { "epoch": 7.903225806451613, "grad_norm": 0.7324288487434387, "learning_rate": 4.1935483870967746e-05, "loss": 0.1041, "step": 490 }, { "epoch": 8.064516129032258, "grad_norm": 0.16782912611961365, "learning_rate": 3.870967741935484e-05, "loss": 0.0314, "step": 500 }, { "epoch": 8.064516129032258, "eval_accuracy": 0.8526448362720404, "eval_loss": 0.5208442211151123, "eval_runtime": 7.4286, "eval_samples_per_second": 106.884, "eval_steps_per_second": 13.461, "step": 500 }, { "epoch": 8.225806451612904, "grad_norm": 0.6218559145927429, "learning_rate": 3.548387096774194e-05, "loss": 0.0453, "step": 510 }, { "epoch": 8.387096774193548, "grad_norm": 0.7076393961906433, "learning_rate": 3.2258064516129034e-05, "loss": 0.0874, "step": 520 }, { "epoch": 8.548387096774194, "grad_norm": 1.3946703672409058, "learning_rate": 2.9032258064516133e-05, "loss": 0.0663, "step": 530 }, { "epoch": 8.709677419354838, "grad_norm": 0.2775959372520447, "learning_rate": 2.5806451612903226e-05, "loss": 0.037, "step": 540 }, { "epoch": 8.870967741935484, "grad_norm": 0.256896436214447, "learning_rate": 2.258064516129032e-05, "loss": 0.0329, "step": 550 }, { "epoch": 8.870967741935484, "eval_accuracy": 0.8488664987405542, "eval_loss": 0.5773429870605469, "eval_runtime": 7.8322, "eval_samples_per_second": 101.376, "eval_steps_per_second": 12.768, "step": 550 }, { "epoch": 9.03225806451613, "grad_norm": 0.2857324182987213, "learning_rate": 1.935483870967742e-05, "loss": 0.0691, "step": 560 }, { "epoch": 9.193548387096774, "grad_norm": 0.3476056456565857, "learning_rate": 1.6129032258064517e-05, "loss": 0.0269, "step": 570 }, { "epoch": 9.35483870967742, "grad_norm": 0.06710684299468994, "learning_rate": 1.2903225806451613e-05, "loss": 0.0278, "step": 580 }, { "epoch": 9.516129032258064, "grad_norm": 0.3127437233924866, "learning_rate": 9.67741935483871e-06, "loss": 0.0513, "step": 590 }, { "epoch": 9.67741935483871, "grad_norm": 0.06437714397907257, "learning_rate": 6.451612903225806e-06, "loss": 0.0497, "step": 600 }, { "epoch": 9.67741935483871, "eval_accuracy": 0.8488664987405542, "eval_loss": 0.5994384288787842, "eval_runtime": 6.8242, "eval_samples_per_second": 116.351, "eval_steps_per_second": 14.654, "step": 600 }, { "epoch": 9.838709677419354, "grad_norm": 0.06262248754501343, "learning_rate": 3.225806451612903e-06, "loss": 0.0485, "step": 610 }, { "epoch": 10.0, "grad_norm": 0.42829158902168274, "learning_rate": 0.0, "loss": 0.0515, "step": 620 }, { "epoch": 10.0, "step": 620, "total_flos": 7.687205369767526e+17, "train_loss": 0.22638947348440847, "train_runtime": 282.1196, "train_samples_per_second": 35.162, "train_steps_per_second": 2.198 } ], "logging_steps": 10, "max_steps": 620, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7.687205369767526e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }