{ "best_metric": null, "best_model_checkpoint": null, "epoch": 49.121212121212125, "eval_steps": 500, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 0.00029999537364671844, "loss": 3.1318, "step": 1 }, { "epoch": 0.12, "eval_loss": 2.835486888885498, "eval_runtime": 2.1688, "eval_samples_per_second": 107.896, "eval_steps_per_second": 3.689, "step": 1 }, { "epoch": 1.12, "eval_loss": 2.6363508701324463, "eval_runtime": 1.538, "eval_samples_per_second": 152.15, "eval_steps_per_second": 5.202, "step": 2 }, { "epoch": 2.12, "eval_loss": 2.4945499897003174, "eval_runtime": 1.526, "eval_samples_per_second": 153.347, "eval_steps_per_second": 5.243, "step": 3 }, { "epoch": 3.12, "eval_loss": 2.5338640213012695, "eval_runtime": 1.5296, "eval_samples_per_second": 152.982, "eval_steps_per_second": 5.23, "step": 4 }, { "epoch": 4.12, "learning_rate": 0.00029988435543610843, "loss": 2.7386, "step": 5 }, { "epoch": 4.12, "eval_loss": 2.3351666927337646, "eval_runtime": 1.5386, "eval_samples_per_second": 152.088, "eval_steps_per_second": 5.2, "step": 5 }, { "epoch": 5.12, "eval_loss": 2.2136902809143066, "eval_runtime": 1.517, "eval_samples_per_second": 154.247, "eval_steps_per_second": 5.273, "step": 6 }, { "epoch": 6.12, "eval_loss": 2.164069652557373, "eval_runtime": 1.5148, "eval_samples_per_second": 154.475, "eval_steps_per_second": 5.281, "step": 7 }, { "epoch": 7.12, "eval_loss": 2.105088710784912, "eval_runtime": 1.5384, "eval_samples_per_second": 152.111, "eval_steps_per_second": 5.2, "step": 8 }, { "epoch": 8.12, "eval_loss": 2.0841920375823975, "eval_runtime": 1.6789, "eval_samples_per_second": 139.374, "eval_steps_per_second": 4.765, "step": 9 }, { "epoch": 9.12, "learning_rate": 0.00029953760005996916, "loss": 2.269, "step": 10 }, { "epoch": 9.12, "eval_loss": 2.047882556915283, "eval_runtime": 1.5168, "eval_samples_per_second": 154.272, "eval_steps_per_second": 5.274, "step": 10 }, { "epoch": 10.12, "eval_loss": 1.9553548097610474, "eval_runtime": 1.5263, "eval_samples_per_second": 153.317, "eval_steps_per_second": 5.242, "step": 11 }, { "epoch": 11.12, "eval_loss": 1.8555233478546143, "eval_runtime": 1.527, "eval_samples_per_second": 153.245, "eval_steps_per_second": 5.239, "step": 12 }, { "epoch": 12.12, "eval_loss": 1.7735551595687866, "eval_runtime": 1.5194, "eval_samples_per_second": 154.009, "eval_steps_per_second": 5.265, "step": 13 }, { "epoch": 13.12, "eval_loss": 1.7906467914581299, "eval_runtime": 1.5217, "eval_samples_per_second": 153.779, "eval_steps_per_second": 5.257, "step": 14 }, { "epoch": 14.12, "learning_rate": 0.00029896026854323894, "loss": 1.9451, "step": 15 }, { "epoch": 14.12, "eval_loss": 1.7737478017807007, "eval_runtime": 1.5139, "eval_samples_per_second": 154.563, "eval_steps_per_second": 5.284, "step": 15 }, { "epoch": 15.12, "eval_loss": 1.6676586866378784, "eval_runtime": 1.5263, "eval_samples_per_second": 153.316, "eval_steps_per_second": 5.242, "step": 16 }, { "epoch": 16.12, "eval_loss": 1.6410826444625854, "eval_runtime": 1.5192, "eval_samples_per_second": 154.025, "eval_steps_per_second": 5.266, "step": 17 }, { "epoch": 17.12, "eval_loss": 1.5739473104476929, "eval_runtime": 1.5309, "eval_samples_per_second": 152.855, "eval_steps_per_second": 5.226, "step": 18 }, { "epoch": 18.12, "eval_loss": 1.5334192514419556, "eval_runtime": 1.5271, "eval_samples_per_second": 153.235, "eval_steps_per_second": 5.239, "step": 19 }, { "epoch": 19.12, "learning_rate": 0.00029815325108927063, "loss": 1.6568, "step": 20 }, { "epoch": 19.12, "eval_loss": 1.47941255569458, "eval_runtime": 1.5335, "eval_samples_per_second": 152.593, "eval_steps_per_second": 5.217, "step": 20 }, { "epoch": 20.12, "eval_loss": 1.4007827043533325, "eval_runtime": 1.5222, "eval_samples_per_second": 153.722, "eval_steps_per_second": 5.255, "step": 21 }, { "epoch": 21.12, "eval_loss": 1.3624812364578247, "eval_runtime": 1.5197, "eval_samples_per_second": 153.982, "eval_steps_per_second": 5.264, "step": 22 }, { "epoch": 22.12, "eval_loss": 1.2963740825653076, "eval_runtime": 1.5258, "eval_samples_per_second": 153.363, "eval_steps_per_second": 5.243, "step": 23 }, { "epoch": 23.12, "eval_loss": 1.2041164636611938, "eval_runtime": 1.5251, "eval_samples_per_second": 153.434, "eval_steps_per_second": 5.246, "step": 24 }, { "epoch": 24.12, "learning_rate": 0.00029711779206048454, "loss": 1.3674, "step": 25 }, { "epoch": 24.12, "eval_loss": 1.1971029043197632, "eval_runtime": 1.535, "eval_samples_per_second": 152.446, "eval_steps_per_second": 5.212, "step": 25 }, { "epoch": 25.12, "eval_loss": 1.1571109294891357, "eval_runtime": 1.5213, "eval_samples_per_second": 153.815, "eval_steps_per_second": 5.259, "step": 26 }, { "epoch": 26.12, "eval_loss": 1.1079976558685303, "eval_runtime": 1.5286, "eval_samples_per_second": 153.079, "eval_steps_per_second": 5.233, "step": 27 }, { "epoch": 27.12, "eval_loss": 1.109868049621582, "eval_runtime": 1.5388, "eval_samples_per_second": 152.068, "eval_steps_per_second": 5.199, "step": 28 }, { "epoch": 28.12, "eval_loss": 1.0929827690124512, "eval_runtime": 1.5243, "eval_samples_per_second": 153.513, "eval_steps_per_second": 5.248, "step": 29 }, { "epoch": 29.12, "learning_rate": 0.0002958554880596515, "loss": 1.145, "step": 30 }, { "epoch": 29.12, "eval_loss": 1.0333445072174072, "eval_runtime": 1.528, "eval_samples_per_second": 153.138, "eval_steps_per_second": 5.235, "step": 30 }, { "epoch": 30.12, "eval_loss": 1.009576678276062, "eval_runtime": 1.5222, "eval_samples_per_second": 153.722, "eval_steps_per_second": 5.255, "step": 31 }, { "epoch": 31.12, "eval_loss": 1.0011868476867676, "eval_runtime": 1.5185, "eval_samples_per_second": 154.104, "eval_steps_per_second": 5.269, "step": 32 }, { "epoch": 32.12, "eval_loss": 0.9265638589859009, "eval_runtime": 1.5235, "eval_samples_per_second": 153.589, "eval_steps_per_second": 5.251, "step": 33 }, { "epoch": 33.12, "eval_loss": 0.962448239326477, "eval_runtime": 1.5219, "eval_samples_per_second": 153.758, "eval_steps_per_second": 5.257, "step": 34 }, { "epoch": 34.12, "learning_rate": 0.000294368285468047, "loss": 0.9987, "step": 35 }, { "epoch": 34.12, "eval_loss": 0.9425073862075806, "eval_runtime": 1.5206, "eval_samples_per_second": 153.885, "eval_steps_per_second": 5.261, "step": 35 }, { "epoch": 35.12, "eval_loss": 0.9353674650192261, "eval_runtime": 1.5211, "eval_samples_per_second": 153.831, "eval_steps_per_second": 5.259, "step": 36 }, { "epoch": 36.12, "eval_loss": 0.9090538024902344, "eval_runtime": 1.5239, "eval_samples_per_second": 153.554, "eval_steps_per_second": 5.25, "step": 37 }, { "epoch": 37.12, "eval_loss": 0.9006912708282471, "eval_runtime": 1.6666, "eval_samples_per_second": 140.404, "eval_steps_per_second": 4.8, "step": 38 }, { "epoch": 38.12, "eval_loss": 0.9648869037628174, "eval_runtime": 1.5236, "eval_samples_per_second": 153.587, "eval_steps_per_second": 5.251, "step": 39 }, { "epoch": 39.12, "learning_rate": 0.00029265847744427303, "loss": 0.9071, "step": 40 }, { "epoch": 39.12, "eval_loss": 0.9199429154396057, "eval_runtime": 1.526, "eval_samples_per_second": 153.343, "eval_steps_per_second": 5.242, "step": 40 }, { "epoch": 40.12, "eval_loss": 0.8650604486465454, "eval_runtime": 1.5281, "eval_samples_per_second": 153.127, "eval_steps_per_second": 5.235, "step": 41 }, { "epoch": 41.12, "eval_loss": 0.8727077841758728, "eval_runtime": 1.5186, "eval_samples_per_second": 154.087, "eval_steps_per_second": 5.268, "step": 42 }, { "epoch": 42.12, "eval_loss": 0.8558970093727112, "eval_runtime": 1.5297, "eval_samples_per_second": 152.968, "eval_steps_per_second": 5.23, "step": 43 }, { "epoch": 43.12, "eval_loss": 0.8499311804771423, "eval_runtime": 1.5225, "eval_samples_per_second": 153.692, "eval_steps_per_second": 5.254, "step": 44 }, { "epoch": 44.12, "learning_rate": 0.0002907287003883726, "loss": 0.8522, "step": 45 }, { "epoch": 44.12, "eval_loss": 0.8547362089157104, "eval_runtime": 1.5331, "eval_samples_per_second": 152.637, "eval_steps_per_second": 5.218, "step": 45 }, { "epoch": 45.12, "eval_loss": 0.8880292177200317, "eval_runtime": 1.5217, "eval_samples_per_second": 153.771, "eval_steps_per_second": 5.257, "step": 46 }, { "epoch": 46.12, "eval_loss": 0.8677502870559692, "eval_runtime": 1.5273, "eval_samples_per_second": 153.214, "eval_steps_per_second": 5.238, "step": 47 }, { "epoch": 47.12, "eval_loss": 0.8565409183502197, "eval_runtime": 1.5222, "eval_samples_per_second": 153.723, "eval_steps_per_second": 5.255, "step": 48 }, { "epoch": 48.12, "eval_loss": 0.8197174072265625, "eval_runtime": 1.5188, "eval_samples_per_second": 154.07, "eval_steps_per_second": 5.267, "step": 49 }, { "epoch": 49.12, "learning_rate": 0.000288581929876693, "loss": 0.8153, "step": 50 }, { "epoch": 49.12, "eval_loss": 0.8439480662345886, "eval_runtime": 1.5245, "eval_samples_per_second": 153.497, "eval_steps_per_second": 5.248, "step": 50 }, { "epoch": 49.12, "step": 50, "total_flos": 4527521789902848.0, "train_loss": 1.4773838996887207, "train_runtime": 2397.7837, "train_samples_per_second": 43.728, "train_steps_per_second": 0.167 } ], "logging_steps": 5, "max_steps": 400, "num_train_epochs": 50, "save_steps": 500, "total_flos": 4527521789902848.0, "trial_name": null, "trial_params": null }