llama3-8b-classification-gpt4o-100k2 / trainer_state.json

Model save

f74a994 verified 4 months ago

9.18 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.9977827050997783,
	"eval_steps": 500,
	"global_step": 225,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.004434589800443459,
	"grad_norm": 1.91265869140625,
	"learning_rate": 4.347826086956522e-05,
	"loss": 2.8127,
	"step": 1
	},
	{
	"epoch": 0.022172949002217297,
	"grad_norm": 1.5314122438430786,
	"learning_rate": 0.0002173913043478261,
	"loss": 2.7241,
	"step": 5
	},
	{
	"epoch": 0.04434589800443459,
	"grad_norm": 0.6431057453155518,
	"learning_rate": 0.0004347826086956522,
	"loss": 2.2423,
	"step": 10
	},
	{
	"epoch": 0.06651884700665188,
	"grad_norm": 0.5257381200790405,
	"learning_rate": 0.0006521739130434783,
	"loss": 1.9505,
	"step": 15
	},
	{
	"epoch": 0.08869179600886919,
	"grad_norm": 0.37703999876976013,
	"learning_rate": 0.0008695652173913044,
	"loss": 1.7881,
	"step": 20
	},
	{
	"epoch": 0.11086474501108648,
	"grad_norm": 0.30256885290145874,
	"learning_rate": 0.0009900990099009901,
	"loss": 1.7031,
	"step": 25
	},
	{
	"epoch": 0.13303769401330376,
	"grad_norm": 0.3443244993686676,
	"learning_rate": 0.0009653465346534653,
	"loss": 1.6352,
	"step": 30
	},
	{
	"epoch": 0.15521064301552107,
	"grad_norm": 0.369827538728714,
	"learning_rate": 0.0009405940594059406,
	"loss": 1.5746,
	"step": 35
	},
	{
	"epoch": 0.17738359201773837,
	"grad_norm": 0.231527641415596,
	"learning_rate": 0.0009158415841584159,
	"loss": 1.5409,
	"step": 40
	},
	{
	"epoch": 0.19955654101995565,
	"grad_norm": 0.22827404737472534,
	"learning_rate": 0.0008910891089108911,
	"loss": 1.5187,
	"step": 45
	},
	{
	"epoch": 0.22172949002217296,
	"grad_norm": 0.2396710067987442,
	"learning_rate": 0.0008663366336633663,
	"loss": 1.5128,
	"step": 50
	},
	{
	"epoch": 0.24390243902439024,
	"grad_norm": 0.20095600187778473,
	"learning_rate": 0.0008415841584158416,
	"loss": 1.4848,
	"step": 55
	},
	{
	"epoch": 0.2660753880266075,
	"grad_norm": 0.28900983929634094,
	"learning_rate": 0.0008168316831683168,
	"loss": 1.4962,
	"step": 60
	},
	{
	"epoch": 0.28824833702882485,
	"grad_norm": 0.25716254115104675,
	"learning_rate": 0.0007920792079207921,
	"loss": 1.4789,
	"step": 65
	},
	{
	"epoch": 0.31042128603104213,
	"grad_norm": 0.252340167760849,
	"learning_rate": 0.0007673267326732674,
	"loss": 1.458,
	"step": 70
	},
	{
	"epoch": 0.3325942350332594,
	"grad_norm": 0.20464155077934265,
	"learning_rate": 0.0007425742574257426,
	"loss": 1.4558,
	"step": 75
	},
	{
	"epoch": 0.35476718403547675,
	"grad_norm": 0.23394732177257538,
	"learning_rate": 0.0007178217821782178,
	"loss": 1.4562,
	"step": 80
	},
	{
	"epoch": 0.376940133037694,
	"grad_norm": 0.2164139449596405,
	"learning_rate": 0.000693069306930693,
	"loss": 1.4338,
	"step": 85
	},
	{
	"epoch": 0.3991130820399113,
	"grad_norm": 0.215862438082695,
	"learning_rate": 0.0006683168316831684,
	"loss": 1.4287,
	"step": 90
	},
	{
	"epoch": 0.4212860310421286,
	"grad_norm": 0.20270515978336334,
	"learning_rate": 0.0006435643564356436,
	"loss": 1.4226,
	"step": 95
	},
	{
	"epoch": 0.4434589800443459,
	"grad_norm": 0.20255711674690247,
	"learning_rate": 0.0006188118811881188,
	"loss": 1.4314,
	"step": 100
	},
	{
	"epoch": 0.4656319290465632,
	"grad_norm": 0.20747065544128418,
	"learning_rate": 0.000594059405940594,
	"loss": 1.4194,
	"step": 105
	},
	{
	"epoch": 0.4878048780487805,
	"grad_norm": 0.2104884535074234,
	"learning_rate": 0.0005693069306930693,
	"loss": 1.4106,
	"step": 110
	},
	{
	"epoch": 0.5099778270509978,
	"grad_norm": 0.21514882147312164,
	"learning_rate": 0.0005445544554455446,
	"loss": 1.42,
	"step": 115
	},
	{
	"epoch": 0.532150776053215,
	"grad_norm": 0.20466424524784088,
	"learning_rate": 0.0005198019801980198,
	"loss": 1.3937,
	"step": 120
	},
	{
	"epoch": 0.5543237250554324,
	"grad_norm": 0.2181282341480255,
	"learning_rate": 0.0004950495049504951,
	"loss": 1.3972,
	"step": 125
	},
	{
	"epoch": 0.5764966740576497,
	"grad_norm": 0.22615699470043182,
	"learning_rate": 0.0004702970297029703,
	"loss": 1.3882,
	"step": 130
	},
	{
	"epoch": 0.5986696230598669,
	"grad_norm": 0.1967965066432953,
	"learning_rate": 0.00044554455445544556,
	"loss": 1.388,
	"step": 135
	},
	{
	"epoch": 0.6208425720620843,
	"grad_norm": 0.2030034065246582,
	"learning_rate": 0.0004207920792079208,
	"loss": 1.4048,
	"step": 140
	},
	{
	"epoch": 0.6430155210643016,
	"grad_norm": 0.2136310189962387,
	"learning_rate": 0.00039603960396039607,
	"loss": 1.3918,
	"step": 145
	},
	{
	"epoch": 0.6651884700665188,
	"grad_norm": 0.22149060666561127,
	"learning_rate": 0.0003712871287128713,
	"loss": 1.4023,
	"step": 150
	},
	{
	"epoch": 0.6873614190687362,
	"grad_norm": 0.2130667269229889,
	"learning_rate": 0.0003465346534653465,
	"loss": 1.3933,
	"step": 155
	},
	{
	"epoch": 0.7095343680709535,
	"grad_norm": 0.19920696318149567,
	"learning_rate": 0.0003217821782178218,
	"loss": 1.3815,
	"step": 160
	},
	{
	"epoch": 0.7317073170731707,
	"grad_norm": 0.20453611016273499,
	"learning_rate": 0.000297029702970297,
	"loss": 1.3648,
	"step": 165
	},
	{
	"epoch": 0.753880266075388,
	"grad_norm": 0.21325863897800446,
	"learning_rate": 0.0002722772277227723,
	"loss": 1.3773,
	"step": 170
	},
	{
	"epoch": 0.7760532150776053,
	"grad_norm": 0.2014823704957962,
	"learning_rate": 0.00024752475247524753,
	"loss": 1.3881,
	"step": 175
	},
	{
	"epoch": 0.7982261640798226,
	"grad_norm": 0.20359407365322113,
	"learning_rate": 0.00022277227722772278,
	"loss": 1.3826,
	"step": 180
	},
	{
	"epoch": 0.8203991130820399,
	"grad_norm": 0.21738748252391815,
	"learning_rate": 0.00019801980198019803,
	"loss": 1.3705,
	"step": 185
	},
	{
	"epoch": 0.8425720620842572,
	"grad_norm": 0.1990172564983368,
	"learning_rate": 0.00017326732673267326,
	"loss": 1.3693,
	"step": 190
	},
	{
	"epoch": 0.8647450110864745,
	"grad_norm": 0.2007543295621872,
	"learning_rate": 0.0001485148514851485,
	"loss": 1.3575,
	"step": 195
	},
	{
	"epoch": 0.8869179600886918,
	"grad_norm": 0.5149243474006653,
	"learning_rate": 0.00012376237623762376,
	"loss": 1.374,
	"step": 200
	},
	{
	"epoch": 0.9090909090909091,
	"grad_norm": 0.2131042778491974,
	"learning_rate": 9.900990099009902e-05,
	"loss": 1.3636,
	"step": 205
	},
	{
	"epoch": 0.9312638580931264,
	"grad_norm": 0.19097404181957245,
	"learning_rate": 7.425742574257426e-05,
	"loss": 1.3489,
	"step": 210
	},
	{
	"epoch": 0.9534368070953437,
	"grad_norm": 0.19905418157577515,
	"learning_rate": 4.950495049504951e-05,
	"loss": 1.3442,
	"step": 215
	},
	{
	"epoch": 0.975609756097561,
	"grad_norm": 0.19617854058742523,
	"learning_rate": 2.4752475247524754e-05,
	"loss": 1.3721,
	"step": 220
	},
	{
	"epoch": 0.9977827050997783,
	"grad_norm": 0.20064575970172882,
	"learning_rate": 0.0,
	"loss": 1.3767,
	"step": 225
	},
	{
	"epoch": 0.9977827050997783,
	"eval_loss": 1.7732421159744263,
	"eval_runtime": 0.5415,
	"eval_samples_per_second": 1.847,
	"eval_steps_per_second": 1.847,
	"step": 225
	},
	{
	"epoch": 0.9977827050997783,
	"step": 225,
	"total_flos": 3.3259687694984806e+17,
	"train_loss": 1.4963340536753336,
	"train_runtime": 725.2803,
	"train_samples_per_second": 9.934,
	"train_steps_per_second": 0.31
	}
	],
	"logging_steps": 5,
	"max_steps": 225,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 100,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 3.3259687694984806e+17,
	"train_batch_size": 4,
	"trial_name": null,
	"trial_params": null
	}