{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 1580,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03164556962025317,
"grad_norm": 0.0072021484375,
"learning_rate": 0.0002,
"loss": 0.0019,
"step": 10
},
{
"epoch": 0.06329113924050633,
"grad_norm": 0.01226806640625,
"learning_rate": 0.0002,
"loss": 0.0012,
"step": 20
},
{
"epoch": 0.0949367088607595,
"grad_norm": 0.013427734375,
"learning_rate": 0.0002,
"loss": 0.0016,
"step": 30
},
{
"epoch": 0.12658227848101267,
"grad_norm": 0.004791259765625,
"learning_rate": 0.0002,
"loss": 0.0037,
"step": 40
},
{
"epoch": 0.15822784810126583,
"grad_norm": 0.00787353515625,
"learning_rate": 0.0002,
"loss": 0.0015,
"step": 50
},
{
"epoch": 0.189873417721519,
"grad_norm": 0.0164794921875,
"learning_rate": 0.0002,
"loss": 0.0023,
"step": 60
},
{
"epoch": 0.22151898734177214,
"grad_norm": 0.0019989013671875,
"learning_rate": 0.0002,
"loss": 0.0017,
"step": 70
},
{
"epoch": 0.25316455696202533,
"grad_norm": 0.043701171875,
"learning_rate": 0.0002,
"loss": 0.0018,
"step": 80
},
{
"epoch": 0.2848101265822785,
"grad_norm": 0.04150390625,
"learning_rate": 0.0002,
"loss": 0.0012,
"step": 90
},
{
"epoch": 0.31645569620253167,
"grad_norm": 0.0126953125,
"learning_rate": 0.0002,
"loss": 0.0016,
"step": 100
},
{
"epoch": 0.34810126582278483,
"grad_norm": 0.0299072265625,
"learning_rate": 0.0002,
"loss": 0.0017,
"step": 110
},
{
"epoch": 0.379746835443038,
"grad_norm": 0.017333984375,
"learning_rate": 0.0002,
"loss": 0.0021,
"step": 120
},
{
"epoch": 0.41139240506329117,
"grad_norm": 0.0284423828125,
"learning_rate": 0.0002,
"loss": 0.004,
"step": 130
},
{
"epoch": 0.4430379746835443,
"grad_norm": 0.0228271484375,
"learning_rate": 0.0002,
"loss": 0.0015,
"step": 140
},
{
"epoch": 0.47468354430379744,
"grad_norm": 0.0216064453125,
"learning_rate": 0.0002,
"loss": 0.0012,
"step": 150
},
{
"epoch": 0.5063291139240507,
"grad_norm": 0.0079345703125,
"learning_rate": 0.0002,
"loss": 0.0017,
"step": 160
},
{
"epoch": 0.5379746835443038,
"grad_norm": 0.03564453125,
"learning_rate": 0.0002,
"loss": 0.0007,
"step": 170
},
{
"epoch": 0.569620253164557,
"grad_norm": 0.0859375,
"learning_rate": 0.0002,
"loss": 0.0016,
"step": 180
},
{
"epoch": 0.6012658227848101,
"grad_norm": 0.0014801025390625,
"learning_rate": 0.0002,
"loss": 0.0013,
"step": 190
},
{
"epoch": 0.6329113924050633,
"grad_norm": 0.02734375,
"learning_rate": 0.0002,
"loss": 0.0015,
"step": 200
},
{
"epoch": 0.6645569620253164,
"grad_norm": 0.00836181640625,
"learning_rate": 0.0002,
"loss": 0.0012,
"step": 210
},
{
"epoch": 0.6962025316455697,
"grad_norm": 0.0218505859375,
"learning_rate": 0.0002,
"loss": 0.0012,
"step": 220
},
{
"epoch": 0.7278481012658228,
"grad_norm": 0.00799560546875,
"learning_rate": 0.0002,
"loss": 0.0013,
"step": 230
},
{
"epoch": 0.759493670886076,
"grad_norm": 0.0478515625,
"learning_rate": 0.0002,
"loss": 0.003,
"step": 240
},
{
"epoch": 0.7911392405063291,
"grad_norm": 0.091796875,
"learning_rate": 0.0002,
"loss": 0.0008,
"step": 250
},
{
"epoch": 0.8227848101265823,
"grad_norm": 0.0218505859375,
"learning_rate": 0.0002,
"loss": 0.0008,
"step": 260
},
{
"epoch": 0.8544303797468354,
"grad_norm": 0.006317138671875,
"learning_rate": 0.0002,
"loss": 0.0009,
"step": 270
},
{
"epoch": 0.8860759493670886,
"grad_norm": 0.034912109375,
"learning_rate": 0.0002,
"loss": 0.0016,
"step": 280
},
{
"epoch": 0.9177215189873418,
"grad_norm": 0.0115966796875,
"learning_rate": 0.0002,
"loss": 0.0005,
"step": 290
},
{
"epoch": 0.9493670886075949,
"grad_norm": 0.04296875,
"learning_rate": 0.0002,
"loss": 0.0027,
"step": 300
},
{
"epoch": 0.9810126582278481,
"grad_norm": 0.0277099609375,
"learning_rate": 0.0002,
"loss": 0.0008,
"step": 310
},
{
"epoch": 1.0126582278481013,
"grad_norm": 0.0240478515625,
"learning_rate": 0.0002,
"loss": 0.001,
"step": 320
},
{
"epoch": 1.0443037974683544,
"grad_norm": 0.0078125,
"learning_rate": 0.0002,
"loss": 0.0042,
"step": 330
},
{
"epoch": 1.0759493670886076,
"grad_norm": 0.0047607421875,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 340
},
{
"epoch": 1.1075949367088607,
"grad_norm": 0.007598876953125,
"learning_rate": 0.0002,
"loss": 0.0011,
"step": 350
},
{
"epoch": 1.139240506329114,
"grad_norm": 0.00665283203125,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 360
},
{
"epoch": 1.1708860759493671,
"grad_norm": 0.00445556640625,
"learning_rate": 0.0002,
"loss": 0.0005,
"step": 370
},
{
"epoch": 1.2025316455696202,
"grad_norm": 0.004364013671875,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 380
},
{
"epoch": 1.2341772151898733,
"grad_norm": 0.003662109375,
"learning_rate": 0.0002,
"loss": 0.0005,
"step": 390
},
{
"epoch": 1.2658227848101267,
"grad_norm": 0.020263671875,
"learning_rate": 0.0002,
"loss": 0.0008,
"step": 400
},
{
"epoch": 1.2974683544303798,
"grad_norm": 0.008544921875,
"learning_rate": 0.0002,
"loss": 0.0009,
"step": 410
},
{
"epoch": 1.3291139240506329,
"grad_norm": 0.0140380859375,
"learning_rate": 0.0002,
"loss": 0.0015,
"step": 420
},
{
"epoch": 1.360759493670886,
"grad_norm": 0.00616455078125,
"learning_rate": 0.0002,
"loss": 0.001,
"step": 430
},
{
"epoch": 1.3924050632911391,
"grad_norm": 0.00506591796875,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 440
},
{
"epoch": 1.4240506329113924,
"grad_norm": 0.041748046875,
"learning_rate": 0.0002,
"loss": 0.0008,
"step": 450
},
{
"epoch": 1.4556962025316456,
"grad_norm": 0.0849609375,
"learning_rate": 0.0002,
"loss": 0.0011,
"step": 460
},
{
"epoch": 1.4873417721518987,
"grad_norm": 0.01495361328125,
"learning_rate": 0.0002,
"loss": 0.001,
"step": 470
},
{
"epoch": 1.518987341772152,
"grad_norm": 0.007110595703125,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 480
},
{
"epoch": 1.5506329113924051,
"grad_norm": 0.005401611328125,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 490
},
{
"epoch": 1.5822784810126582,
"grad_norm": 0.0084228515625,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 500
},
{
"epoch": 1.6139240506329116,
"grad_norm": 0.01123046875,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 510
},
{
"epoch": 1.6455696202531644,
"grad_norm": 0.00970458984375,
"learning_rate": 0.0002,
"loss": 0.0008,
"step": 520
},
{
"epoch": 1.6772151898734178,
"grad_norm": 0.00665283203125,
"learning_rate": 0.0002,
"loss": 0.0005,
"step": 530
},
{
"epoch": 1.7088607594936709,
"grad_norm": 0.00506591796875,
"learning_rate": 0.0002,
"loss": 0.001,
"step": 540
},
{
"epoch": 1.740506329113924,
"grad_norm": 0.0205078125,
"learning_rate": 0.0002,
"loss": 0.0005,
"step": 550
},
{
"epoch": 1.7721518987341773,
"grad_norm": 0.04052734375,
"learning_rate": 0.0002,
"loss": 0.0011,
"step": 560
},
{
"epoch": 1.8037974683544302,
"grad_norm": 0.0179443359375,
"learning_rate": 0.0002,
"loss": 0.0007,
"step": 570
},
{
"epoch": 1.8354430379746836,
"grad_norm": 0.0145263671875,
"learning_rate": 0.0002,
"loss": 0.0009,
"step": 580
},
{
"epoch": 1.8670886075949367,
"grad_norm": 0.004913330078125,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 590
},
{
"epoch": 1.8987341772151898,
"grad_norm": 0.014404296875,
"learning_rate": 0.0002,
"loss": 0.0007,
"step": 600
},
{
"epoch": 1.9303797468354431,
"grad_norm": 0.005126953125,
"learning_rate": 0.0002,
"loss": 0.0005,
"step": 610
},
{
"epoch": 1.9620253164556962,
"grad_norm": 0.00390625,
"learning_rate": 0.0002,
"loss": 0.001,
"step": 620
},
{
"epoch": 1.9936708860759493,
"grad_norm": 0.0020904541015625,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 630
},
{
"epoch": 2.0253164556962027,
"grad_norm": 0.00102996826171875,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 640
},
{
"epoch": 2.0569620253164556,
"grad_norm": 0.0079345703125,
"learning_rate": 0.0002,
"loss": 0.0005,
"step": 650
},
{
"epoch": 2.088607594936709,
"grad_norm": 0.004058837890625,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 660
},
{
"epoch": 2.1202531645569622,
"grad_norm": 0.006683349609375,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 670
},
{
"epoch": 2.151898734177215,
"grad_norm": 0.00799560546875,
"learning_rate": 0.0002,
"loss": 0.0007,
"step": 680
},
{
"epoch": 2.1835443037974684,
"grad_norm": 0.01416015625,
"learning_rate": 0.0002,
"loss": 0.0008,
"step": 690
},
{
"epoch": 2.2151898734177213,
"grad_norm": 0.005523681640625,
"learning_rate": 0.0002,
"loss": 0.0005,
"step": 700
},
{
"epoch": 2.2468354430379747,
"grad_norm": 0.003692626953125,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 710
},
{
"epoch": 2.278481012658228,
"grad_norm": 0.01287841796875,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 720
},
{
"epoch": 2.310126582278481,
"grad_norm": 0.0101318359375,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 730
},
{
"epoch": 2.3417721518987342,
"grad_norm": 0.006866455078125,
"learning_rate": 0.0002,
"loss": 0.0005,
"step": 740
},
{
"epoch": 2.3734177215189876,
"grad_norm": 0.00921630859375,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 750
},
{
"epoch": 2.4050632911392404,
"grad_norm": 0.0072021484375,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 760
},
{
"epoch": 2.4367088607594938,
"grad_norm": 0.00921630859375,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 770
},
{
"epoch": 2.4683544303797467,
"grad_norm": 0.01239013671875,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 780
},
{
"epoch": 2.5,
"grad_norm": 0.01226806640625,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 790
},
{
"epoch": 2.5316455696202533,
"grad_norm": 0.00872802734375,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 800
},
{
"epoch": 2.5632911392405062,
"grad_norm": 0.007781982421875,
"learning_rate": 0.0002,
"loss": 0.0008,
"step": 810
},
{
"epoch": 2.5949367088607596,
"grad_norm": 0.005767822265625,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 820
},
{
"epoch": 2.6265822784810124,
"grad_norm": 0.0111083984375,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 830
},
{
"epoch": 2.6582278481012658,
"grad_norm": 0.0157470703125,
"learning_rate": 0.0002,
"loss": 0.0007,
"step": 840
},
{
"epoch": 2.689873417721519,
"grad_norm": 0.0111083984375,
"learning_rate": 0.0002,
"loss": 0.0008,
"step": 850
},
{
"epoch": 2.721518987341772,
"grad_norm": 0.009521484375,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 860
},
{
"epoch": 2.7531645569620253,
"grad_norm": 0.0018463134765625,
"learning_rate": 0.0002,
"loss": 0.0002,
"step": 870
},
{
"epoch": 2.7848101265822782,
"grad_norm": 0.00168609619140625,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 880
},
{
"epoch": 2.8164556962025316,
"grad_norm": 0.01470947265625,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 890
},
{
"epoch": 2.848101265822785,
"grad_norm": 0.0211181640625,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 900
},
{
"epoch": 2.879746835443038,
"grad_norm": 0.0057373046875,
"learning_rate": 0.0002,
"loss": 0.0017,
"step": 910
},
{
"epoch": 2.911392405063291,
"grad_norm": 0.00469970703125,
"learning_rate": 0.0002,
"loss": 0.0022,
"step": 920
},
{
"epoch": 2.9430379746835444,
"grad_norm": 0.00982666015625,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 930
},
{
"epoch": 2.9746835443037973,
"grad_norm": 0.0087890625,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 940
},
{
"epoch": 3.0063291139240507,
"grad_norm": 0.0098876953125,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 950
},
{
"epoch": 3.037974683544304,
"grad_norm": 0.00360107421875,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 960
},
{
"epoch": 3.069620253164557,
"grad_norm": 0.0150146484375,
"learning_rate": 0.0002,
"loss": 0.0007,
"step": 970
},
{
"epoch": 3.1012658227848102,
"grad_norm": 0.005828857421875,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 980
},
{
"epoch": 3.132911392405063,
"grad_norm": 0.00665283203125,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 990
},
{
"epoch": 3.1645569620253164,
"grad_norm": 0.005401611328125,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1000
},
{
"epoch": 3.1962025316455698,
"grad_norm": 0.0216064453125,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 1010
},
{
"epoch": 3.2278481012658227,
"grad_norm": 0.0152587890625,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 1020
},
{
"epoch": 3.259493670886076,
"grad_norm": 0.0096435546875,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1030
},
{
"epoch": 3.291139240506329,
"grad_norm": 0.007232666015625,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1040
},
{
"epoch": 3.3227848101265822,
"grad_norm": 0.006439208984375,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1050
},
{
"epoch": 3.3544303797468356,
"grad_norm": 0.003692626953125,
"learning_rate": 0.0002,
"loss": 0.0005,
"step": 1060
},
{
"epoch": 3.3860759493670884,
"grad_norm": 0.0028839111328125,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 1070
},
{
"epoch": 3.4177215189873418,
"grad_norm": 0.005859375,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1080
},
{
"epoch": 3.449367088607595,
"grad_norm": 0.0067138671875,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 1090
},
{
"epoch": 3.481012658227848,
"grad_norm": 0.007171630859375,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1100
},
{
"epoch": 3.5126582278481013,
"grad_norm": 0.00537109375,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1110
},
{
"epoch": 3.5443037974683547,
"grad_norm": 0.00775146484375,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 1120
},
{
"epoch": 3.5759493670886076,
"grad_norm": 0.0030517578125,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1130
},
{
"epoch": 3.607594936708861,
"grad_norm": 0.00823974609375,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 1140
},
{
"epoch": 3.6392405063291138,
"grad_norm": 0.005157470703125,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1150
},
{
"epoch": 3.670886075949367,
"grad_norm": 0.01202392578125,
"learning_rate": 0.0002,
"loss": 0.0009,
"step": 1160
},
{
"epoch": 3.7025316455696204,
"grad_norm": 0.0233154296875,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 1170
},
{
"epoch": 3.7341772151898733,
"grad_norm": 0.005218505859375,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 1180
},
{
"epoch": 3.7658227848101267,
"grad_norm": 0.0108642578125,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 1190
},
{
"epoch": 3.7974683544303796,
"grad_norm": 0.0086669921875,
"learning_rate": 0.0002,
"loss": 0.0006,
"step": 1200
},
{
"epoch": 3.829113924050633,
"grad_norm": 0.002685546875,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1210
},
{
"epoch": 3.8607594936708862,
"grad_norm": 0.0019683837890625,
"learning_rate": 0.0002,
"loss": 0.0002,
"step": 1220
},
{
"epoch": 3.892405063291139,
"grad_norm": 0.0020294189453125,
"learning_rate": 0.0002,
"loss": 0.0005,
"step": 1230
},
{
"epoch": 3.9240506329113924,
"grad_norm": 0.003509521484375,
"learning_rate": 0.0002,
"loss": 0.0002,
"step": 1240
},
{
"epoch": 3.9556962025316453,
"grad_norm": 0.004486083984375,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1250
},
{
"epoch": 3.9873417721518987,
"grad_norm": 0.0034637451171875,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1260
},
{
"epoch": 4.018987341772152,
"grad_norm": 0.0031890869140625,
"learning_rate": 0.0002,
"loss": 0.0002,
"step": 1270
},
{
"epoch": 4.050632911392405,
"grad_norm": 0.0036163330078125,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1280
},
{
"epoch": 4.082278481012658,
"grad_norm": 0.01055908203125,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1290
},
{
"epoch": 4.113924050632911,
"grad_norm": 0.00188446044921875,
"learning_rate": 0.0002,
"loss": 0.0002,
"step": 1300
},
{
"epoch": 4.1455696202531644,
"grad_norm": 0.0029296875,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 1310
},
{
"epoch": 4.177215189873418,
"grad_norm": 0.00167083740234375,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 1320
},
{
"epoch": 4.208860759493671,
"grad_norm": 0.005584716796875,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 1330
},
{
"epoch": 4.2405063291139244,
"grad_norm": 0.007171630859375,
"learning_rate": 0.0002,
"loss": 0.0002,
"step": 1340
},
{
"epoch": 4.272151898734177,
"grad_norm": 0.004119873046875,
"learning_rate": 0.0002,
"loss": 0.0002,
"step": 1350
},
{
"epoch": 4.30379746835443,
"grad_norm": 0.00543212890625,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1360
},
{
"epoch": 4.3354430379746836,
"grad_norm": 0.00860595703125,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 1370
},
{
"epoch": 4.367088607594937,
"grad_norm": 0.0130615234375,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1380
},
{
"epoch": 4.39873417721519,
"grad_norm": 0.0074462890625,
"learning_rate": 0.0002,
"loss": 0.0002,
"step": 1390
},
{
"epoch": 4.430379746835443,
"grad_norm": 0.00384521484375,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1400
},
{
"epoch": 4.462025316455696,
"grad_norm": 0.0106201171875,
"learning_rate": 0.0002,
"loss": 0.0004,
"step": 1410
},
{
"epoch": 4.493670886075949,
"grad_norm": 0.0027618408203125,
"learning_rate": 0.0002,
"loss": 0.0002,
"step": 1420
},
{
"epoch": 4.525316455696203,
"grad_norm": 0.00555419921875,
"learning_rate": 0.0002,
"loss": 0.0002,
"step": 1430
},
{
"epoch": 4.556962025316456,
"grad_norm": 0.006011962890625,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1440
},
{
"epoch": 4.588607594936709,
"grad_norm": 0.006256103515625,
"learning_rate": 0.0002,
"loss": 0.0002,
"step": 1450
},
{
"epoch": 4.620253164556962,
"grad_norm": 0.003997802734375,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1460
},
{
"epoch": 4.651898734177215,
"grad_norm": 0.006744384765625,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1470
},
{
"epoch": 4.6835443037974684,
"grad_norm": 0.00836181640625,
"learning_rate": 0.0002,
"loss": 0.0005,
"step": 1480
},
{
"epoch": 4.715189873417722,
"grad_norm": 0.0076904296875,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1490
},
{
"epoch": 4.746835443037975,
"grad_norm": 0.01239013671875,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1500
},
{
"epoch": 4.7784810126582276,
"grad_norm": 0.00146484375,
"learning_rate": 0.0002,
"loss": 0.0007,
"step": 1510
},
{
"epoch": 4.810126582278481,
"grad_norm": 0.00909423828125,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1520
},
{
"epoch": 4.841772151898734,
"grad_norm": 0.0133056640625,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1530
},
{
"epoch": 4.8734177215189876,
"grad_norm": 0.00360107421875,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1540
},
{
"epoch": 4.905063291139241,
"grad_norm": 0.00799560546875,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1550
},
{
"epoch": 4.936708860759493,
"grad_norm": 0.0047607421875,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1560
},
{
"epoch": 4.968354430379747,
"grad_norm": 0.00157928466796875,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1570
},
{
"epoch": 5.0,
"grad_norm": 0.0101318359375,
"learning_rate": 0.0002,
"loss": 0.0003,
"step": 1580
}
],
"logging_steps": 10,
"max_steps": 1580,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.151495561120973e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}