MaruchanPark committed

Commit f43ca49 • 1 parent: 283c4b3

add model

Files changed:
- adapter_config.json +29 -0
- adapter_model.safetensors +3 -0
- optimizer.pt +3 -0
- rng_state.pth +3 -0
- scheduler.pt +3 -0
- special_tokens_map.json +24 -0
- tokenizer.json +0 -0
- tokenizer_config.json +97 -0
- trainer_state.json +1139 -0
- training_args.bin +3 -0
adapter_config.json
ADDED
@@ -0,0 +1,29 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "/KonanLLM-peft-train-serving/model/kylin-7b-chat-2406-v2",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
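
This config describes a LoRA adapter (r=8, lora_alpha=16, dropout 0.05) on the q_proj and v_proj projections of a Llama-style causal LM. A minimal loading sketch with PEFT follows; the base-model path is the local one recorded above and the "./adapter" clone location is an assumption, so both may need adjusting outside the original training environment.

# Minimal sketch: attach this commit's LoRA adapter to its base model.
# Both paths are environment-specific assumptions (see note above).
import torch
from transformers import AutoModelForCausalLM
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "/KonanLLM-peft-train-serving/model/kylin-7b-chat-2406-v2",
    torch_dtype=torch.bfloat16,
)
# Reads adapter_config.json and adapter_model.safetensors from the clone.
model = PeftModel.from_pretrained(base, "./adapter")
model.eval()  # the adapter was saved with inference_mode: true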
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7c8c5689f6d0c54af75f69b7c7d4e70a1df14cf7d27b39fe30ce635c2a21a4d
+size 6832728
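
What Git stores here is a Git LFS pointer stub, not the weights themselves; the oid is the SHA-256 of the real 6.8 MB object. A small sketch for checking a downloaded file against the pointer (the same pattern applies to the other LFS-tracked files below), assuming "adapter_model.safetensors" is the fully materialized file rather than the 3-line stub:

# Sketch: verify a downloaded LFS object against the pointer's sha256 oid.
import hashlib

EXPECTED = "b7c8c5689f6d0c54af75f69b7c7d4e70a1df14cf7d27b39fe30ce635c2a21a4d"

h = hashlib.sha256()
with open("adapter_model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)
assert h.hexdigest() == EXPECTED, "checksum mismatch"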
optimizer.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86052738ddbd1f67a03f20ed189ddb89f2b7ea19879f9fb7f429e298f92fbfeb
+size 13738693
rng_state.pth
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ae2bc7c0ba6650d2e269643a01954663ce1d8e27edf4c2de2269f488c94882e
+size 14575
scheduler.pt
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e49e6c54a6616f96e8fa4a7eaf8c40cf787e5446bb7075cfc92fd89425df17b9
+size 627
special_tokens_map.json
ADDED
@@ -0,0 +1,24 @@
+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": "<unk>"
+}
tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
tokenizer_config.json
ADDED
@@ -0,0 +1,97 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32000": {
+      "content": "<|tel|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32001": {
+      "content": "<|rnn|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32002": {
+      "content": "<|email|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32003": {
+      "content": "<|crd|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "32004": {
+      "content": "<|acc|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "51510": {
+      "content": "<|im_start|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "51511": {
+      "content": "<|im_end|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<s>",
+  "chat_template": "{{('<|im_start|>assistant\n' + messages[0]['content'].strip() + '<|im_end|>' + '\n' if messages[0]['role'] == 'system' else '')}}{% for message in (messages[1:] if messages[0]['role'] == 'system' else messages) %}{% if message['role'] == 'user' %}{{'<|im_start|>user\n' + message['content'] + '<|im_end|>' + '\n' + '<|im_start|>assistant\n'}}{% elif message['role'] == 'assistant' %}{{message['content']}}{% if loop.last %}{% if add_generation_prompt %}{{'<|im_end|>'}}{% endif %}{% else %}{{'<|im_end|>'}}{% endif %}{% endif %}{% endfor %}",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "</s>",
+  "legacy": true,
+  "model_max_length": 1000000000000000019884624838656,
+  "model_name": "kylin-7b",
+  "pad_token": "</s>",
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}
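
The chat_template renders ChatML-style turns with the custom <|im_start|>/<|im_end|> tokens registered above; notably, a system message is emitted under the assistant header, and each user turn already closes with an opened assistant header. A usage sketch, assuming the repo is cloned to ./adapter:

# Sketch: render a prompt with the tokenizer's ChatML-style chat template.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./adapter")
text = tok.apply_chat_template(
    [{"role": "user", "content": "Hello"}],
    tokenize=False,
)
# Per the template: "<|im_start|>user\nHello<|im_end|>\n<|im_start|>assistant\n"
print(text)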
trainer_state.json
ADDED
@@ -0,0 +1,1139 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 1580,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {"epoch": 0.03164556962025317, "grad_norm": 0.0072021484375, "learning_rate": 0.0002, "loss": 0.0019, "step": 10},
+    {"epoch": 0.06329113924050633, "grad_norm": 0.01226806640625, "learning_rate": 0.0002, "loss": 0.0012, "step": 20},
+    {"epoch": 0.0949367088607595, "grad_norm": 0.013427734375, "learning_rate": 0.0002, "loss": 0.0016, "step": 30},
+    {"epoch": 0.12658227848101267, "grad_norm": 0.004791259765625, "learning_rate": 0.0002, "loss": 0.0037, "step": 40},
+    {"epoch": 0.15822784810126583, "grad_norm": 0.00787353515625, "learning_rate": 0.0002, "loss": 0.0015, "step": 50},
+    {"epoch": 0.189873417721519, "grad_norm": 0.0164794921875, "learning_rate": 0.0002, "loss": 0.0023, "step": 60},
+    {"epoch": 0.22151898734177214, "grad_norm": 0.0019989013671875, "learning_rate": 0.0002, "loss": 0.0017, "step": 70},
+    {"epoch": 0.25316455696202533, "grad_norm": 0.043701171875, "learning_rate": 0.0002, "loss": 0.0018, "step": 80},
+    {"epoch": 0.2848101265822785, "grad_norm": 0.04150390625, "learning_rate": 0.0002, "loss": 0.0012, "step": 90},
+    {"epoch": 0.31645569620253167, "grad_norm": 0.0126953125, "learning_rate": 0.0002, "loss": 0.0016, "step": 100},
+    {"epoch": 0.34810126582278483, "grad_norm": 0.0299072265625, "learning_rate": 0.0002, "loss": 0.0017, "step": 110},
+    {"epoch": 0.379746835443038, "grad_norm": 0.017333984375, "learning_rate": 0.0002, "loss": 0.0021, "step": 120},
+    {"epoch": 0.41139240506329117, "grad_norm": 0.0284423828125, "learning_rate": 0.0002, "loss": 0.004, "step": 130},
+    {"epoch": 0.4430379746835443, "grad_norm": 0.0228271484375, "learning_rate": 0.0002, "loss": 0.0015, "step": 140},
+    {"epoch": 0.47468354430379744, "grad_norm": 0.0216064453125, "learning_rate": 0.0002, "loss": 0.0012, "step": 150},
+    {"epoch": 0.5063291139240507, "grad_norm": 0.0079345703125, "learning_rate": 0.0002, "loss": 0.0017, "step": 160},
+    {"epoch": 0.5379746835443038, "grad_norm": 0.03564453125, "learning_rate": 0.0002, "loss": 0.0007, "step": 170},
+    {"epoch": 0.569620253164557, "grad_norm": 0.0859375, "learning_rate": 0.0002, "loss": 0.0016, "step": 180},
+    {"epoch": 0.6012658227848101, "grad_norm": 0.0014801025390625, "learning_rate": 0.0002, "loss": 0.0013, "step": 190},
+    {"epoch": 0.6329113924050633, "grad_norm": 0.02734375, "learning_rate": 0.0002, "loss": 0.0015, "step": 200},
+    {"epoch": 0.6645569620253164, "grad_norm": 0.00836181640625, "learning_rate": 0.0002, "loss": 0.0012, "step": 210},
+    {"epoch": 0.6962025316455697, "grad_norm": 0.0218505859375, "learning_rate": 0.0002, "loss": 0.0012, "step": 220},
+    {"epoch": 0.7278481012658228, "grad_norm": 0.00799560546875, "learning_rate": 0.0002, "loss": 0.0013, "step": 230},
+    {"epoch": 0.759493670886076, "grad_norm": 0.0478515625, "learning_rate": 0.0002, "loss": 0.003, "step": 240},
+    {"epoch": 0.7911392405063291, "grad_norm": 0.091796875, "learning_rate": 0.0002, "loss": 0.0008, "step": 250},
+    {"epoch": 0.8227848101265823, "grad_norm": 0.0218505859375, "learning_rate": 0.0002, "loss": 0.0008, "step": 260},
+    {"epoch": 0.8544303797468354, "grad_norm": 0.006317138671875, "learning_rate": 0.0002, "loss": 0.0009, "step": 270},
+    {"epoch": 0.8860759493670886, "grad_norm": 0.034912109375, "learning_rate": 0.0002, "loss": 0.0016, "step": 280},
+    {"epoch": 0.9177215189873418, "grad_norm": 0.0115966796875, "learning_rate": 0.0002, "loss": 0.0005, "step": 290},
+    {"epoch": 0.9493670886075949, "grad_norm": 0.04296875, "learning_rate": 0.0002, "loss": 0.0027, "step": 300},
+    {"epoch": 0.9810126582278481, "grad_norm": 0.0277099609375, "learning_rate": 0.0002, "loss": 0.0008, "step": 310},
+    {"epoch": 1.0126582278481013, "grad_norm": 0.0240478515625, "learning_rate": 0.0002, "loss": 0.001, "step": 320},
+    {"epoch": 1.0443037974683544, "grad_norm": 0.0078125, "learning_rate": 0.0002, "loss": 0.0042, "step": 330},
+    {"epoch": 1.0759493670886076, "grad_norm": 0.0047607421875, "learning_rate": 0.0002, "loss": 0.0006, "step": 340},
+    {"epoch": 1.1075949367088607, "grad_norm": 0.007598876953125, "learning_rate": 0.0002, "loss": 0.0011, "step": 350},
+    {"epoch": 1.139240506329114, "grad_norm": 0.00665283203125, "learning_rate": 0.0002, "loss": 0.0006, "step": 360},
+    {"epoch": 1.1708860759493671, "grad_norm": 0.00445556640625, "learning_rate": 0.0002, "loss": 0.0005, "step": 370},
+    {"epoch": 1.2025316455696202, "grad_norm": 0.004364013671875, "learning_rate": 0.0002, "loss": 0.0006, "step": 380},
+    {"epoch": 1.2341772151898733, "grad_norm": 0.003662109375, "learning_rate": 0.0002, "loss": 0.0005, "step": 390},
+    {"epoch": 1.2658227848101267, "grad_norm": 0.020263671875, "learning_rate": 0.0002, "loss": 0.0008, "step": 400},
+    {"epoch": 1.2974683544303798, "grad_norm": 0.008544921875, "learning_rate": 0.0002, "loss": 0.0009, "step": 410},
+    {"epoch": 1.3291139240506329, "grad_norm": 0.0140380859375, "learning_rate": 0.0002, "loss": 0.0015, "step": 420},
+    {"epoch": 1.360759493670886, "grad_norm": 0.00616455078125, "learning_rate": 0.0002, "loss": 0.001, "step": 430},
+    {"epoch": 1.3924050632911391, "grad_norm": 0.00506591796875, "learning_rate": 0.0002, "loss": 0.0004, "step": 440},
+    {"epoch": 1.4240506329113924, "grad_norm": 0.041748046875, "learning_rate": 0.0002, "loss": 0.0008, "step": 450},
+    {"epoch": 1.4556962025316456, "grad_norm": 0.0849609375, "learning_rate": 0.0002, "loss": 0.0011, "step": 460},
+    {"epoch": 1.4873417721518987, "grad_norm": 0.01495361328125, "learning_rate": 0.0002, "loss": 0.001, "step": 470},
+    {"epoch": 1.518987341772152, "grad_norm": 0.007110595703125, "learning_rate": 0.0002, "loss": 0.0006, "step": 480},
+    {"epoch": 1.5506329113924051, "grad_norm": 0.005401611328125, "learning_rate": 0.0002, "loss": 0.0006, "step": 490},
+    {"epoch": 1.5822784810126582, "grad_norm": 0.0084228515625, "learning_rate": 0.0002, "loss": 0.0006, "step": 500},
+    {"epoch": 1.6139240506329116, "grad_norm": 0.01123046875, "learning_rate": 0.0002, "loss": 0.0006, "step": 510},
+    {"epoch": 1.6455696202531644, "grad_norm": 0.00970458984375, "learning_rate": 0.0002, "loss": 0.0008, "step": 520},
+    {"epoch": 1.6772151898734178, "grad_norm": 0.00665283203125, "learning_rate": 0.0002, "loss": 0.0005, "step": 530},
+    {"epoch": 1.7088607594936709, "grad_norm": 0.00506591796875, "learning_rate": 0.0002, "loss": 0.001, "step": 540},
+    {"epoch": 1.740506329113924, "grad_norm": 0.0205078125, "learning_rate": 0.0002, "loss": 0.0005, "step": 550},
+    {"epoch": 1.7721518987341773, "grad_norm": 0.04052734375, "learning_rate": 0.0002, "loss": 0.0011, "step": 560},
+    {"epoch": 1.8037974683544302, "grad_norm": 0.0179443359375, "learning_rate": 0.0002, "loss": 0.0007, "step": 570},
+    {"epoch": 1.8354430379746836, "grad_norm": 0.0145263671875, "learning_rate": 0.0002, "loss": 0.0009, "step": 580},
+    {"epoch": 1.8670886075949367, "grad_norm": 0.004913330078125, "learning_rate": 0.0002, "loss": 0.0004, "step": 590},
+    {"epoch": 1.8987341772151898, "grad_norm": 0.014404296875, "learning_rate": 0.0002, "loss": 0.0007, "step": 600},
+    {"epoch": 1.9303797468354431, "grad_norm": 0.005126953125, "learning_rate": 0.0002, "loss": 0.0005, "step": 610},
+    {"epoch": 1.9620253164556962, "grad_norm": 0.00390625, "learning_rate": 0.0002, "loss": 0.001, "step": 620},
+    {"epoch": 1.9936708860759493, "grad_norm": 0.0020904541015625, "learning_rate": 0.0002, "loss": 0.0006, "step": 630},
+    {"epoch": 2.0253164556962027, "grad_norm": 0.00102996826171875, "learning_rate": 0.0002, "loss": 0.0006, "step": 640},
+    {"epoch": 2.0569620253164556, "grad_norm": 0.0079345703125, "learning_rate": 0.0002, "loss": 0.0005, "step": 650},
+    {"epoch": 2.088607594936709, "grad_norm": 0.004058837890625, "learning_rate": 0.0002, "loss": 0.0004, "step": 660},
+    {"epoch": 2.1202531645569622, "grad_norm": 0.006683349609375, "learning_rate": 0.0002, "loss": 0.0004, "step": 670},
+    {"epoch": 2.151898734177215, "grad_norm": 0.00799560546875, "learning_rate": 0.0002, "loss": 0.0007, "step": 680},
+    {"epoch": 2.1835443037974684, "grad_norm": 0.01416015625, "learning_rate": 0.0002, "loss": 0.0008, "step": 690},
+    {"epoch": 2.2151898734177213, "grad_norm": 0.005523681640625, "learning_rate": 0.0002, "loss": 0.0005, "step": 700},
+    {"epoch": 2.2468354430379747, "grad_norm": 0.003692626953125, "learning_rate": 0.0002, "loss": 0.0006, "step": 710},
+    {"epoch": 2.278481012658228, "grad_norm": 0.01287841796875, "learning_rate": 0.0002, "loss": 0.0004, "step": 720},
+    {"epoch": 2.310126582278481, "grad_norm": 0.0101318359375, "learning_rate": 0.0002, "loss": 0.0004, "step": 730},
+    {"epoch": 2.3417721518987342, "grad_norm": 0.006866455078125, "learning_rate": 0.0002, "loss": 0.0005, "step": 740},
+    {"epoch": 2.3734177215189876, "grad_norm": 0.00921630859375, "learning_rate": 0.0002, "loss": 0.0004, "step": 750},
+    {"epoch": 2.4050632911392404, "grad_norm": 0.0072021484375, "learning_rate": 0.0002, "loss": 0.0003, "step": 760},
+    {"epoch": 2.4367088607594938, "grad_norm": 0.00921630859375, "learning_rate": 0.0002, "loss": 0.0003, "step": 770},
+    {"epoch": 2.4683544303797467, "grad_norm": 0.01239013671875, "learning_rate": 0.0002, "loss": 0.0004, "step": 780},
+    {"epoch": 2.5, "grad_norm": 0.01226806640625, "learning_rate": 0.0002, "loss": 0.0004, "step": 790},
+    {"epoch": 2.5316455696202533, "grad_norm": 0.00872802734375, "learning_rate": 0.0002, "loss": 0.0004, "step": 800},
+    {"epoch": 2.5632911392405062, "grad_norm": 0.007781982421875, "learning_rate": 0.0002, "loss": 0.0008, "step": 810},
+    {"epoch": 2.5949367088607596, "grad_norm": 0.005767822265625, "learning_rate": 0.0002, "loss": 0.0004, "step": 820},
+    {"epoch": 2.6265822784810124, "grad_norm": 0.0111083984375, "learning_rate": 0.0002, "loss": 0.0004, "step": 830},
+    {"epoch": 2.6582278481012658, "grad_norm": 0.0157470703125, "learning_rate": 0.0002, "loss": 0.0007, "step": 840},
+    {"epoch": 2.689873417721519, "grad_norm": 0.0111083984375, "learning_rate": 0.0002, "loss": 0.0008, "step": 850},
+    {"epoch": 2.721518987341772, "grad_norm": 0.009521484375, "learning_rate": 0.0002, "loss": 0.0006, "step": 860},
+    {"epoch": 2.7531645569620253, "grad_norm": 0.0018463134765625, "learning_rate": 0.0002, "loss": 0.0002, "step": 870},
+    {"epoch": 2.7848101265822782, "grad_norm": 0.00168609619140625, "learning_rate": 0.0002, "loss": 0.0006, "step": 880},
+    {"epoch": 2.8164556962025316, "grad_norm": 0.01470947265625, "learning_rate": 0.0002, "loss": 0.0004, "step": 890},
+    {"epoch": 2.848101265822785, "grad_norm": 0.0211181640625, "learning_rate": 0.0002, "loss": 0.0004, "step": 900},
+    {"epoch": 2.879746835443038, "grad_norm": 0.0057373046875, "learning_rate": 0.0002, "loss": 0.0017, "step": 910},
+    {"epoch": 2.911392405063291, "grad_norm": 0.00469970703125, "learning_rate": 0.0002, "loss": 0.0022, "step": 920},
+    {"epoch": 2.9430379746835444, "grad_norm": 0.00982666015625, "learning_rate": 0.0002, "loss": 0.0006, "step": 930},
+    {"epoch": 2.9746835443037973, "grad_norm": 0.0087890625, "learning_rate": 0.0002, "loss": 0.0003, "step": 940},
+    {"epoch": 3.0063291139240507, "grad_norm": 0.0098876953125, "learning_rate": 0.0002, "loss": 0.0004, "step": 950},
+    {"epoch": 3.037974683544304, "grad_norm": 0.00360107421875, "learning_rate": 0.0002, "loss": 0.0004, "step": 960},
+    {"epoch": 3.069620253164557, "grad_norm": 0.0150146484375, "learning_rate": 0.0002, "loss": 0.0007, "step": 970},
+    {"epoch": 3.1012658227848102, "grad_norm": 0.005828857421875, "learning_rate": 0.0002, "loss": 0.0004, "step": 980},
+    {"epoch": 3.132911392405063, "grad_norm": 0.00665283203125, "learning_rate": 0.0002, "loss": 0.0004, "step": 990},
+    {"epoch": 3.1645569620253164, "grad_norm": 0.005401611328125, "learning_rate": 0.0002, "loss": 0.0003, "step": 1000},
+    {"epoch": 3.1962025316455698, "grad_norm": 0.0216064453125, "learning_rate": 0.0002, "loss": 0.0006, "step": 1010},
+    {"epoch": 3.2278481012658227, "grad_norm": 0.0152587890625, "learning_rate": 0.0002, "loss": 0.0006, "step": 1020},
+    {"epoch": 3.259493670886076, "grad_norm": 0.0096435546875, "learning_rate": 0.0002, "loss": 0.0003, "step": 1030},
+    {"epoch": 3.291139240506329, "grad_norm": 0.007232666015625, "learning_rate": 0.0002, "loss": 0.0003, "step": 1040},
+    {"epoch": 3.3227848101265822, "grad_norm": 0.006439208984375, "learning_rate": 0.0002, "loss": 0.0003, "step": 1050},
+    {"epoch": 3.3544303797468356, "grad_norm": 0.003692626953125, "learning_rate": 0.0002, "loss": 0.0005, "step": 1060},
+    {"epoch": 3.3860759493670884, "grad_norm": 0.0028839111328125, "learning_rate": 0.0002, "loss": 0.0004, "step": 1070},
+    {"epoch": 3.4177215189873418, "grad_norm": 0.005859375, "learning_rate": 0.0002, "loss": 0.0003, "step": 1080},
+    {"epoch": 3.449367088607595, "grad_norm": 0.0067138671875, "learning_rate": 0.0002, "loss": 0.0004, "step": 1090},
+    {"epoch": 3.481012658227848, "grad_norm": 0.007171630859375, "learning_rate": 0.0002, "loss": 0.0003, "step": 1100},
+    {"epoch": 3.5126582278481013, "grad_norm": 0.00537109375, "learning_rate": 0.0002, "loss": 0.0003, "step": 1110},
+    {"epoch": 3.5443037974683547, "grad_norm": 0.00775146484375, "learning_rate": 0.0002, "loss": 0.0004, "step": 1120},
+    {"epoch": 3.5759493670886076, "grad_norm": 0.0030517578125, "learning_rate": 0.0002, "loss": 0.0003, "step": 1130},
+    {"epoch": 3.607594936708861, "grad_norm": 0.00823974609375, "learning_rate": 0.0002, "loss": 0.0004, "step": 1140},
+    {"epoch": 3.6392405063291138, "grad_norm": 0.005157470703125, "learning_rate": 0.0002, "loss": 0.0003, "step": 1150},
+    {"epoch": 3.670886075949367, "grad_norm": 0.01202392578125, "learning_rate": 0.0002, "loss": 0.0009, "step": 1160},
+    {"epoch": 3.7025316455696204, "grad_norm": 0.0233154296875, "learning_rate": 0.0002, "loss": 0.0004, "step": 1170},
+    {"epoch": 3.7341772151898733, "grad_norm": 0.005218505859375, "learning_rate": 0.0002, "loss": 0.0004, "step": 1180},
+    {"epoch": 3.7658227848101267, "grad_norm": 0.0108642578125, "learning_rate": 0.0002, "loss": 0.0004, "step": 1190},
+    {"epoch": 3.7974683544303796, "grad_norm": 0.0086669921875, "learning_rate": 0.0002, "loss": 0.0006, "step": 1200},
+    {"epoch": 3.829113924050633, "grad_norm": 0.002685546875, "learning_rate": 0.0002, "loss": 0.0003, "step": 1210},
+    {"epoch": 3.8607594936708862, "grad_norm": 0.0019683837890625, "learning_rate": 0.0002, "loss": 0.0002, "step": 1220},
+    {"epoch": 3.892405063291139, "grad_norm": 0.0020294189453125, "learning_rate": 0.0002, "loss": 0.0005, "step": 1230},
+    {"epoch": 3.9240506329113924, "grad_norm": 0.003509521484375, "learning_rate": 0.0002, "loss": 0.0002, "step": 1240},
+    {"epoch": 3.9556962025316453, "grad_norm": 0.004486083984375, "learning_rate": 0.0002, "loss": 0.0003, "step": 1250},
+    {"epoch": 3.9873417721518987, "grad_norm": 0.0034637451171875, "learning_rate": 0.0002, "loss": 0.0003, "step": 1260},
+    {"epoch": 4.018987341772152, "grad_norm": 0.0031890869140625, "learning_rate": 0.0002, "loss": 0.0002, "step": 1270},
+    {"epoch": 4.050632911392405, "grad_norm": 0.0036163330078125, "learning_rate": 0.0002, "loss": 0.0003, "step": 1280},
+    {"epoch": 4.082278481012658, "grad_norm": 0.01055908203125, "learning_rate": 0.0002, "loss": 0.0003, "step": 1290},
+    {"epoch": 4.113924050632911, "grad_norm": 0.00188446044921875, "learning_rate": 0.0002, "loss": 0.0002, "step": 1300},
+    {"epoch": 4.1455696202531644, "grad_norm": 0.0029296875, "learning_rate": 0.0002, "loss": 0.0004, "step": 1310},
+    {"epoch": 4.177215189873418, "grad_norm": 0.00167083740234375, "learning_rate": 0.0002, "loss": 0.0004, "step": 1320},
+    {"epoch": 4.208860759493671, "grad_norm": 0.005584716796875, "learning_rate": 0.0002, "loss": 0.0004, "step": 1330},
+    {"epoch": 4.2405063291139244, "grad_norm": 0.007171630859375, "learning_rate": 0.0002, "loss": 0.0002, "step": 1340},
+    {"epoch": 4.272151898734177, "grad_norm": 0.004119873046875, "learning_rate": 0.0002, "loss": 0.0002, "step": 1350},
+    {"epoch": 4.30379746835443, "grad_norm": 0.00543212890625, "learning_rate": 0.0002, "loss": 0.0003, "step": 1360},
+    {"epoch": 4.3354430379746836, "grad_norm": 0.00860595703125, "learning_rate": 0.0002, "loss": 0.0004, "step": 1370},
+    {"epoch": 4.367088607594937, "grad_norm": 0.0130615234375, "learning_rate": 0.0002, "loss": 0.0003, "step": 1380},
+    {"epoch": 4.39873417721519, "grad_norm": 0.0074462890625, "learning_rate": 0.0002, "loss": 0.0002, "step": 1390},
+    {"epoch": 4.430379746835443, "grad_norm": 0.00384521484375, "learning_rate": 0.0002, "loss": 0.0003, "step": 1400},
+    {"epoch": 4.462025316455696, "grad_norm": 0.0106201171875, "learning_rate": 0.0002, "loss": 0.0004, "step": 1410},
+    {"epoch": 4.493670886075949, "grad_norm": 0.0027618408203125, "learning_rate": 0.0002, "loss": 0.0002, "step": 1420},
+    {"epoch": 4.525316455696203, "grad_norm": 0.00555419921875, "learning_rate": 0.0002, "loss": 0.0002, "step": 1430},
+    {"epoch": 4.556962025316456, "grad_norm": 0.006011962890625, "learning_rate": 0.0002, "loss": 0.0003, "step": 1440},
+    {"epoch": 4.588607594936709, "grad_norm": 0.006256103515625, "learning_rate": 0.0002, "loss": 0.0002, "step": 1450},
+    {"epoch": 4.620253164556962, "grad_norm": 0.003997802734375, "learning_rate": 0.0002, "loss": 0.0003, "step": 1460},
+    {"epoch": 4.651898734177215, "grad_norm": 0.006744384765625, "learning_rate": 0.0002, "loss": 0.0003, "step": 1470},
+    {"epoch": 4.6835443037974684, "grad_norm": 0.00836181640625, "learning_rate": 0.0002, "loss": 0.0005, "step": 1480},
+    {"epoch": 4.715189873417722, "grad_norm": 0.0076904296875, "learning_rate": 0.0002, "loss": 0.0003, "step": 1490},
+    {"epoch": 4.746835443037975, "grad_norm": 0.01239013671875, "learning_rate": 0.0002, "loss": 0.0003, "step": 1500},
+    {"epoch": 4.7784810126582276, "grad_norm": 0.00146484375, "learning_rate": 0.0002, "loss": 0.0007, "step": 1510},
+    {"epoch": 4.810126582278481, "grad_norm": 0.00909423828125, "learning_rate": 0.0002, "loss": 0.0003, "step": 1520},
+    {"epoch": 4.841772151898734, "grad_norm": 0.0133056640625, "learning_rate": 0.0002, "loss": 0.0003, "step": 1530},
+    {"epoch": 4.8734177215189876, "grad_norm": 0.00360107421875, "learning_rate": 0.0002, "loss": 0.0003, "step": 1540},
+    {"epoch": 4.905063291139241, "grad_norm": 0.00799560546875, "learning_rate": 0.0002, "loss": 0.0003, "step": 1550},
+    {"epoch": 4.936708860759493, "grad_norm": 0.0047607421875, "learning_rate": 0.0002, "loss": 0.0003, "step": 1560},
+    {"epoch": 4.968354430379747, "grad_norm": 0.00157928466796875, "learning_rate": 0.0002, "loss": 0.0003, "step": 1570},
+    {"epoch": 5.0, "grad_norm": 0.0101318359375, "learning_rate": 0.0002, "loss": 0.0003, "step": 1580}
+  ],
+  "logging_steps": 10,
+  "max_steps": 1580,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3.151495561120973e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}
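
trainer_state.json records the loss and gradient norm every 10 optimizer steps across the full 5-epoch, 1580-step run (constant learning rate 2e-4, per-device batch size 4). A small sketch for pulling the loss curve back out of the log, assuming the file is available locally:

# Sketch: extract the logged training-loss curve from trainer_state.json.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

points = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
print(f"{len(points)} logged points; final loss {points[-1][1]} at step {points[-1][0]}")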
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b416a7cdaae4669e6b55c4b65a6c90fe81e4ebe506af5ef36ed892b2ba782ab7
+size 4987