terry69 committed on May 28

Commit

9610215

•

1 Parent(s): fbd6d64

Model save

Browse files

Files changed (28) hide show

README.md +67 -0
adapter_config.json +34 -0
adapter_model.safetensors +3 -0
all_results.json +9 -0
runs/May27_21-34-18_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716845862.ip-172-31-69-60.ec2.internal.23523.0 +3 -0
runs/May27_21-50-49_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716846661.ip-172-31-69-60.ec2.internal.12422.0 +3 -0
runs/May27_21-51-46_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716846718.ip-172-31-69-60.ec2.internal.14769.0 +3 -0
runs/May27_21-52-49_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716846780.ip-172-31-69-60.ec2.internal.17185.0 +3 -0
runs/May27_21-53-25_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716846816.ip-172-31-69-60.ec2.internal.18768.0 +3 -0
runs/May27_21-54-02_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716846854.ip-172-31-69-60.ec2.internal.20423.0 +3 -0
runs/May27_21-54-40_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716846892.ip-172-31-69-60.ec2.internal.22058.0 +3 -0
runs/May27_21-57-52_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716847084.ip-172-31-69-60.ec2.internal.28976.0 +3 -0
runs/May27_21-58-59_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716847151.ip-172-31-69-60.ec2.internal.31493.0 +3 -0
runs/May27_22-08-59_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716847751.ip-172-31-69-60.ec2.internal.4939.0 +3 -0
runs/May27_22-11-24_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716847896.ip-172-31-69-60.ec2.internal.9970.0 +3 -0
runs/May27_22-14-02_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716848053.ip-172-31-69-60.ec2.internal.14045.0 +3 -0
runs/May27_22-15-16_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716848128.ip-172-31-69-60.ec2.internal.16064.0 +3 -0
runs/May27_22-17-27_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716848272.ip-172-31-69-60.ec2.internal.19273.0 +3 -0
runs/May27_22-19-18_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716848375.ip-172-31-69-60.ec2.internal.21785.0 +3 -0
runs/May27_22-20-22_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716848439.ip-172-31-69-60.ec2.internal.23944.0 +3 -0
runs/May27_22-22-05_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716848542.ip-172-31-69-60.ec2.internal.26567.0 +3 -0
special_tokens_map.json +24 -0
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +43 -0
train_results.json +9 -0
trainer_state.json +393 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,67 @@

+---
+license: apache-2.0
+library_name: peft
+tags:
+- trl
+- sft
+- generated_from_trainer
+base_model: mistralai/Mistral-7B-v0.1
+model-index:
+- name: mistral5p
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# mistral5p
+This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unspecified dataset.
+It achieves the following results on the evaluation set:
+- Loss: nan
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0002
+- train_batch_size: 8
+- eval_batch_size: 1
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 4
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 128
+- total_eval_batch_size: 4
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 1
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 0.7092        | 0.9969 | 243  | nan             |
+### Framework versions
+- PEFT 0.11.1
+- Transformers 4.41.1
+- Pytorch 2.2.2+cu121
+- Datasets 2.19.1
+- Tokenizers 0.19.1

adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 8,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 6,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "gate_proj",
+    "o_proj",
+    "q_proj",
+    "down_proj",
+    "up_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3fe90f2f98208e74edd203b571c5549da46239bd0283d4b96ce66fefe4c7662e
+size 31516744

all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 0.9969230769230769,
+    "total_flos": 5623477606285312.0,
+    "train_loss": 0.7391852758548878,
+    "train_runtime": 18302.1626,
+    "train_samples": 31180,
+    "train_samples_per_second": 1.704,
+    "train_steps_per_second": 0.013
+}

runs/May27_21-34-18_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716845862.ip-172-31-69-60.ec2.internal.23523.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b9aada4a7ded10c996b91d89d1038c9d6afa059db9a60396875228bd2cadc4bf
+size 5572

runs/May27_21-50-49_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716846661.ip-172-31-69-60.ec2.internal.12422.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:685a46cf146655868a9c7c816a1407092b7ae9e7c2d2463e7fe76988dc6d1815
+size 5986

runs/May27_21-51-46_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716846718.ip-172-31-69-60.ec2.internal.14769.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e5c235543a273abfab4a0dd02f710aaa75cd4bffa34fe4427025257619af23b3
+size 5572

runs/May27_21-52-49_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716846780.ip-172-31-69-60.ec2.internal.17185.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5678cbb9bc0e044dd777bd532c4e6b0a35d047e1189643add443beb71f2bbdd8
+size 5572

runs/May27_21-53-25_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716846816.ip-172-31-69-60.ec2.internal.18768.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9a56fdc42bc86ce1f9f4d2292c12ef556b5c654b28455e31522da1ea1a7ceed2
+size 5572

runs/May27_21-54-02_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716846854.ip-172-31-69-60.ec2.internal.20423.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a9c0ecd45778978ae693e63ae10a1380c32936acd8df595cdfefdd5895a5ceb4
+size 5779

runs/May27_21-54-40_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716846892.ip-172-31-69-60.ec2.internal.22058.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:efbc29668edf80f66fd2c9fdfad8efa18909e53179d79558f9094213f7733a78
+size 5779

runs/May27_21-57-52_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716847084.ip-172-31-69-60.ec2.internal.28976.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ed62cd56c05e39c6220bfed90bb4586299ec6b228ca01858819f1ef85fd7d2c0
+size 5986

runs/May27_21-58-59_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716847151.ip-172-31-69-60.ec2.internal.31493.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7de6497d021dba6f9e1ddca9db4b3ba92c61d71071f538c0051d7d1b50fcf6d7
+size 5572

runs/May27_22-08-59_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716847751.ip-172-31-69-60.ec2.internal.4939.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f2a46949ec2aa63756c8035a9ca6d575538cfd03ce8aa14c353de0bca24e331a
+size 5572

runs/May27_22-11-24_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716847896.ip-172-31-69-60.ec2.internal.9970.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd6e6b08da14c3893e6e1e49c80610339befd2d5b85901abfa7171171d4416ed
+size 5572

runs/May27_22-14-02_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716848053.ip-172-31-69-60.ec2.internal.14045.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34842942951fc88c3f4b4bb08a82d2ef804f410ddb067136dc4aad95b65576e1
+size 5986

runs/May27_22-15-16_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716848128.ip-172-31-69-60.ec2.internal.16064.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dbadc2f17fe22a11779d81ea25a4a4e71b7eba34a3b7c215c76db59308d23e7c
+size 5572

runs/May27_22-17-27_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716848272.ip-172-31-69-60.ec2.internal.19273.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:685d65721234f727b412b2430e4b97639e452c958e8eb1b626ca60da6fc9ac51
+size 5986

runs/May27_22-19-18_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716848375.ip-172-31-69-60.ec2.internal.21785.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2c4a080c09bd356ea0915e82e47e65a430773626cde4730374120b415ed7f27b
+size 5297

runs/May27_22-20-22_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716848439.ip-172-31-69-60.ec2.internal.23944.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ca769519f176451f3c8064f5d5c1e25154c69c9563dd89806e9cde06356b871
+size 5297

runs/May27_22-22-05_ip-172-31-69-60.ec2.internal/events.out.tfevents.1716848542.ip-172-31-69-60.ec2.internal.26567.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aabc977d6457a22fd50b73f0d67dfef24963b94f6c128b9d9f0ccd5e437ce377
+size 15950

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n'  + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": true,
+  "model_max_length": 2048,
+  "pad_token": "</s>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 0.9969230769230769,
+    "total_flos": 5623477606285312.0,
+    "train_loss": 0.7391852758548878,
+    "train_runtime": 18302.1626,
+    "train_samples": 31180,
+    "train_samples_per_second": 1.704,
+    "train_steps_per_second": 0.013
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,393 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.9969230769230769,
+  "eval_steps": 500,
+  "global_step": 243,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0041025641025641026,
+      "grad_norm": 0.26542961092188894,
+      "learning_rate": 8.000000000000001e-06,
+      "loss": 0.8534,
+      "step": 1
+    },
+    {
+      "epoch": 0.020512820512820513,
+      "grad_norm": 0.2656899378598073,
+      "learning_rate": 4e-05,
+      "loss": 0.8667,
+      "step": 5
+    },
+    {
+      "epoch": 0.041025641025641026,
+      "grad_norm": 0.2572181527992512,
+      "learning_rate": 8e-05,
+      "loss": 0.8031,
+      "step": 10
+    },
+    {
+      "epoch": 0.06153846153846154,
+      "grad_norm": 0.18710816410634198,
+      "learning_rate": 0.00012,
+      "loss": 0.7552,
+      "step": 15
+    },
+    {
+      "epoch": 0.08205128205128205,
+      "grad_norm": 0.17366628242950644,
+      "learning_rate": 0.00016,
+      "loss": 0.7434,
+      "step": 20
+    },
+    {
+      "epoch": 0.10256410256410256,
+      "grad_norm": 0.18209587440813843,
+      "learning_rate": 0.0002,
+      "loss": 0.7613,
+      "step": 25
+    },
+    {
+      "epoch": 0.12307692307692308,
+      "grad_norm": 0.17584855523843826,
+      "learning_rate": 0.00019974051702905277,
+      "loss": 0.7686,
+      "step": 30
+    },
+    {
+      "epoch": 0.14358974358974358,
+      "grad_norm": 0.1876526868957483,
+      "learning_rate": 0.00019896341474445525,
+      "loss": 0.7408,
+      "step": 35
+    },
+    {
+      "epoch": 0.1641025641025641,
+      "grad_norm": 0.14962305137903897,
+      "learning_rate": 0.00019767272604239824,
+      "loss": 0.7422,
+      "step": 40
+    },
+    {
+      "epoch": 0.18461538461538463,
+      "grad_norm": 0.16336671883236106,
+      "learning_rate": 0.00019587514915766124,
+      "loss": 0.7565,
+      "step": 45
+    },
+    {
+      "epoch": 0.20512820512820512,
+      "grad_norm": 0.15152018240587575,
+      "learning_rate": 0.00019358001290205543,
+      "loss": 0.7493,
+      "step": 50
+    },
+    {
+      "epoch": 0.22564102564102564,
+      "grad_norm": 0.14810534949892065,
+      "learning_rate": 0.0001907992282510675,
+      "loss": 0.7539,
+      "step": 55
+    },
+    {
+      "epoch": 0.24615384615384617,
+      "grad_norm": 0.16847734140964582,
+      "learning_rate": 0.00018754722652995347,
+      "loss": 0.7395,
+      "step": 60
+    },
+    {
+      "epoch": 0.26666666666666666,
+      "grad_norm": 0.15436628692835286,
+      "learning_rate": 0.00018384088452007578,
+      "loss": 0.747,
+      "step": 65
+    },
+    {
+      "epoch": 0.28717948717948716,
+      "grad_norm": 0.15652291403007046,
+      "learning_rate": 0.00017969943687415576,
+      "loss": 0.7506,
+      "step": 70
+    },
+    {
+      "epoch": 0.3076923076923077,
+      "grad_norm": 0.16413347022113,
+      "learning_rate": 0.0001751443762949772,
+      "loss": 0.7611,
+      "step": 75
+    },
+    {
+      "epoch": 0.3282051282051282,
+      "grad_norm": 0.16182771329972745,
+      "learning_rate": 0.00017019934199557867,
+      "loss": 0.7576,
+      "step": 80
+    },
+    {
+      "epoch": 0.3487179487179487,
+      "grad_norm": 0.137569364566755,
+      "learning_rate": 0.00016488999701978903,
+      "loss": 0.7451,
+      "step": 85
+    },
+    {
+      "epoch": 0.36923076923076925,
+      "grad_norm": 0.14900540538806303,
+      "learning_rate": 0.00015924389505977038,
+      "loss": 0.7197,
+      "step": 90
+    },
+    {
+      "epoch": 0.38974358974358975,
+      "grad_norm": 0.14655990407738764,
+      "learning_rate": 0.00015329033746173975,
+      "loss": 0.7149,
+      "step": 95
+    },
+    {
+      "epoch": 0.41025641025641024,
+      "grad_norm": 0.1402768270199422,
+      "learning_rate": 0.00014706022116196208,
+      "loss": 0.7018,
+      "step": 100
+    },
+    {
+      "epoch": 0.4307692307692308,
+      "grad_norm": 0.14134928021057855,
+      "learning_rate": 0.00014058587834217355,
+      "loss": 0.7324,
+      "step": 105
+    },
+    {
+      "epoch": 0.4512820512820513,
+      "grad_norm": 0.15600188868890955,
+      "learning_rate": 0.00013390090863657047,
+      "loss": 0.748,
+      "step": 110
+    },
+    {
+      "epoch": 0.4717948717948718,
+      "grad_norm": 0.13996117144988574,
+      "learning_rate": 0.0001270400047611508,
+      "loss": 0.7703,
+      "step": 115
+    },
+    {
+      "epoch": 0.49230769230769234,
+      "grad_norm": 0.1510982087519504,
+      "learning_rate": 0.00012003877247033411,
+      "loss": 0.7515,
+      "step": 120
+    },
+    {
+      "epoch": 0.5128205128205128,
+      "grad_norm": 0.14250885612041378,
+      "learning_rate": 0.00011293354577522263,
+      "loss": 0.7196,
+      "step": 125
+    },
+    {
+      "epoch": 0.5333333333333333,
+      "grad_norm": 0.14795873562666287,
+      "learning_rate": 0.00010576119838245844,
+      "loss": 0.731,
+      "step": 130
+    },
+    {
+      "epoch": 0.5538461538461539,
+      "grad_norm": 0.13063022182064904,
+      "learning_rate": 9.85589523322443e-05,
+      "loss": 0.7301,
+      "step": 135
+    },
+    {
+      "epoch": 0.5743589743589743,
+      "grad_norm": 0.13557487318693218,
+      "learning_rate": 9.136418482863229e-05,
+      "loss": 0.718,
+      "step": 140
+    },
+    {
+      "epoch": 0.5948717948717949,
+      "grad_norm": 0.15507641481392034,
+      "learning_rate": 8.42142342645646e-05,
+      "loss": 0.7193,
+      "step": 145
+    },
+    {
+      "epoch": 0.6153846153846154,
+      "grad_norm": 0.15494002592225475,
+      "learning_rate": 7.714620644833111e-05,
+      "loss": 0.731,
+      "step": 150
+    },
+    {
+      "epoch": 0.6358974358974359,
+      "grad_norm": 0.1601203804358568,
+      "learning_rate": 7.019678203706163e-05,
+      "loss": 0.75,
+      "step": 155
+    },
+    {
+      "epoch": 0.6564102564102564,
+      "grad_norm": 0.15685275896075176,
+      "learning_rate": 6.340202617660842e-05,
+      "loss": 0.7505,
+      "step": 160
+    },
+    {
+      "epoch": 0.676923076923077,
+      "grad_norm": 0.1416704035001306,
+      "learning_rate": 5.679720133572206e-05,
+      "loss": 0.7311,
+      "step": 165
+    },
+    {
+      "epoch": 0.6974358974358974,
+      "grad_norm": 0.14126335283382643,
+      "learning_rate": 5.0416584305848524e-05,
+      "loss": 0.755,
+      "step": 170
+    },
+    {
+      "epoch": 0.717948717948718,
+      "grad_norm": 0.13481365291511518,
+      "learning_rate": 4.4293288316255653e-05,
+      "loss": 0.695,
+      "step": 175
+    },
+    {
+      "epoch": 0.7384615384615385,
+      "grad_norm": 0.14785228000969083,
+      "learning_rate": 3.845909118765073e-05,
+      "loss": 0.7209,
+      "step": 180
+    },
+    {
+      "epoch": 0.7589743589743589,
+      "grad_norm": 0.1503268472656009,
+      "learning_rate": 3.294427041611425e-05,
+      "loss": 0.7307,
+      "step": 185
+    },
+    {
+      "epoch": 0.7794871794871795,
+      "grad_norm": 0.15299701883959732,
+      "learning_rate": 2.7777446043207058e-05,
+      "loss": 0.7351,
+      "step": 190
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.1373494159366909,
+      "learning_rate": 2.2985432127701946e-05,
+      "loss": 0.7304,
+      "step": 195
+    },
+    {
+      "epoch": 0.8205128205128205,
+      "grad_norm": 0.16245499908137687,
+      "learning_rate": 1.859309758975132e-05,
+      "loss": 0.7385,
+      "step": 200
+    },
+    {
+      "epoch": 0.841025641025641,
+      "grad_norm": 0.13807085431435603,
+      "learning_rate": 1.462323714966114e-05,
+      "loss": 0.714,
+      "step": 205
+    },
+    {
+      "epoch": 0.8615384615384616,
+      "grad_norm": 0.12926916684005163,
+      "learning_rate": 1.1096453031056264e-05,
+      "loss": 0.7078,
+      "step": 210
+    },
+    {
+      "epoch": 0.882051282051282,
+      "grad_norm": 0.12918592796308734,
+      "learning_rate": 8.031048042356392e-06,
+      "loss": 0.7208,
+      "step": 215
+    },
+    {
+      "epoch": 0.9025641025641026,
+      "grad_norm": 0.14837431396022016,
+      "learning_rate": 5.442930591433992e-06,
+      "loss": 0.7435,
+      "step": 220
+    },
+    {
+      "epoch": 0.9230769230769231,
+      "grad_norm": 0.14838485907528307,
+      "learning_rate": 3.3455321263955786e-06,
+      "loss": 0.7255,
+      "step": 225
+    },
+    {
+      "epoch": 0.9435897435897436,
+      "grad_norm": 0.1480931290003826,
+      "learning_rate": 1.7497374309405346e-06,
+      "loss": 0.695,
+      "step": 230
+    },
+    {
+      "epoch": 0.9641025641025641,
+      "grad_norm": 0.14733343614150893,
+      "learning_rate": 6.638281360408339e-07,
+      "loss": 0.7012,
+      "step": 235
+    },
+    {
+      "epoch": 0.9846153846153847,
+      "grad_norm": 0.1368626907349573,
+      "learning_rate": 9.343974109685682e-08,
+      "loss": 0.7092,
+      "step": 240
+    },
+    {
+      "epoch": 0.9969230769230769,
+      "eval_loss": NaN,
+      "eval_runtime": 748.4122,
+      "eval_samples_per_second": 1.545,
+      "eval_steps_per_second": 0.386,
+      "step": 243
+    },
+    {
+      "epoch": 0.9969230769230769,
+      "step": 243,
+      "total_flos": 5623477606285312.0,
+      "train_loss": 0.7391852758548878,
+      "train_runtime": 18302.1626,
+      "train_samples_per_second": 1.704,
+      "train_steps_per_second": 0.013
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 243,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 1000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5623477606285312.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:92484e9a5c8df8e8c9f43c40230cc189f21535aa92db5a37bb10f16e93a8e979
+size 6456