AmberYifan commited on
Commit
eb50c31
1 Parent(s): 8220e38

Model save

Browse files
README.md ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ base_model: microsoft/Phi-3-small-8k-instruct
4
+ tags:
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: phi3-spin-zephyr-data
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # phi3-spin-zephyr-data
15
+
16
+ This model is a fine-tuned version of [microsoft/Phi-3-small-8k-instruct](https://huggingface.co/microsoft/Phi-3-small-8k-instruct) on an unspecified dataset.
17
+ It achieves the following results on the evaluation set:
18
+ - Loss: 0.1680
19
+ - Rewards/real: -6.9251
20
+ - Rewards/generated: -22.7311
21
+ - Rewards/accuracies: 0.9443
22
+ - Rewards/margins: 15.8060
23
+ - Logps/generated: -518.7460
24
+ - Logps/real: -321.6379
25
+ - Logits/generated: -inf
26
+ - Logits/real: -inf
27
+
28
+ ## Model description
29
+
30
+ More information needed
31
+
32
+ ## Intended uses & limitations
33
+
34
+ More information needed
35
+
36
+ ## Training and evaluation data
37
+
38
+ More information needed
39
+
40
+ ## Training procedure
41
+
42
+ ### Training hyperparameters
43
+
44
+ The following hyperparameters were used during training:
45
+ - learning_rate: 5e-07
46
+ - train_batch_size: 8
47
+ - eval_batch_size: 8
48
+ - seed: 42
49
+ - distributed_type: multi-GPU
50
+ - num_devices: 4
51
+ - total_train_batch_size: 32
52
+ - total_eval_batch_size: 32
53
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
54
+ - lr_scheduler_type: linear
55
+ - lr_scheduler_warmup_ratio: 0.1
56
+ - num_epochs: 1
57
+
58
+ ### Training results
59
+
60
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/real | Rewards/generated | Rewards/accuracies | Rewards/margins | Logps/generated | Logps/real | Logits/generated | Logits/real |
61
+ |:-------------:|:-----:|:----:|:---------------:|:------------:|:-----------------:|:------------------:|:---------------:|:---------------:|:----------:|:----------------:|:-----------:|
62
+ | 0.2807 | 0.64 | 500 | 0.1680 | -6.9251 | -22.7311 | 0.9443 | 15.8060 | -518.7460 | -321.6379 | -inf | -inf |
63
+
64
+
65
+ ### Framework versions
66
+
67
+ - Transformers 4.37.0
68
+ - Pytorch 2.1.2+cu121
69
+ - Datasets 2.14.6
70
+ - Tokenizers 0.15.2
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.6284302195911938,
4
+ "train_runtime": 5860.6637,
5
+ "train_samples": 25000,
6
+ "train_samples_per_second": 4.266,
7
+ "train_steps_per_second": 0.133
8
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 100257,
4
+ "eos_token_id": [
5
+ 100257,
6
+ 100266
7
+ ],
8
+ "transformers_version": "4.37.0"
9
+ }
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e61a098b9291287a13df8ae4fcdb00643a5ec324bf65af655801e5cb47a21618
3
+ size 4832943104
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b112110dccae5c1237da32a3ef6bd78b31a00f092836ea5d6725a775764041d
3
+ size 4799608224
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b8607487f2f6919b39039edda7f2d6f3a681042b5433ed53b46c2b0e1484738
3
+ size 4799608240
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:713883d1a11f48447b2142edf25e4ea6b77b0ed0851ce9d404c3ca3b854a8975
3
+ size 352437304
model.safetensors.index.json ADDED
@@ -0,0 +1,426 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 14784548864
4
+ },
5
+ "weight_map": {
6
+ "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
7
+ "model.final_layernorm.bias": "model-00004-of-00004.safetensors",
8
+ "model.final_layernorm.weight": "model-00004-of-00004.safetensors",
9
+ "model.layers.0.input_layernorm.bias": "model-00001-of-00004.safetensors",
10
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
11
+ "model.layers.0.mlp.down_proj.bias": "model-00001-of-00004.safetensors",
12
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
13
+ "model.layers.0.mlp.up_proj.bias": "model-00001-of-00004.safetensors",
14
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
15
+ "model.layers.0.post_attention_layernorm.bias": "model-00001-of-00004.safetensors",
16
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
17
+ "model.layers.0.self_attn.dense.bias": "model-00001-of-00004.safetensors",
18
+ "model.layers.0.self_attn.dense.weight": "model-00001-of-00004.safetensors",
19
+ "model.layers.0.self_attn.query_key_value.bias": "model-00001-of-00004.safetensors",
20
+ "model.layers.0.self_attn.query_key_value.weight": "model-00001-of-00004.safetensors",
21
+ "model.layers.0.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
22
+ "model.layers.1.input_layernorm.bias": "model-00001-of-00004.safetensors",
23
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
24
+ "model.layers.1.mlp.down_proj.bias": "model-00001-of-00004.safetensors",
25
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
26
+ "model.layers.1.mlp.up_proj.bias": "model-00001-of-00004.safetensors",
27
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
28
+ "model.layers.1.post_attention_layernorm.bias": "model-00001-of-00004.safetensors",
29
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
30
+ "model.layers.1.self_attn.dense.bias": "model-00001-of-00004.safetensors",
31
+ "model.layers.1.self_attn.dense.weight": "model-00001-of-00004.safetensors",
32
+ "model.layers.1.self_attn.query_key_value.bias": "model-00001-of-00004.safetensors",
33
+ "model.layers.1.self_attn.query_key_value.weight": "model-00001-of-00004.safetensors",
34
+ "model.layers.1.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
35
+ "model.layers.10.input_layernorm.bias": "model-00002-of-00004.safetensors",
36
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
37
+ "model.layers.10.mlp.down_proj.bias": "model-00002-of-00004.safetensors",
38
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
39
+ "model.layers.10.mlp.up_proj.bias": "model-00002-of-00004.safetensors",
40
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
41
+ "model.layers.10.post_attention_layernorm.bias": "model-00002-of-00004.safetensors",
42
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
43
+ "model.layers.10.self_attn.dense.bias": "model-00002-of-00004.safetensors",
44
+ "model.layers.10.self_attn.dense.weight": "model-00002-of-00004.safetensors",
45
+ "model.layers.10.self_attn.query_key_value.bias": "model-00002-of-00004.safetensors",
46
+ "model.layers.10.self_attn.query_key_value.weight": "model-00002-of-00004.safetensors",
47
+ "model.layers.10.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
48
+ "model.layers.11.input_layernorm.bias": "model-00002-of-00004.safetensors",
49
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
50
+ "model.layers.11.mlp.down_proj.bias": "model-00002-of-00004.safetensors",
51
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
52
+ "model.layers.11.mlp.up_proj.bias": "model-00002-of-00004.safetensors",
53
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
54
+ "model.layers.11.post_attention_layernorm.bias": "model-00002-of-00004.safetensors",
55
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
56
+ "model.layers.11.self_attn.dense.bias": "model-00002-of-00004.safetensors",
57
+ "model.layers.11.self_attn.dense.weight": "model-00002-of-00004.safetensors",
58
+ "model.layers.11.self_attn.query_key_value.bias": "model-00002-of-00004.safetensors",
59
+ "model.layers.11.self_attn.query_key_value.weight": "model-00002-of-00004.safetensors",
60
+ "model.layers.11.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
61
+ "model.layers.12.input_layernorm.bias": "model-00002-of-00004.safetensors",
62
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
63
+ "model.layers.12.mlp.down_proj.bias": "model-00002-of-00004.safetensors",
64
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
65
+ "model.layers.12.mlp.up_proj.bias": "model-00002-of-00004.safetensors",
66
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
67
+ "model.layers.12.post_attention_layernorm.bias": "model-00002-of-00004.safetensors",
68
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
69
+ "model.layers.12.self_attn.dense.bias": "model-00002-of-00004.safetensors",
70
+ "model.layers.12.self_attn.dense.weight": "model-00002-of-00004.safetensors",
71
+ "model.layers.12.self_attn.query_key_value.bias": "model-00002-of-00004.safetensors",
72
+ "model.layers.12.self_attn.query_key_value.weight": "model-00002-of-00004.safetensors",
73
+ "model.layers.12.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
74
+ "model.layers.13.input_layernorm.bias": "model-00002-of-00004.safetensors",
75
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
76
+ "model.layers.13.mlp.down_proj.bias": "model-00002-of-00004.safetensors",
77
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
78
+ "model.layers.13.mlp.up_proj.bias": "model-00002-of-00004.safetensors",
79
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
80
+ "model.layers.13.post_attention_layernorm.bias": "model-00002-of-00004.safetensors",
81
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
82
+ "model.layers.13.self_attn.dense.bias": "model-00002-of-00004.safetensors",
83
+ "model.layers.13.self_attn.dense.weight": "model-00002-of-00004.safetensors",
84
+ "model.layers.13.self_attn.query_key_value.bias": "model-00002-of-00004.safetensors",
85
+ "model.layers.13.self_attn.query_key_value.weight": "model-00002-of-00004.safetensors",
86
+ "model.layers.13.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
87
+ "model.layers.14.input_layernorm.bias": "model-00002-of-00004.safetensors",
88
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
89
+ "model.layers.14.mlp.down_proj.bias": "model-00002-of-00004.safetensors",
90
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
91
+ "model.layers.14.mlp.up_proj.bias": "model-00002-of-00004.safetensors",
92
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
93
+ "model.layers.14.post_attention_layernorm.bias": "model-00002-of-00004.safetensors",
94
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
95
+ "model.layers.14.self_attn.dense.bias": "model-00002-of-00004.safetensors",
96
+ "model.layers.14.self_attn.dense.weight": "model-00002-of-00004.safetensors",
97
+ "model.layers.14.self_attn.query_key_value.bias": "model-00002-of-00004.safetensors",
98
+ "model.layers.14.self_attn.query_key_value.weight": "model-00002-of-00004.safetensors",
99
+ "model.layers.14.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
100
+ "model.layers.15.input_layernorm.bias": "model-00002-of-00004.safetensors",
101
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
102
+ "model.layers.15.mlp.down_proj.bias": "model-00002-of-00004.safetensors",
103
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
104
+ "model.layers.15.mlp.up_proj.bias": "model-00002-of-00004.safetensors",
105
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
106
+ "model.layers.15.post_attention_layernorm.bias": "model-00002-of-00004.safetensors",
107
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
108
+ "model.layers.15.self_attn.dense.bias": "model-00002-of-00004.safetensors",
109
+ "model.layers.15.self_attn.dense.weight": "model-00002-of-00004.safetensors",
110
+ "model.layers.15.self_attn.query_key_value.bias": "model-00002-of-00004.safetensors",
111
+ "model.layers.15.self_attn.query_key_value.weight": "model-00002-of-00004.safetensors",
112
+ "model.layers.15.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
113
+ "model.layers.16.input_layernorm.bias": "model-00002-of-00004.safetensors",
114
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
115
+ "model.layers.16.mlp.down_proj.bias": "model-00002-of-00004.safetensors",
116
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
117
+ "model.layers.16.mlp.up_proj.bias": "model-00002-of-00004.safetensors",
118
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
119
+ "model.layers.16.post_attention_layernorm.bias": "model-00002-of-00004.safetensors",
120
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
121
+ "model.layers.16.self_attn.dense.bias": "model-00002-of-00004.safetensors",
122
+ "model.layers.16.self_attn.dense.weight": "model-00002-of-00004.safetensors",
123
+ "model.layers.16.self_attn.query_key_value.bias": "model-00002-of-00004.safetensors",
124
+ "model.layers.16.self_attn.query_key_value.weight": "model-00002-of-00004.safetensors",
125
+ "model.layers.16.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
126
+ "model.layers.17.input_layernorm.bias": "model-00002-of-00004.safetensors",
127
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
128
+ "model.layers.17.mlp.down_proj.bias": "model-00002-of-00004.safetensors",
129
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
130
+ "model.layers.17.mlp.up_proj.bias": "model-00002-of-00004.safetensors",
131
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
132
+ "model.layers.17.post_attention_layernorm.bias": "model-00002-of-00004.safetensors",
133
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
134
+ "model.layers.17.self_attn.dense.bias": "model-00002-of-00004.safetensors",
135
+ "model.layers.17.self_attn.dense.weight": "model-00002-of-00004.safetensors",
136
+ "model.layers.17.self_attn.query_key_value.bias": "model-00002-of-00004.safetensors",
137
+ "model.layers.17.self_attn.query_key_value.weight": "model-00002-of-00004.safetensors",
138
+ "model.layers.17.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
139
+ "model.layers.18.input_layernorm.bias": "model-00002-of-00004.safetensors",
140
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
141
+ "model.layers.18.mlp.down_proj.bias": "model-00002-of-00004.safetensors",
142
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
143
+ "model.layers.18.mlp.up_proj.bias": "model-00002-of-00004.safetensors",
144
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
145
+ "model.layers.18.post_attention_layernorm.bias": "model-00002-of-00004.safetensors",
146
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
147
+ "model.layers.18.self_attn.dense.bias": "model-00002-of-00004.safetensors",
148
+ "model.layers.18.self_attn.dense.weight": "model-00002-of-00004.safetensors",
149
+ "model.layers.18.self_attn.query_key_value.bias": "model-00002-of-00004.safetensors",
150
+ "model.layers.18.self_attn.query_key_value.weight": "model-00002-of-00004.safetensors",
151
+ "model.layers.18.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
152
+ "model.layers.19.input_layernorm.bias": "model-00002-of-00004.safetensors",
153
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
154
+ "model.layers.19.mlp.down_proj.bias": "model-00002-of-00004.safetensors",
155
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
156
+ "model.layers.19.mlp.up_proj.bias": "model-00002-of-00004.safetensors",
157
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
158
+ "model.layers.19.post_attention_layernorm.bias": "model-00002-of-00004.safetensors",
159
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
160
+ "model.layers.19.self_attn.dense.bias": "model-00002-of-00004.safetensors",
161
+ "model.layers.19.self_attn.dense.weight": "model-00002-of-00004.safetensors",
162
+ "model.layers.19.self_attn.query_key_value.bias": "model-00002-of-00004.safetensors",
163
+ "model.layers.19.self_attn.query_key_value.weight": "model-00002-of-00004.safetensors",
164
+ "model.layers.19.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
165
+ "model.layers.2.input_layernorm.bias": "model-00001-of-00004.safetensors",
166
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
167
+ "model.layers.2.mlp.down_proj.bias": "model-00001-of-00004.safetensors",
168
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
169
+ "model.layers.2.mlp.up_proj.bias": "model-00001-of-00004.safetensors",
170
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
171
+ "model.layers.2.post_attention_layernorm.bias": "model-00001-of-00004.safetensors",
172
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
173
+ "model.layers.2.self_attn.dense.bias": "model-00001-of-00004.safetensors",
174
+ "model.layers.2.self_attn.dense.weight": "model-00001-of-00004.safetensors",
175
+ "model.layers.2.self_attn.query_key_value.bias": "model-00001-of-00004.safetensors",
176
+ "model.layers.2.self_attn.query_key_value.weight": "model-00001-of-00004.safetensors",
177
+ "model.layers.2.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
178
+ "model.layers.20.input_layernorm.bias": "model-00003-of-00004.safetensors",
179
+ "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
180
+ "model.layers.20.mlp.down_proj.bias": "model-00003-of-00004.safetensors",
181
+ "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
182
+ "model.layers.20.mlp.up_proj.bias": "model-00003-of-00004.safetensors",
183
+ "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
184
+ "model.layers.20.post_attention_layernorm.bias": "model-00003-of-00004.safetensors",
185
+ "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
186
+ "model.layers.20.self_attn.dense.bias": "model-00002-of-00004.safetensors",
187
+ "model.layers.20.self_attn.dense.weight": "model-00002-of-00004.safetensors",
188
+ "model.layers.20.self_attn.query_key_value.bias": "model-00002-of-00004.safetensors",
189
+ "model.layers.20.self_attn.query_key_value.weight": "model-00002-of-00004.safetensors",
190
+ "model.layers.20.self_attn.rotary_emb.inv_freq": "model-00002-of-00004.safetensors",
191
+ "model.layers.21.input_layernorm.bias": "model-00003-of-00004.safetensors",
192
+ "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
193
+ "model.layers.21.mlp.down_proj.bias": "model-00003-of-00004.safetensors",
194
+ "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
195
+ "model.layers.21.mlp.up_proj.bias": "model-00003-of-00004.safetensors",
196
+ "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
197
+ "model.layers.21.post_attention_layernorm.bias": "model-00003-of-00004.safetensors",
198
+ "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
199
+ "model.layers.21.self_attn.dense.bias": "model-00003-of-00004.safetensors",
200
+ "model.layers.21.self_attn.dense.weight": "model-00003-of-00004.safetensors",
201
+ "model.layers.21.self_attn.query_key_value.bias": "model-00003-of-00004.safetensors",
202
+ "model.layers.21.self_attn.query_key_value.weight": "model-00003-of-00004.safetensors",
203
+ "model.layers.21.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
204
+ "model.layers.22.input_layernorm.bias": "model-00003-of-00004.safetensors",
205
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
206
+ "model.layers.22.mlp.down_proj.bias": "model-00003-of-00004.safetensors",
207
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
208
+ "model.layers.22.mlp.up_proj.bias": "model-00003-of-00004.safetensors",
209
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
210
+ "model.layers.22.post_attention_layernorm.bias": "model-00003-of-00004.safetensors",
211
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
212
+ "model.layers.22.self_attn.dense.bias": "model-00003-of-00004.safetensors",
213
+ "model.layers.22.self_attn.dense.weight": "model-00003-of-00004.safetensors",
214
+ "model.layers.22.self_attn.query_key_value.bias": "model-00003-of-00004.safetensors",
215
+ "model.layers.22.self_attn.query_key_value.weight": "model-00003-of-00004.safetensors",
216
+ "model.layers.22.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
217
+ "model.layers.23.input_layernorm.bias": "model-00003-of-00004.safetensors",
218
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
219
+ "model.layers.23.mlp.down_proj.bias": "model-00003-of-00004.safetensors",
220
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
221
+ "model.layers.23.mlp.up_proj.bias": "model-00003-of-00004.safetensors",
222
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
223
+ "model.layers.23.post_attention_layernorm.bias": "model-00003-of-00004.safetensors",
224
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
225
+ "model.layers.23.self_attn.dense.bias": "model-00003-of-00004.safetensors",
226
+ "model.layers.23.self_attn.dense.weight": "model-00003-of-00004.safetensors",
227
+ "model.layers.23.self_attn.query_key_value.bias": "model-00003-of-00004.safetensors",
228
+ "model.layers.23.self_attn.query_key_value.weight": "model-00003-of-00004.safetensors",
229
+ "model.layers.23.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
230
+ "model.layers.24.input_layernorm.bias": "model-00003-of-00004.safetensors",
231
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
232
+ "model.layers.24.mlp.down_proj.bias": "model-00003-of-00004.safetensors",
233
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
234
+ "model.layers.24.mlp.up_proj.bias": "model-00003-of-00004.safetensors",
235
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
236
+ "model.layers.24.post_attention_layernorm.bias": "model-00003-of-00004.safetensors",
237
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
238
+ "model.layers.24.self_attn.dense.bias": "model-00003-of-00004.safetensors",
239
+ "model.layers.24.self_attn.dense.weight": "model-00003-of-00004.safetensors",
240
+ "model.layers.24.self_attn.query_key_value.bias": "model-00003-of-00004.safetensors",
241
+ "model.layers.24.self_attn.query_key_value.weight": "model-00003-of-00004.safetensors",
242
+ "model.layers.24.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
243
+ "model.layers.25.input_layernorm.bias": "model-00003-of-00004.safetensors",
244
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
245
+ "model.layers.25.mlp.down_proj.bias": "model-00003-of-00004.safetensors",
246
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
247
+ "model.layers.25.mlp.up_proj.bias": "model-00003-of-00004.safetensors",
248
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
249
+ "model.layers.25.post_attention_layernorm.bias": "model-00003-of-00004.safetensors",
250
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
251
+ "model.layers.25.self_attn.dense.bias": "model-00003-of-00004.safetensors",
252
+ "model.layers.25.self_attn.dense.weight": "model-00003-of-00004.safetensors",
253
+ "model.layers.25.self_attn.query_key_value.bias": "model-00003-of-00004.safetensors",
254
+ "model.layers.25.self_attn.query_key_value.weight": "model-00003-of-00004.safetensors",
255
+ "model.layers.25.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
256
+ "model.layers.26.input_layernorm.bias": "model-00003-of-00004.safetensors",
257
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
258
+ "model.layers.26.mlp.down_proj.bias": "model-00003-of-00004.safetensors",
259
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
260
+ "model.layers.26.mlp.up_proj.bias": "model-00003-of-00004.safetensors",
261
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
262
+ "model.layers.26.post_attention_layernorm.bias": "model-00003-of-00004.safetensors",
263
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
264
+ "model.layers.26.self_attn.dense.bias": "model-00003-of-00004.safetensors",
265
+ "model.layers.26.self_attn.dense.weight": "model-00003-of-00004.safetensors",
266
+ "model.layers.26.self_attn.query_key_value.bias": "model-00003-of-00004.safetensors",
267
+ "model.layers.26.self_attn.query_key_value.weight": "model-00003-of-00004.safetensors",
268
+ "model.layers.26.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
269
+ "model.layers.27.input_layernorm.bias": "model-00003-of-00004.safetensors",
270
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
271
+ "model.layers.27.mlp.down_proj.bias": "model-00003-of-00004.safetensors",
272
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
273
+ "model.layers.27.mlp.up_proj.bias": "model-00003-of-00004.safetensors",
274
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
275
+ "model.layers.27.post_attention_layernorm.bias": "model-00003-of-00004.safetensors",
276
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
277
+ "model.layers.27.self_attn.dense.bias": "model-00003-of-00004.safetensors",
278
+ "model.layers.27.self_attn.dense.weight": "model-00003-of-00004.safetensors",
279
+ "model.layers.27.self_attn.query_key_value.bias": "model-00003-of-00004.safetensors",
280
+ "model.layers.27.self_attn.query_key_value.weight": "model-00003-of-00004.safetensors",
281
+ "model.layers.27.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
282
+ "model.layers.28.input_layernorm.bias": "model-00003-of-00004.safetensors",
283
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
284
+ "model.layers.28.mlp.down_proj.bias": "model-00003-of-00004.safetensors",
285
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
286
+ "model.layers.28.mlp.up_proj.bias": "model-00003-of-00004.safetensors",
287
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
288
+ "model.layers.28.post_attention_layernorm.bias": "model-00003-of-00004.safetensors",
289
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
290
+ "model.layers.28.self_attn.dense.bias": "model-00003-of-00004.safetensors",
291
+ "model.layers.28.self_attn.dense.weight": "model-00003-of-00004.safetensors",
292
+ "model.layers.28.self_attn.query_key_value.bias": "model-00003-of-00004.safetensors",
293
+ "model.layers.28.self_attn.query_key_value.weight": "model-00003-of-00004.safetensors",
294
+ "model.layers.28.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
295
+ "model.layers.29.input_layernorm.bias": "model-00003-of-00004.safetensors",
296
+ "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
297
+ "model.layers.29.mlp.down_proj.bias": "model-00003-of-00004.safetensors",
298
+ "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
299
+ "model.layers.29.mlp.up_proj.bias": "model-00003-of-00004.safetensors",
300
+ "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
301
+ "model.layers.29.post_attention_layernorm.bias": "model-00003-of-00004.safetensors",
302
+ "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
303
+ "model.layers.29.self_attn.dense.bias": "model-00003-of-00004.safetensors",
304
+ "model.layers.29.self_attn.dense.weight": "model-00003-of-00004.safetensors",
305
+ "model.layers.29.self_attn.query_key_value.bias": "model-00003-of-00004.safetensors",
306
+ "model.layers.29.self_attn.query_key_value.weight": "model-00003-of-00004.safetensors",
307
+ "model.layers.29.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
308
+ "model.layers.3.input_layernorm.bias": "model-00001-of-00004.safetensors",
309
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
310
+ "model.layers.3.mlp.down_proj.bias": "model-00001-of-00004.safetensors",
311
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
312
+ "model.layers.3.mlp.up_proj.bias": "model-00001-of-00004.safetensors",
313
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
314
+ "model.layers.3.post_attention_layernorm.bias": "model-00001-of-00004.safetensors",
315
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
316
+ "model.layers.3.self_attn.dense.bias": "model-00001-of-00004.safetensors",
317
+ "model.layers.3.self_attn.dense.weight": "model-00001-of-00004.safetensors",
318
+ "model.layers.3.self_attn.query_key_value.bias": "model-00001-of-00004.safetensors",
319
+ "model.layers.3.self_attn.query_key_value.weight": "model-00001-of-00004.safetensors",
320
+ "model.layers.3.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
321
+ "model.layers.30.input_layernorm.bias": "model-00003-of-00004.safetensors",
322
+ "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
323
+ "model.layers.30.mlp.down_proj.bias": "model-00003-of-00004.safetensors",
324
+ "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
325
+ "model.layers.30.mlp.up_proj.bias": "model-00003-of-00004.safetensors",
326
+ "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
327
+ "model.layers.30.post_attention_layernorm.bias": "model-00003-of-00004.safetensors",
328
+ "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
329
+ "model.layers.30.self_attn.dense.bias": "model-00003-of-00004.safetensors",
330
+ "model.layers.30.self_attn.dense.weight": "model-00003-of-00004.safetensors",
331
+ "model.layers.30.self_attn.query_key_value.bias": "model-00003-of-00004.safetensors",
332
+ "model.layers.30.self_attn.query_key_value.weight": "model-00003-of-00004.safetensors",
333
+ "model.layers.30.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
334
+ "model.layers.31.input_layernorm.bias": "model-00004-of-00004.safetensors",
335
+ "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
336
+ "model.layers.31.mlp.down_proj.bias": "model-00004-of-00004.safetensors",
337
+ "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
338
+ "model.layers.31.mlp.up_proj.bias": "model-00004-of-00004.safetensors",
339
+ "model.layers.31.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
340
+ "model.layers.31.post_attention_layernorm.bias": "model-00004-of-00004.safetensors",
341
+ "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
342
+ "model.layers.31.self_attn.dense.bias": "model-00003-of-00004.safetensors",
343
+ "model.layers.31.self_attn.dense.weight": "model-00003-of-00004.safetensors",
344
+ "model.layers.31.self_attn.query_key_value.bias": "model-00003-of-00004.safetensors",
345
+ "model.layers.31.self_attn.query_key_value.weight": "model-00003-of-00004.safetensors",
346
+ "model.layers.31.self_attn.rotary_emb.inv_freq": "model-00003-of-00004.safetensors",
347
+ "model.layers.4.input_layernorm.bias": "model-00001-of-00004.safetensors",
348
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
349
+ "model.layers.4.mlp.down_proj.bias": "model-00001-of-00004.safetensors",
350
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
351
+ "model.layers.4.mlp.up_proj.bias": "model-00001-of-00004.safetensors",
352
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
353
+ "model.layers.4.post_attention_layernorm.bias": "model-00001-of-00004.safetensors",
354
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
355
+ "model.layers.4.self_attn.dense.bias": "model-00001-of-00004.safetensors",
356
+ "model.layers.4.self_attn.dense.weight": "model-00001-of-00004.safetensors",
357
+ "model.layers.4.self_attn.query_key_value.bias": "model-00001-of-00004.safetensors",
358
+ "model.layers.4.self_attn.query_key_value.weight": "model-00001-of-00004.safetensors",
359
+ "model.layers.4.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
360
+ "model.layers.5.input_layernorm.bias": "model-00001-of-00004.safetensors",
361
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
362
+ "model.layers.5.mlp.down_proj.bias": "model-00001-of-00004.safetensors",
363
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
364
+ "model.layers.5.mlp.up_proj.bias": "model-00001-of-00004.safetensors",
365
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
366
+ "model.layers.5.post_attention_layernorm.bias": "model-00001-of-00004.safetensors",
367
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
368
+ "model.layers.5.self_attn.dense.bias": "model-00001-of-00004.safetensors",
369
+ "model.layers.5.self_attn.dense.weight": "model-00001-of-00004.safetensors",
370
+ "model.layers.5.self_attn.query_key_value.bias": "model-00001-of-00004.safetensors",
371
+ "model.layers.5.self_attn.query_key_value.weight": "model-00001-of-00004.safetensors",
372
+ "model.layers.5.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
373
+ "model.layers.6.input_layernorm.bias": "model-00001-of-00004.safetensors",
374
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
375
+ "model.layers.6.mlp.down_proj.bias": "model-00001-of-00004.safetensors",
376
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
377
+ "model.layers.6.mlp.up_proj.bias": "model-00001-of-00004.safetensors",
378
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
379
+ "model.layers.6.post_attention_layernorm.bias": "model-00001-of-00004.safetensors",
380
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
381
+ "model.layers.6.self_attn.dense.bias": "model-00001-of-00004.safetensors",
382
+ "model.layers.6.self_attn.dense.weight": "model-00001-of-00004.safetensors",
383
+ "model.layers.6.self_attn.query_key_value.bias": "model-00001-of-00004.safetensors",
384
+ "model.layers.6.self_attn.query_key_value.weight": "model-00001-of-00004.safetensors",
385
+ "model.layers.6.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
386
+ "model.layers.7.input_layernorm.bias": "model-00001-of-00004.safetensors",
387
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
388
+ "model.layers.7.mlp.down_proj.bias": "model-00001-of-00004.safetensors",
389
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
390
+ "model.layers.7.mlp.up_proj.bias": "model-00001-of-00004.safetensors",
391
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
392
+ "model.layers.7.post_attention_layernorm.bias": "model-00001-of-00004.safetensors",
393
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
394
+ "model.layers.7.self_attn.dense.bias": "model-00001-of-00004.safetensors",
395
+ "model.layers.7.self_attn.dense.weight": "model-00001-of-00004.safetensors",
396
+ "model.layers.7.self_attn.query_key_value.bias": "model-00001-of-00004.safetensors",
397
+ "model.layers.7.self_attn.query_key_value.weight": "model-00001-of-00004.safetensors",
398
+ "model.layers.7.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
399
+ "model.layers.8.input_layernorm.bias": "model-00001-of-00004.safetensors",
400
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
401
+ "model.layers.8.mlp.down_proj.bias": "model-00001-of-00004.safetensors",
402
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
403
+ "model.layers.8.mlp.up_proj.bias": "model-00001-of-00004.safetensors",
404
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
405
+ "model.layers.8.post_attention_layernorm.bias": "model-00001-of-00004.safetensors",
406
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
407
+ "model.layers.8.self_attn.dense.bias": "model-00001-of-00004.safetensors",
408
+ "model.layers.8.self_attn.dense.weight": "model-00001-of-00004.safetensors",
409
+ "model.layers.8.self_attn.query_key_value.bias": "model-00001-of-00004.safetensors",
410
+ "model.layers.8.self_attn.query_key_value.weight": "model-00001-of-00004.safetensors",
411
+ "model.layers.8.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors",
412
+ "model.layers.9.input_layernorm.bias": "model-00002-of-00004.safetensors",
413
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
414
+ "model.layers.9.mlp.down_proj.bias": "model-00002-of-00004.safetensors",
415
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
416
+ "model.layers.9.mlp.up_proj.bias": "model-00002-of-00004.safetensors",
417
+ "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
418
+ "model.layers.9.post_attention_layernorm.bias": "model-00002-of-00004.safetensors",
419
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
420
+ "model.layers.9.self_attn.dense.bias": "model-00001-of-00004.safetensors",
421
+ "model.layers.9.self_attn.dense.weight": "model-00001-of-00004.safetensors",
422
+ "model.layers.9.self_attn.query_key_value.bias": "model-00001-of-00004.safetensors",
423
+ "model.layers.9.self_attn.query_key_value.weight": "model-00001-of-00004.safetensors",
424
+ "model.layers.9.self_attn.rotary_emb.inv_freq": "model-00001-of-00004.safetensors"
425
+ }
426
+ }
runs/Jul24_05-07-12_gilbreth-j001.rcac.purdue.edu/events.out.tfevents.1721812169.gilbreth-j001.rcac.purdue.edu.196523.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ab9f0d4b2e917f9558bbcf7b8a8a4d37de43d350c736f1c53e87a737e5f488a
3
- size 52207
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21230c114f40c9ed1a1355ab812c108245c87792e72f99e9038461341b167e29
3
+ size 57609
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "train_loss": 0.6284302195911938,
4
+ "train_runtime": 5860.6637,
5
+ "train_samples": 25000,
6
+ "train_samples_per_second": 4.266,
7
+ "train_steps_per_second": 0.133
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,1152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 782,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "learning_rate": 6.329113924050633e-09,
14
+ "logits/generated": -Infinity,
15
+ "logits/real": -Infinity,
16
+ "logps/generated": -403.3199157714844,
17
+ "logps/real": -443.6107177734375,
18
+ "loss": 4.7453,
19
+ "rewards/accuracies": 0.5,
20
+ "rewards/generated": -12.943833351135254,
21
+ "rewards/margins": -2.53641414642334,
22
+ "rewards/real": -15.48024845123291,
23
+ "step": 1
24
+ },
25
+ {
26
+ "epoch": 0.01,
27
+ "learning_rate": 6.329113924050633e-08,
28
+ "logits/generated": -Infinity,
29
+ "logits/real": -Infinity,
30
+ "logps/generated": -487.2403869628906,
31
+ "logps/real": -384.84637451171875,
32
+ "loss": 3.4512,
33
+ "rewards/accuracies": 0.6666666865348816,
34
+ "rewards/generated": -19.510425567626953,
35
+ "rewards/margins": 6.074762344360352,
36
+ "rewards/real": -13.435664176940918,
37
+ "step": 10
38
+ },
39
+ {
40
+ "epoch": 0.03,
41
+ "learning_rate": 1.2658227848101266e-07,
42
+ "logits/generated": -Infinity,
43
+ "logits/real": -Infinity,
44
+ "logps/generated": -473.37353515625,
45
+ "logps/real": -413.13916015625,
46
+ "loss": 3.9704,
47
+ "rewards/accuracies": 0.637499988079071,
48
+ "rewards/generated": -18.36065101623535,
49
+ "rewards/margins": 3.8984241485595703,
50
+ "rewards/real": -14.462226867675781,
51
+ "step": 20
52
+ },
53
+ {
54
+ "epoch": 0.04,
55
+ "learning_rate": 1.89873417721519e-07,
56
+ "logits/generated": -Infinity,
57
+ "logits/real": -Infinity,
58
+ "logps/generated": -510.23455810546875,
59
+ "logps/real": -408.78350830078125,
60
+ "loss": 3.6848,
61
+ "rewards/accuracies": 0.6875,
62
+ "rewards/generated": -21.037378311157227,
63
+ "rewards/margins": 7.7357025146484375,
64
+ "rewards/real": -13.301675796508789,
65
+ "step": 30
66
+ },
67
+ {
68
+ "epoch": 0.05,
69
+ "learning_rate": 2.5316455696202533e-07,
70
+ "logits/generated": -Infinity,
71
+ "logits/real": -Infinity,
72
+ "logps/generated": -451.669189453125,
73
+ "logps/real": -368.1625061035156,
74
+ "loss": 3.9079,
75
+ "rewards/accuracies": 0.625,
76
+ "rewards/generated": -16.798627853393555,
77
+ "rewards/margins": 4.454809665679932,
78
+ "rewards/real": -12.343819618225098,
79
+ "step": 40
80
+ },
81
+ {
82
+ "epoch": 0.06,
83
+ "learning_rate": 3.1645569620253163e-07,
84
+ "logits/generated": -Infinity,
85
+ "logits/real": -Infinity,
86
+ "logps/generated": -552.6080322265625,
87
+ "logps/real": -397.91064453125,
88
+ "loss": 2.9787,
89
+ "rewards/accuracies": 0.737500011920929,
90
+ "rewards/generated": -25.66506004333496,
91
+ "rewards/margins": 10.62718391418457,
92
+ "rewards/real": -15.037874221801758,
93
+ "step": 50
94
+ },
95
+ {
96
+ "epoch": 0.08,
97
+ "learning_rate": 3.79746835443038e-07,
98
+ "logits/generated": -Infinity,
99
+ "logits/real": -Infinity,
100
+ "logps/generated": -509.36578369140625,
101
+ "logps/real": -390.8323059082031,
102
+ "loss": 2.5737,
103
+ "rewards/accuracies": 0.737500011920929,
104
+ "rewards/generated": -22.55277442932129,
105
+ "rewards/margins": 7.870760440826416,
106
+ "rewards/real": -14.682014465332031,
107
+ "step": 60
108
+ },
109
+ {
110
+ "epoch": 0.09,
111
+ "learning_rate": 4.4303797468354424e-07,
112
+ "logits/generated": -Infinity,
113
+ "logits/real": -Infinity,
114
+ "logps/generated": -554.7327270507812,
115
+ "logps/real": -382.78350830078125,
116
+ "loss": 1.7978,
117
+ "rewards/accuracies": 0.8374999761581421,
118
+ "rewards/generated": -28.232168197631836,
119
+ "rewards/margins": 14.969076156616211,
120
+ "rewards/real": -13.263090133666992,
121
+ "step": 70
122
+ },
123
+ {
124
+ "epoch": 0.1,
125
+ "learning_rate": 4.992887624466572e-07,
126
+ "logits/generated": -Infinity,
127
+ "logits/real": -Infinity,
128
+ "logps/generated": -573.7469482421875,
129
+ "logps/real": -402.5927429199219,
130
+ "loss": 1.4178,
131
+ "rewards/accuracies": 0.800000011920929,
132
+ "rewards/generated": -28.089534759521484,
133
+ "rewards/margins": 12.5045166015625,
134
+ "rewards/real": -15.5850191116333,
135
+ "step": 80
136
+ },
137
+ {
138
+ "epoch": 0.12,
139
+ "learning_rate": 4.92176386913229e-07,
140
+ "logits/generated": -Infinity,
141
+ "logits/real": -Infinity,
142
+ "logps/generated": -576.7484741210938,
143
+ "logps/real": -400.97186279296875,
144
+ "loss": 1.2467,
145
+ "rewards/accuracies": 0.7875000238418579,
146
+ "rewards/generated": -30.259912490844727,
147
+ "rewards/margins": 14.735580444335938,
148
+ "rewards/real": -15.524327278137207,
149
+ "step": 90
150
+ },
151
+ {
152
+ "epoch": 0.13,
153
+ "learning_rate": 4.850640113798008e-07,
154
+ "logits/generated": -Infinity,
155
+ "logits/real": -Infinity,
156
+ "logps/generated": -654.0052490234375,
157
+ "logps/real": -442.6397399902344,
158
+ "loss": 0.9329,
159
+ "rewards/accuracies": 0.8999999761581421,
160
+ "rewards/generated": -35.535160064697266,
161
+ "rewards/margins": 18.725465774536133,
162
+ "rewards/real": -16.8096923828125,
163
+ "step": 100
164
+ },
165
+ {
166
+ "epoch": 0.14,
167
+ "learning_rate": 4.779516358463727e-07,
168
+ "logits/generated": -Infinity,
169
+ "logits/real": -Infinity,
170
+ "logps/generated": -638.0022583007812,
171
+ "logps/real": -427.4214782714844,
172
+ "loss": 1.1736,
173
+ "rewards/accuracies": 0.875,
174
+ "rewards/generated": -35.03214645385742,
175
+ "rewards/margins": 18.788606643676758,
176
+ "rewards/real": -16.24354362487793,
177
+ "step": 110
178
+ },
179
+ {
180
+ "epoch": 0.15,
181
+ "learning_rate": 4.7083926031294454e-07,
182
+ "logits/generated": -Infinity,
183
+ "logits/real": -Infinity,
184
+ "logps/generated": -647.80859375,
185
+ "logps/real": -420.6051330566406,
186
+ "loss": 0.7488,
187
+ "rewards/accuracies": 0.9375,
188
+ "rewards/generated": -35.87686538696289,
189
+ "rewards/margins": 21.418781280517578,
190
+ "rewards/real": -14.458081245422363,
191
+ "step": 120
192
+ },
193
+ {
194
+ "epoch": 0.17,
195
+ "learning_rate": 4.6372688477951633e-07,
196
+ "logits/generated": -Infinity,
197
+ "logits/real": -Infinity,
198
+ "logps/generated": -647.1038818359375,
199
+ "logps/real": -412.8779296875,
200
+ "loss": 0.6447,
201
+ "rewards/accuracies": 0.875,
202
+ "rewards/generated": -35.35771560668945,
203
+ "rewards/margins": 20.0076904296875,
204
+ "rewards/real": -15.350027084350586,
205
+ "step": 130
206
+ },
207
+ {
208
+ "epoch": 0.18,
209
+ "learning_rate": 4.5661450924608817e-07,
210
+ "logits/generated": -Infinity,
211
+ "logits/real": -Infinity,
212
+ "logps/generated": -699.005126953125,
213
+ "logps/real": -401.8106384277344,
214
+ "loss": 0.9455,
215
+ "rewards/accuracies": 0.9375,
216
+ "rewards/generated": -38.48512268066406,
217
+ "rewards/margins": 23.671005249023438,
218
+ "rewards/real": -14.814119338989258,
219
+ "step": 140
220
+ },
221
+ {
222
+ "epoch": 0.19,
223
+ "learning_rate": 4.4950213371266e-07,
224
+ "logits/generated": -Infinity,
225
+ "logits/real": -Infinity,
226
+ "logps/generated": -673.933349609375,
227
+ "logps/real": -404.8531494140625,
228
+ "loss": 0.6848,
229
+ "rewards/accuracies": 0.9125000238418579,
230
+ "rewards/generated": -38.79594039916992,
231
+ "rewards/margins": 22.858118057250977,
232
+ "rewards/real": -15.937822341918945,
233
+ "step": 150
234
+ },
235
+ {
236
+ "epoch": 0.2,
237
+ "learning_rate": 4.4238975817923186e-07,
238
+ "logits/generated": -Infinity,
239
+ "logits/real": -Infinity,
240
+ "logps/generated": -650.0132446289062,
241
+ "logps/real": -380.55413818359375,
242
+ "loss": 0.5912,
243
+ "rewards/accuracies": 0.8999999761581421,
244
+ "rewards/generated": -36.62251663208008,
245
+ "rewards/margins": 22.439044952392578,
246
+ "rewards/real": -14.183469772338867,
247
+ "step": 160
248
+ },
249
+ {
250
+ "epoch": 0.22,
251
+ "learning_rate": 4.3527738264580364e-07,
252
+ "logits/generated": -Infinity,
253
+ "logits/real": -Infinity,
254
+ "logps/generated": -676.5908203125,
255
+ "logps/real": -413.38287353515625,
256
+ "loss": 0.6238,
257
+ "rewards/accuracies": 0.9125000238418579,
258
+ "rewards/generated": -37.932899475097656,
259
+ "rewards/margins": 22.803815841674805,
260
+ "rewards/real": -15.129079818725586,
261
+ "step": 170
262
+ },
263
+ {
264
+ "epoch": 0.23,
265
+ "learning_rate": 4.2816500711237554e-07,
266
+ "logits/generated": -Infinity,
267
+ "logits/real": -Infinity,
268
+ "logps/generated": -660.9660034179688,
269
+ "logps/real": -372.2694396972656,
270
+ "loss": 0.3412,
271
+ "rewards/accuracies": 0.9375,
272
+ "rewards/generated": -37.31585693359375,
273
+ "rewards/margins": 23.456205368041992,
274
+ "rewards/real": -13.859651565551758,
275
+ "step": 180
276
+ },
277
+ {
278
+ "epoch": 0.24,
279
+ "learning_rate": 4.2105263157894733e-07,
280
+ "logits/generated": -Infinity,
281
+ "logits/real": -Infinity,
282
+ "logps/generated": -668.5888671875,
283
+ "logps/real": -366.01507568359375,
284
+ "loss": 0.5799,
285
+ "rewards/accuracies": 0.9624999761581421,
286
+ "rewards/generated": -37.69170379638672,
287
+ "rewards/margins": 25.54647445678711,
288
+ "rewards/real": -12.145231246948242,
289
+ "step": 190
290
+ },
291
+ {
292
+ "epoch": 0.26,
293
+ "learning_rate": 4.1394025604551917e-07,
294
+ "logits/generated": -Infinity,
295
+ "logits/real": -Infinity,
296
+ "logps/generated": -681.8294677734375,
297
+ "logps/real": -392.57513427734375,
298
+ "loss": 0.6742,
299
+ "rewards/accuracies": 0.8999999761581421,
300
+ "rewards/generated": -38.56768035888672,
301
+ "rewards/margins": 24.48987579345703,
302
+ "rewards/real": -14.077804565429688,
303
+ "step": 200
304
+ },
305
+ {
306
+ "epoch": 0.27,
307
+ "learning_rate": 4.06827880512091e-07,
308
+ "logits/generated": -Infinity,
309
+ "logits/real": -Infinity,
310
+ "logps/generated": -754.3670654296875,
311
+ "logps/real": -383.51702880859375,
312
+ "loss": 0.519,
313
+ "rewards/accuracies": 0.925000011920929,
314
+ "rewards/generated": -44.508426666259766,
315
+ "rewards/margins": 30.150854110717773,
316
+ "rewards/real": -14.357576370239258,
317
+ "step": 210
318
+ },
319
+ {
320
+ "epoch": 0.28,
321
+ "learning_rate": 3.9971550497866285e-07,
322
+ "logits/generated": -Infinity,
323
+ "logits/real": -Infinity,
324
+ "logps/generated": -669.1099243164062,
325
+ "logps/real": -404.3431091308594,
326
+ "loss": 0.3679,
327
+ "rewards/accuracies": 0.9375,
328
+ "rewards/generated": -39.06100845336914,
329
+ "rewards/margins": 24.223697662353516,
330
+ "rewards/real": -14.837308883666992,
331
+ "step": 220
332
+ },
333
+ {
334
+ "epoch": 0.29,
335
+ "learning_rate": 3.926031294452347e-07,
336
+ "logits/generated": -Infinity,
337
+ "logits/real": -Infinity,
338
+ "logps/generated": -688.25048828125,
339
+ "logps/real": -396.00018310546875,
340
+ "loss": 0.2677,
341
+ "rewards/accuracies": 0.949999988079071,
342
+ "rewards/generated": -39.11343765258789,
343
+ "rewards/margins": 24.355987548828125,
344
+ "rewards/real": -14.757448196411133,
345
+ "step": 230
346
+ },
347
+ {
348
+ "epoch": 0.31,
349
+ "learning_rate": 3.8549075391180653e-07,
350
+ "logits/generated": -Infinity,
351
+ "logits/real": -Infinity,
352
+ "logps/generated": -714.4971923828125,
353
+ "logps/real": -415.25262451171875,
354
+ "loss": 0.4572,
355
+ "rewards/accuracies": 0.925000011920929,
356
+ "rewards/generated": -40.9525032043457,
357
+ "rewards/margins": 25.465791702270508,
358
+ "rewards/real": -15.486714363098145,
359
+ "step": 240
360
+ },
361
+ {
362
+ "epoch": 0.32,
363
+ "learning_rate": 3.783783783783784e-07,
364
+ "logits/generated": -Infinity,
365
+ "logits/real": -Infinity,
366
+ "logps/generated": -729.0126342773438,
367
+ "logps/real": -394.86724853515625,
368
+ "loss": 0.345,
369
+ "rewards/accuracies": 0.9624999761581421,
370
+ "rewards/generated": -44.133033752441406,
371
+ "rewards/margins": 29.94510841369629,
372
+ "rewards/real": -14.187917709350586,
373
+ "step": 250
374
+ },
375
+ {
376
+ "epoch": 0.33,
377
+ "learning_rate": 3.7126600284495016e-07,
378
+ "logits/generated": -Infinity,
379
+ "logits/real": -Infinity,
380
+ "logps/generated": -722.02490234375,
381
+ "logps/real": -403.8961181640625,
382
+ "loss": 0.4853,
383
+ "rewards/accuracies": 0.9375,
384
+ "rewards/generated": -41.30921173095703,
385
+ "rewards/margins": 26.302785873413086,
386
+ "rewards/real": -15.006426811218262,
387
+ "step": 260
388
+ },
389
+ {
390
+ "epoch": 0.35,
391
+ "learning_rate": 3.6415362731152206e-07,
392
+ "logits/generated": -Infinity,
393
+ "logits/real": -Infinity,
394
+ "logps/generated": -695.8910522460938,
395
+ "logps/real": -374.00433349609375,
396
+ "loss": 0.3413,
397
+ "rewards/accuracies": 0.949999988079071,
398
+ "rewards/generated": -41.190391540527344,
399
+ "rewards/margins": 27.258630752563477,
400
+ "rewards/real": -13.931764602661133,
401
+ "step": 270
402
+ },
403
+ {
404
+ "epoch": 0.36,
405
+ "learning_rate": 3.5704125177809385e-07,
406
+ "logits/generated": -Infinity,
407
+ "logits/real": -Infinity,
408
+ "logps/generated": -734.4241943359375,
409
+ "logps/real": -404.4153137207031,
410
+ "loss": 0.4178,
411
+ "rewards/accuracies": 0.9624999761581421,
412
+ "rewards/generated": -43.61100387573242,
413
+ "rewards/margins": 28.391124725341797,
414
+ "rewards/real": -15.219881057739258,
415
+ "step": 280
416
+ },
417
+ {
418
+ "epoch": 0.37,
419
+ "learning_rate": 3.4992887624466574e-07,
420
+ "logits/generated": -Infinity,
421
+ "logits/real": -Infinity,
422
+ "logps/generated": -703.3042602539062,
423
+ "logps/real": -400.789306640625,
424
+ "loss": 0.358,
425
+ "rewards/accuracies": 0.9624999761581421,
426
+ "rewards/generated": -40.19432830810547,
427
+ "rewards/margins": 25.275157928466797,
428
+ "rewards/real": -14.919171333312988,
429
+ "step": 290
430
+ },
431
+ {
432
+ "epoch": 0.38,
433
+ "learning_rate": 3.4281650071123753e-07,
434
+ "logits/generated": -Infinity,
435
+ "logits/real": -Infinity,
436
+ "logps/generated": -717.597900390625,
437
+ "logps/real": -387.6546325683594,
438
+ "loss": 0.2285,
439
+ "rewards/accuracies": 0.949999988079071,
440
+ "rewards/generated": -42.98313522338867,
441
+ "rewards/margins": 27.72798728942871,
442
+ "rewards/real": -15.255144119262695,
443
+ "step": 300
444
+ },
445
+ {
446
+ "epoch": 0.4,
447
+ "learning_rate": 3.3570412517780937e-07,
448
+ "logits/generated": -Infinity,
449
+ "logits/real": -Infinity,
450
+ "logps/generated": -714.7513427734375,
451
+ "logps/real": -424.5567932128906,
452
+ "loss": 0.362,
453
+ "rewards/accuracies": 0.8999999761581421,
454
+ "rewards/generated": -42.02880859375,
455
+ "rewards/margins": 26.301555633544922,
456
+ "rewards/real": -15.727252006530762,
457
+ "step": 310
458
+ },
459
+ {
460
+ "epoch": 0.41,
461
+ "learning_rate": 3.285917496443812e-07,
462
+ "logits/generated": -Infinity,
463
+ "logits/real": -Infinity,
464
+ "logps/generated": -704.7682495117188,
465
+ "logps/real": -389.437744140625,
466
+ "loss": 0.3054,
467
+ "rewards/accuracies": 0.925000011920929,
468
+ "rewards/generated": -40.38294982910156,
469
+ "rewards/margins": 26.326675415039062,
470
+ "rewards/real": -14.05627727508545,
471
+ "step": 320
472
+ },
473
+ {
474
+ "epoch": 0.42,
475
+ "learning_rate": 3.2147937411095305e-07,
476
+ "logits/generated": -Infinity,
477
+ "logits/real": -Infinity,
478
+ "logps/generated": -788.1136474609375,
479
+ "logps/real": -426.383544921875,
480
+ "loss": 0.2255,
481
+ "rewards/accuracies": 0.9750000238418579,
482
+ "rewards/generated": -49.49579620361328,
483
+ "rewards/margins": 32.605934143066406,
484
+ "rewards/real": -16.889863967895508,
485
+ "step": 330
486
+ },
487
+ {
488
+ "epoch": 0.43,
489
+ "learning_rate": 3.1436699857752484e-07,
490
+ "logits/generated": -Infinity,
491
+ "logits/real": -Infinity,
492
+ "logps/generated": -720.3298950195312,
493
+ "logps/real": -392.3839111328125,
494
+ "loss": 0.3118,
495
+ "rewards/accuracies": 0.9624999761581421,
496
+ "rewards/generated": -42.088287353515625,
497
+ "rewards/margins": 27.807758331298828,
498
+ "rewards/real": -14.28053092956543,
499
+ "step": 340
500
+ },
501
+ {
502
+ "epoch": 0.45,
503
+ "learning_rate": 3.0725462304409674e-07,
504
+ "logits/generated": -Infinity,
505
+ "logits/real": -Infinity,
506
+ "logps/generated": -721.5670776367188,
507
+ "logps/real": -399.3511047363281,
508
+ "loss": 0.2108,
509
+ "rewards/accuracies": 0.9375,
510
+ "rewards/generated": -44.001930236816406,
511
+ "rewards/margins": 28.8182430267334,
512
+ "rewards/real": -15.183688163757324,
513
+ "step": 350
514
+ },
515
+ {
516
+ "epoch": 0.46,
517
+ "learning_rate": 3.001422475106685e-07,
518
+ "logits/generated": -Infinity,
519
+ "logits/real": -Infinity,
520
+ "logps/generated": -734.0841064453125,
521
+ "logps/real": -473.2635192871094,
522
+ "loss": 0.6012,
523
+ "rewards/accuracies": 0.925000011920929,
524
+ "rewards/generated": -44.092620849609375,
525
+ "rewards/margins": 25.192630767822266,
526
+ "rewards/real": -18.89999008178711,
527
+ "step": 360
528
+ },
529
+ {
530
+ "epoch": 0.47,
531
+ "learning_rate": 2.9302987197724037e-07,
532
+ "logits/generated": -Infinity,
533
+ "logits/real": -Infinity,
534
+ "logps/generated": -666.3681030273438,
535
+ "logps/real": -369.57232666015625,
536
+ "loss": 0.3811,
537
+ "rewards/accuracies": 0.9624999761581421,
538
+ "rewards/generated": -38.6079216003418,
539
+ "rewards/margins": 23.917612075805664,
540
+ "rewards/real": -14.690305709838867,
541
+ "step": 370
542
+ },
543
+ {
544
+ "epoch": 0.49,
545
+ "learning_rate": 2.8591749644381226e-07,
546
+ "logits/generated": -Infinity,
547
+ "logits/real": -Infinity,
548
+ "logps/generated": -709.2445068359375,
549
+ "logps/real": -394.5869140625,
550
+ "loss": 0.3208,
551
+ "rewards/accuracies": 0.949999988079071,
552
+ "rewards/generated": -42.30268096923828,
553
+ "rewards/margins": 26.500690460205078,
554
+ "rewards/real": -15.80199146270752,
555
+ "step": 380
556
+ },
557
+ {
558
+ "epoch": 0.5,
559
+ "learning_rate": 2.7880512091038405e-07,
560
+ "logits/generated": -Infinity,
561
+ "logits/real": -Infinity,
562
+ "logps/generated": -775.5260009765625,
563
+ "logps/real": -426.09869384765625,
564
+ "loss": 0.2884,
565
+ "rewards/accuracies": 0.9624999761581421,
566
+ "rewards/generated": -48.343849182128906,
567
+ "rewards/margins": 31.10616683959961,
568
+ "rewards/real": -17.237682342529297,
569
+ "step": 390
570
+ },
571
+ {
572
+ "epoch": 0.51,
573
+ "learning_rate": 2.716927453769559e-07,
574
+ "logits/generated": -Infinity,
575
+ "logits/real": -Infinity,
576
+ "logps/generated": -773.4555053710938,
577
+ "logps/real": -450.7288513183594,
578
+ "loss": 0.3376,
579
+ "rewards/accuracies": 0.949999988079071,
580
+ "rewards/generated": -47.49498748779297,
581
+ "rewards/margins": 28.620986938476562,
582
+ "rewards/real": -18.873998641967773,
583
+ "step": 400
584
+ },
585
+ {
586
+ "epoch": 0.52,
587
+ "learning_rate": 2.6458036984352773e-07,
588
+ "logits/generated": -Infinity,
589
+ "logits/real": -Infinity,
590
+ "logps/generated": -741.7351684570312,
591
+ "logps/real": -449.17816162109375,
592
+ "loss": 0.3754,
593
+ "rewards/accuracies": 0.925000011920929,
594
+ "rewards/generated": -46.99356460571289,
595
+ "rewards/margins": 27.995086669921875,
596
+ "rewards/real": -18.998476028442383,
597
+ "step": 410
598
+ },
599
+ {
600
+ "epoch": 0.54,
601
+ "learning_rate": 2.574679943100996e-07,
602
+ "logits/generated": -Infinity,
603
+ "logits/real": -Infinity,
604
+ "logps/generated": -738.3614501953125,
605
+ "logps/real": -456.7314453125,
606
+ "loss": 0.0844,
607
+ "rewards/accuracies": 0.9375,
608
+ "rewards/generated": -46.36874008178711,
609
+ "rewards/margins": 27.84047508239746,
610
+ "rewards/real": -18.528268814086914,
611
+ "step": 420
612
+ },
613
+ {
614
+ "epoch": 0.55,
615
+ "learning_rate": 2.5035561877667136e-07,
616
+ "logits/generated": -Infinity,
617
+ "logits/real": -Infinity,
618
+ "logps/generated": -768.1492309570312,
619
+ "logps/real": -441.343017578125,
620
+ "loss": 0.1898,
621
+ "rewards/accuracies": 0.9750000238418579,
622
+ "rewards/generated": -47.066619873046875,
623
+ "rewards/margins": 29.435678482055664,
624
+ "rewards/real": -17.63094139099121,
625
+ "step": 430
626
+ },
627
+ {
628
+ "epoch": 0.56,
629
+ "learning_rate": 2.4324324324324326e-07,
630
+ "logits/generated": -Infinity,
631
+ "logits/real": -Infinity,
632
+ "logps/generated": -787.4987182617188,
633
+ "logps/real": -408.2841796875,
634
+ "loss": 0.3922,
635
+ "rewards/accuracies": 0.987500011920929,
636
+ "rewards/generated": -50.27423095703125,
637
+ "rewards/margins": 33.921939849853516,
638
+ "rewards/real": -16.352293014526367,
639
+ "step": 440
640
+ },
641
+ {
642
+ "epoch": 0.58,
643
+ "learning_rate": 2.3613086770981507e-07,
644
+ "logits/generated": -Infinity,
645
+ "logits/real": -Infinity,
646
+ "logps/generated": -771.3111572265625,
647
+ "logps/real": -408.37896728515625,
648
+ "loss": 0.3608,
649
+ "rewards/accuracies": 0.949999988079071,
650
+ "rewards/generated": -48.568450927734375,
651
+ "rewards/margins": 31.147785186767578,
652
+ "rewards/real": -17.420665740966797,
653
+ "step": 450
654
+ },
655
+ {
656
+ "epoch": 0.59,
657
+ "learning_rate": 2.290184921763869e-07,
658
+ "logits/generated": -Infinity,
659
+ "logits/real": -Infinity,
660
+ "logps/generated": -827.6937255859375,
661
+ "logps/real": -423.6630859375,
662
+ "loss": 0.1253,
663
+ "rewards/accuracies": 1.0,
664
+ "rewards/generated": -51.94307327270508,
665
+ "rewards/margins": 35.77669143676758,
666
+ "rewards/real": -16.166385650634766,
667
+ "step": 460
668
+ },
669
+ {
670
+ "epoch": 0.6,
671
+ "learning_rate": 2.2190611664295875e-07,
672
+ "logits/generated": -Infinity,
673
+ "logits/real": -Infinity,
674
+ "logps/generated": -790.8399658203125,
675
+ "logps/real": -440.3358459472656,
676
+ "loss": 0.2949,
677
+ "rewards/accuracies": 0.925000011920929,
678
+ "rewards/generated": -48.574867248535156,
679
+ "rewards/margins": 30.353759765625,
680
+ "rewards/real": -18.22110939025879,
681
+ "step": 470
682
+ },
683
+ {
684
+ "epoch": 0.61,
685
+ "learning_rate": 2.1479374110953057e-07,
686
+ "logits/generated": -Infinity,
687
+ "logits/real": -Infinity,
688
+ "logps/generated": -781.35791015625,
689
+ "logps/real": -390.54168701171875,
690
+ "loss": 0.3877,
691
+ "rewards/accuracies": 0.9624999761581421,
692
+ "rewards/generated": -49.05020523071289,
693
+ "rewards/margins": 32.30176544189453,
694
+ "rewards/real": -16.748441696166992,
695
+ "step": 480
696
+ },
697
+ {
698
+ "epoch": 0.63,
699
+ "learning_rate": 2.076813655761024e-07,
700
+ "logits/generated": -Infinity,
701
+ "logits/real": -Infinity,
702
+ "logps/generated": -810.448486328125,
703
+ "logps/real": -441.8717346191406,
704
+ "loss": 0.2217,
705
+ "rewards/accuracies": 0.987500011920929,
706
+ "rewards/generated": -51.156005859375,
707
+ "rewards/margins": 31.813852310180664,
708
+ "rewards/real": -19.342153549194336,
709
+ "step": 490
710
+ },
711
+ {
712
+ "epoch": 0.64,
713
+ "learning_rate": 2.0056899004267425e-07,
714
+ "logits/generated": -Infinity,
715
+ "logits/real": -Infinity,
716
+ "logps/generated": -802.3043212890625,
717
+ "logps/real": -487.0044860839844,
718
+ "loss": 0.2807,
719
+ "rewards/accuracies": 0.9624999761581421,
720
+ "rewards/generated": -51.0858039855957,
721
+ "rewards/margins": 30.591796875,
722
+ "rewards/real": -20.494003295898438,
723
+ "step": 500
724
+ },
725
+ {
726
+ "epoch": 0.64,
727
+ "eval_logits/generated": -Infinity,
728
+ "eval_logits/real": -Infinity,
729
+ "eval_logps/generated": -518.7460327148438,
730
+ "eval_logps/real": -321.637939453125,
731
+ "eval_loss": 0.16799330711364746,
732
+ "eval_rewards/accuracies": 0.9442675113677979,
733
+ "eval_rewards/generated": -22.73108673095703,
734
+ "eval_rewards/margins": 15.805987358093262,
735
+ "eval_rewards/real": -6.9250993728637695,
736
+ "eval_runtime": 514.9684,
737
+ "eval_samples_per_second": 9.709,
738
+ "eval_steps_per_second": 0.305,
739
+ "step": 500
740
+ },
741
+ {
742
+ "epoch": 0.65,
743
+ "learning_rate": 1.9345661450924607e-07,
744
+ "logits/generated": -Infinity,
745
+ "logits/real": -Infinity,
746
+ "logps/generated": -764.2227783203125,
747
+ "logps/real": -417.8741149902344,
748
+ "loss": 0.2566,
749
+ "rewards/accuracies": 0.9624999761581421,
750
+ "rewards/generated": -46.21670150756836,
751
+ "rewards/margins": 29.2624568939209,
752
+ "rewards/real": -16.954248428344727,
753
+ "step": 510
754
+ },
755
+ {
756
+ "epoch": 0.66,
757
+ "learning_rate": 1.863442389758179e-07,
758
+ "logits/generated": -Infinity,
759
+ "logits/real": -Infinity,
760
+ "logps/generated": -785.6708984375,
761
+ "logps/real": -436.8841857910156,
762
+ "loss": 0.2175,
763
+ "rewards/accuracies": 0.987500011920929,
764
+ "rewards/generated": -50.03806686401367,
765
+ "rewards/margins": 33.52396011352539,
766
+ "rewards/real": -16.514102935791016,
767
+ "step": 520
768
+ },
769
+ {
770
+ "epoch": 0.68,
771
+ "learning_rate": 1.7923186344238975e-07,
772
+ "logits/generated": -Infinity,
773
+ "logits/real": -Infinity,
774
+ "logps/generated": -741.9736328125,
775
+ "logps/real": -408.57763671875,
776
+ "loss": 0.3024,
777
+ "rewards/accuracies": 0.9750000238418579,
778
+ "rewards/generated": -45.586387634277344,
779
+ "rewards/margins": 29.933719635009766,
780
+ "rewards/real": -15.652669906616211,
781
+ "step": 530
782
+ },
783
+ {
784
+ "epoch": 0.69,
785
+ "learning_rate": 1.721194879089616e-07,
786
+ "logits/generated": -Infinity,
787
+ "logits/real": -Infinity,
788
+ "logps/generated": -813.8468017578125,
789
+ "logps/real": -430.96026611328125,
790
+ "loss": 0.2112,
791
+ "rewards/accuracies": 0.949999988079071,
792
+ "rewards/generated": -52.11174392700195,
793
+ "rewards/margins": 33.244110107421875,
794
+ "rewards/real": -18.867637634277344,
795
+ "step": 540
796
+ },
797
+ {
798
+ "epoch": 0.7,
799
+ "learning_rate": 1.650071123755334e-07,
800
+ "logits/generated": -Infinity,
801
+ "logits/real": -Infinity,
802
+ "logps/generated": -813.7432250976562,
803
+ "logps/real": -425.29754638671875,
804
+ "loss": 0.3095,
805
+ "rewards/accuracies": 0.949999988079071,
806
+ "rewards/generated": -52.5365104675293,
807
+ "rewards/margins": 34.71119689941406,
808
+ "rewards/real": -17.825315475463867,
809
+ "step": 550
810
+ },
811
+ {
812
+ "epoch": 0.72,
813
+ "learning_rate": 1.5789473684210525e-07,
814
+ "logits/generated": -Infinity,
815
+ "logits/real": -Infinity,
816
+ "logps/generated": -790.2058715820312,
817
+ "logps/real": -440.711669921875,
818
+ "loss": 0.1864,
819
+ "rewards/accuracies": 0.9624999761581421,
820
+ "rewards/generated": -49.65260314941406,
821
+ "rewards/margins": 31.609088897705078,
822
+ "rewards/real": -18.04351234436035,
823
+ "step": 560
824
+ },
825
+ {
826
+ "epoch": 0.73,
827
+ "learning_rate": 1.507823613086771e-07,
828
+ "logits/generated": -Infinity,
829
+ "logits/real": -Infinity,
830
+ "logps/generated": -752.94677734375,
831
+ "logps/real": -427.7891540527344,
832
+ "loss": 0.3443,
833
+ "rewards/accuracies": 0.925000011920929,
834
+ "rewards/generated": -47.858802795410156,
835
+ "rewards/margins": 30.949520111083984,
836
+ "rewards/real": -16.90927505493164,
837
+ "step": 570
838
+ },
839
+ {
840
+ "epoch": 0.74,
841
+ "learning_rate": 1.436699857752489e-07,
842
+ "logits/generated": -Infinity,
843
+ "logits/real": -Infinity,
844
+ "logps/generated": -843.44140625,
845
+ "logps/real": -425.5953063964844,
846
+ "loss": 0.3088,
847
+ "rewards/accuracies": 0.9750000238418579,
848
+ "rewards/generated": -53.65172576904297,
849
+ "rewards/margins": 36.3214111328125,
850
+ "rewards/real": -17.330326080322266,
851
+ "step": 580
852
+ },
853
+ {
854
+ "epoch": 0.75,
855
+ "learning_rate": 1.3655761024182077e-07,
856
+ "logits/generated": -Infinity,
857
+ "logits/real": -Infinity,
858
+ "logps/generated": -816.5244750976562,
859
+ "logps/real": -419.6240234375,
860
+ "loss": 0.2445,
861
+ "rewards/accuracies": 0.9624999761581421,
862
+ "rewards/generated": -52.9045524597168,
863
+ "rewards/margins": 36.242149353027344,
864
+ "rewards/real": -16.662403106689453,
865
+ "step": 590
866
+ },
867
+ {
868
+ "epoch": 0.77,
869
+ "learning_rate": 1.2944523470839261e-07,
870
+ "logits/generated": -Infinity,
871
+ "logits/real": -Infinity,
872
+ "logps/generated": -824.7449340820312,
873
+ "logps/real": -429.56414794921875,
874
+ "loss": 0.3351,
875
+ "rewards/accuracies": 0.949999988079071,
876
+ "rewards/generated": -51.94647979736328,
877
+ "rewards/margins": 34.617332458496094,
878
+ "rewards/real": -17.329151153564453,
879
+ "step": 600
880
+ },
881
+ {
882
+ "epoch": 0.78,
883
+ "learning_rate": 1.2233285917496443e-07,
884
+ "logits/generated": -Infinity,
885
+ "logits/real": -Infinity,
886
+ "logps/generated": -775.5467529296875,
887
+ "logps/real": -422.3451232910156,
888
+ "loss": 0.07,
889
+ "rewards/accuracies": 0.987500011920929,
890
+ "rewards/generated": -48.54518508911133,
891
+ "rewards/margins": 32.839900970458984,
892
+ "rewards/real": -15.705281257629395,
893
+ "step": 610
894
+ },
895
+ {
896
+ "epoch": 0.79,
897
+ "learning_rate": 1.1522048364153626e-07,
898
+ "logits/generated": -Infinity,
899
+ "logits/real": -Infinity,
900
+ "logps/generated": -779.6900634765625,
901
+ "logps/real": -441.49365234375,
902
+ "loss": 0.2883,
903
+ "rewards/accuracies": 0.9624999761581421,
904
+ "rewards/generated": -49.64753341674805,
905
+ "rewards/margins": 32.548099517822266,
906
+ "rewards/real": -17.099435806274414,
907
+ "step": 620
908
+ },
909
+ {
910
+ "epoch": 0.81,
911
+ "learning_rate": 1.0810810810810811e-07,
912
+ "logits/generated": -Infinity,
913
+ "logits/real": -Infinity,
914
+ "logps/generated": -785.1602172851562,
915
+ "logps/real": -403.9580993652344,
916
+ "loss": 0.1209,
917
+ "rewards/accuracies": 0.9624999761581421,
918
+ "rewards/generated": -49.906761169433594,
919
+ "rewards/margins": 33.77416229248047,
920
+ "rewards/real": -16.13260269165039,
921
+ "step": 630
922
+ },
923
+ {
924
+ "epoch": 0.82,
925
+ "learning_rate": 1.0099573257467994e-07,
926
+ "logits/generated": -Infinity,
927
+ "logits/real": -Infinity,
928
+ "logps/generated": -830.7962036132812,
929
+ "logps/real": -457.5830993652344,
930
+ "loss": 0.2802,
931
+ "rewards/accuracies": 0.949999988079071,
932
+ "rewards/generated": -53.04398727416992,
933
+ "rewards/margins": 34.22355270385742,
934
+ "rewards/real": -18.820430755615234,
935
+ "step": 640
936
+ },
937
+ {
938
+ "epoch": 0.83,
939
+ "learning_rate": 9.388335704125178e-08,
940
+ "logits/generated": -Infinity,
941
+ "logits/real": -Infinity,
942
+ "logps/generated": -774.2494506835938,
943
+ "logps/real": -407.559326171875,
944
+ "loss": 0.1348,
945
+ "rewards/accuracies": 1.0,
946
+ "rewards/generated": -49.61153030395508,
947
+ "rewards/margins": 32.225975036621094,
948
+ "rewards/real": -17.385555267333984,
949
+ "step": 650
950
+ },
951
+ {
952
+ "epoch": 0.84,
953
+ "learning_rate": 8.677098150782361e-08,
954
+ "logits/generated": -Infinity,
955
+ "logits/real": -Infinity,
956
+ "logps/generated": -772.5739135742188,
957
+ "logps/real": -409.3276672363281,
958
+ "loss": 0.0997,
959
+ "rewards/accuracies": 0.987500011920929,
960
+ "rewards/generated": -48.17709732055664,
961
+ "rewards/margins": 32.27601623535156,
962
+ "rewards/real": -15.901082038879395,
963
+ "step": 660
964
+ },
965
+ {
966
+ "epoch": 0.86,
967
+ "learning_rate": 7.965860597439544e-08,
968
+ "logits/generated": -Infinity,
969
+ "logits/real": -Infinity,
970
+ "logps/generated": -764.9151000976562,
971
+ "logps/real": -425.33709716796875,
972
+ "loss": 0.2364,
973
+ "rewards/accuracies": 0.925000011920929,
974
+ "rewards/generated": -48.48712921142578,
975
+ "rewards/margins": 31.54388427734375,
976
+ "rewards/real": -16.9432430267334,
977
+ "step": 670
978
+ },
979
+ {
980
+ "epoch": 0.87,
981
+ "learning_rate": 7.254623044096728e-08,
982
+ "logits/generated": -Infinity,
983
+ "logits/real": -Infinity,
984
+ "logps/generated": -822.6394653320312,
985
+ "logps/real": -434.67877197265625,
986
+ "loss": 0.242,
987
+ "rewards/accuracies": 0.987500011920929,
988
+ "rewards/generated": -53.5615348815918,
989
+ "rewards/margins": 35.22587585449219,
990
+ "rewards/real": -18.33565902709961,
991
+ "step": 680
992
+ },
993
+ {
994
+ "epoch": 0.88,
995
+ "learning_rate": 6.543385490753911e-08,
996
+ "logits/generated": -Infinity,
997
+ "logits/real": -Infinity,
998
+ "logps/generated": -868.9513549804688,
999
+ "logps/real": -439.777587890625,
1000
+ "loss": 0.203,
1001
+ "rewards/accuracies": 0.9624999761581421,
1002
+ "rewards/generated": -56.48514938354492,
1003
+ "rewards/margins": 37.33259201049805,
1004
+ "rewards/real": -19.152555465698242,
1005
+ "step": 690
1006
+ },
1007
+ {
1008
+ "epoch": 0.9,
1009
+ "learning_rate": 5.832147937411095e-08,
1010
+ "logits/generated": -Infinity,
1011
+ "logits/real": -Infinity,
1012
+ "logps/generated": -784.9397583007812,
1013
+ "logps/real": -413.0848083496094,
1014
+ "loss": 0.2787,
1015
+ "rewards/accuracies": 0.925000011920929,
1016
+ "rewards/generated": -47.916893005371094,
1017
+ "rewards/margins": 29.88480567932129,
1018
+ "rewards/real": -18.032087326049805,
1019
+ "step": 700
1020
+ },
1021
+ {
1022
+ "epoch": 0.91,
1023
+ "learning_rate": 5.120910384068278e-08,
1024
+ "logits/generated": -Infinity,
1025
+ "logits/real": -Infinity,
1026
+ "logps/generated": -824.5567626953125,
1027
+ "logps/real": -448.226318359375,
1028
+ "loss": 0.2584,
1029
+ "rewards/accuracies": 0.949999988079071,
1030
+ "rewards/generated": -53.5278205871582,
1031
+ "rewards/margins": 35.21510696411133,
1032
+ "rewards/real": -18.312713623046875,
1033
+ "step": 710
1034
+ },
1035
+ {
1036
+ "epoch": 0.92,
1037
+ "learning_rate": 4.4096728307254624e-08,
1038
+ "logits/generated": -Infinity,
1039
+ "logits/real": -Infinity,
1040
+ "logps/generated": -828.8753662109375,
1041
+ "logps/real": -427.9056091308594,
1042
+ "loss": 0.1088,
1043
+ "rewards/accuracies": 0.987500011920929,
1044
+ "rewards/generated": -53.2839469909668,
1045
+ "rewards/margins": 35.713260650634766,
1046
+ "rewards/real": -17.57068634033203,
1047
+ "step": 720
1048
+ },
1049
+ {
1050
+ "epoch": 0.93,
1051
+ "learning_rate": 3.698435277382646e-08,
1052
+ "logits/generated": -Infinity,
1053
+ "logits/real": -Infinity,
1054
+ "logps/generated": -839.0155029296875,
1055
+ "logps/real": -435.33428955078125,
1056
+ "loss": 0.2107,
1057
+ "rewards/accuracies": 1.0,
1058
+ "rewards/generated": -55.168609619140625,
1059
+ "rewards/margins": 37.164310455322266,
1060
+ "rewards/real": -18.004301071166992,
1061
+ "step": 730
1062
+ },
1063
+ {
1064
+ "epoch": 0.95,
1065
+ "learning_rate": 2.9871977240398294e-08,
1066
+ "logits/generated": -Infinity,
1067
+ "logits/real": -Infinity,
1068
+ "logps/generated": -805.4227905273438,
1069
+ "logps/real": -428.77923583984375,
1070
+ "loss": 0.2453,
1071
+ "rewards/accuracies": 0.9624999761581421,
1072
+ "rewards/generated": -52.77501678466797,
1073
+ "rewards/margins": 34.709877014160156,
1074
+ "rewards/real": -18.065141677856445,
1075
+ "step": 740
1076
+ },
1077
+ {
1078
+ "epoch": 0.96,
1079
+ "learning_rate": 2.275960170697013e-08,
1080
+ "logits/generated": -Infinity,
1081
+ "logits/real": -Infinity,
1082
+ "logps/generated": -834.6575927734375,
1083
+ "logps/real": -415.8843688964844,
1084
+ "loss": 0.1762,
1085
+ "rewards/accuracies": 0.9750000238418579,
1086
+ "rewards/generated": -52.49644088745117,
1087
+ "rewards/margins": 36.04848098754883,
1088
+ "rewards/real": -16.447967529296875,
1089
+ "step": 750
1090
+ },
1091
+ {
1092
+ "epoch": 0.97,
1093
+ "learning_rate": 1.564722617354196e-08,
1094
+ "logits/generated": -Infinity,
1095
+ "logits/real": -Infinity,
1096
+ "logps/generated": -850.2274169921875,
1097
+ "logps/real": -395.8170471191406,
1098
+ "loss": 0.1395,
1099
+ "rewards/accuracies": 0.9750000238418579,
1100
+ "rewards/generated": -55.13444900512695,
1101
+ "rewards/margins": 38.56990432739258,
1102
+ "rewards/real": -16.564544677734375,
1103
+ "step": 760
1104
+ },
1105
+ {
1106
+ "epoch": 0.98,
1107
+ "learning_rate": 8.534850640113798e-09,
1108
+ "logits/generated": -Infinity,
1109
+ "logits/real": -Infinity,
1110
+ "logps/generated": -807.279296875,
1111
+ "logps/real": -415.69586181640625,
1112
+ "loss": 0.1756,
1113
+ "rewards/accuracies": 0.9750000238418579,
1114
+ "rewards/generated": -52.691368103027344,
1115
+ "rewards/margins": 36.51746368408203,
1116
+ "rewards/real": -16.173904418945312,
1117
+ "step": 770
1118
+ },
1119
+ {
1120
+ "epoch": 1.0,
1121
+ "learning_rate": 1.422475106685633e-09,
1122
+ "logits/generated": -Infinity,
1123
+ "logits/real": -Infinity,
1124
+ "logps/generated": -819.2388916015625,
1125
+ "logps/real": -390.07391357421875,
1126
+ "loss": 0.2143,
1127
+ "rewards/accuracies": 0.9750000238418579,
1128
+ "rewards/generated": -52.7367057800293,
1129
+ "rewards/margins": 36.12593460083008,
1130
+ "rewards/real": -16.610774993896484,
1131
+ "step": 780
1132
+ },
1133
+ {
1134
+ "epoch": 1.0,
1135
+ "step": 782,
1136
+ "total_flos": 0.0,
1137
+ "train_loss": 0.6284302195911938,
1138
+ "train_runtime": 5860.6637,
1139
+ "train_samples_per_second": 4.266,
1140
+ "train_steps_per_second": 0.133
1141
+ }
1142
+ ],
1143
+ "logging_steps": 10,
1144
+ "max_steps": 782,
1145
+ "num_input_tokens_seen": 0,
1146
+ "num_train_epochs": 1,
1147
+ "save_steps": 100,
1148
+ "total_flos": 0.0,
1149
+ "train_batch_size": 8,
1150
+ "trial_name": null,
1151
+ "trial_params": null
1152
+ }