AmberYifan committed on
Commit
d045310
1 Parent(s): cbea852

Model save

Browse files
README.md ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: AmberYifan/mistral-safe-sft-full
4
+ tags:
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: mistral-sft4epoch-spin-v
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # mistral-sft4epoch-spin-v
15
+
16
+ This model is a fine-tuned version of [AmberYifan/mistral-safe-sft-full](https://huggingface.co/AmberYifan/mistral-safe-sft-full) on an unknown dataset.
17
+ It achieves the following results on the evaluation set (taken from the last logged evaluation, at step 1400 / epoch 0.8957 of a 1563-step run):
18
+ - Loss: 0.2295
19
+ - Rewards/real: 10.1264
20
+ - Rewards/generated: -5.0006
21
+ - Rewards/accuracies: 0.9922
22
+ - Rewards/margins: 15.1270
23
+ - Logps/generated: -128.7231
24
+ - Logps/real: -111.4173
25
+ - Logits/generated: -2.7320
26
+ - Logits/real: -2.7332
27
+
28
+ ## Model description
29
+
30
+ More information needed
31
+
32
+ ## Intended uses & limitations
33
+
34
+ More information needed
35
+
36
+ ## Training and evaluation data
37
+
38
+ More information needed
39
+
40
+ ## Training procedure
41
+
42
+ ### Training hyperparameters
43
+
44
+ The following hyperparameters were used during training:
45
+ - learning_rate: 5e-07
46
+ - train_batch_size: 8
47
+ - eval_batch_size: 8
48
+ - seed: 42
49
+ - distributed_type: multi-GPU
50
+ - num_devices: 4
51
+ - total_train_batch_size: 32
52
+ - total_eval_batch_size: 32
53
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
54
+ - lr_scheduler_type: linear
55
+ - lr_scheduler_warmup_ratio: 0.1
56
+ - num_epochs: 1
57
+
58
+ ### Training results
59
+
60
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/real | Rewards/generated | Rewards/accuracies | Rewards/margins | Logps/generated | Logps/real | Logits/generated | Logits/real |
61
+ |:-------------:|:------:|:----:|:---------------:|:------------:|:-----------------:|:------------------:|:---------------:|:---------------:|:----------:|:----------------:|:-----------:|
62
+ | 0.2262 | 0.1280 | 200 | 0.2435 | 9.5632 | -4.7644 | 0.9922 | 14.3276 | -126.3612 | -117.0495 | -2.8333 | -2.8265 |
63
+ | 0.2141 | 0.2559 | 400 | 0.2357 | 9.8979 | -4.9468 | 0.9922 | 14.8447 | -128.1855 | -113.7022 | -2.7752 | -2.7613 |
64
+ | 0.2089 | 0.3839 | 600 | 0.2341 | 10.0245 | -4.8956 | 0.9922 | 14.9201 | -127.6730 | -112.4365 | -2.7914 | -2.7984 |
65
+ | 0.2148 | 0.5118 | 800 | 0.2309 | 10.0410 | -5.0904 | 0.9922 | 15.1314 | -129.6210 | -112.2710 | -2.8195 | -2.8238 |
66
+ | 0.1994 | 0.6398 | 1000 | 0.2303 | 10.1131 | -5.1876 | 0.9922 | 15.3008 | -130.5933 | -111.5497 | -2.7442 | -2.7461 |
67
+ | 0.2075 | 0.7678 | 1200 | 0.2304 | 10.1155 | -4.9679 | 0.9922 | 15.0834 | -128.3958 | -111.5260 | -2.7360 | -2.7372 |
68
+ | 0.1961 | 0.8957 | 1400 | 0.2295 | 10.1264 | -5.0006 | 0.9922 | 15.1270 | -128.7231 | -111.4173 | -2.7320 | -2.7332 |
69
+
70
+
71
+ ### Framework versions
72
+
73
+ - Transformers 4.43.3
74
+ - Pytorch 2.2.2+cu121
75
+ - Datasets 2.20.0
76
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.2369839982275618,
5
+ "train_runtime": 14627.5004,
6
+ "train_samples": 50000,
7
+ "train_samples_per_second": 3.418,
8
+ "train_steps_per_second": 0.107
9
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.43.3"
6
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.2369839982275618,
5
+ "train_runtime": 14627.5004,
6
+ "train_samples": 50000,
7
+ "train_samples_per_second": 3.418,
8
+ "train_steps_per_second": 0.107
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,2509 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 200,
6
+ "global_step": 1563,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0006397952655150352,
13
+ "grad_norm": 412.2480368303884,
14
+ "learning_rate": 3.1847133757961784e-09,
15
+ "logits/generated": -2.53759503364563,
16
+ "logits/real": -2.290781259536743,
17
+ "logps/generated": -79.34207153320312,
18
+ "logps/real": -142.87411499023438,
19
+ "loss": 1.0838,
20
+ "rewards/accuracies": 0.0,
21
+ "rewards/generated": 0.0,
22
+ "rewards/margins": 0.0,
23
+ "rewards/real": 0.0,
24
+ "step": 1
25
+ },
26
+ {
27
+ "epoch": 0.006397952655150352,
28
+ "grad_norm": 431.96820478477065,
29
+ "learning_rate": 3.184713375796178e-08,
30
+ "logits/generated": -2.5309884548187256,
31
+ "logits/real": -2.523167371749878,
32
+ "logps/generated": -88.78756713867188,
33
+ "logps/real": -236.1966094970703,
34
+ "loss": 1.1221,
35
+ "rewards/accuracies": 0.5555555820465088,
36
+ "rewards/generated": -0.010531743057072163,
37
+ "rewards/margins": 0.02550431713461876,
38
+ "rewards/real": 0.014972572214901447,
39
+ "step": 10
40
+ },
41
+ {
42
+ "epoch": 0.012795905310300703,
43
+ "grad_norm": 299.862381030409,
44
+ "learning_rate": 6.369426751592356e-08,
45
+ "logits/generated": -2.578387498855591,
46
+ "logits/real": -2.5629818439483643,
47
+ "logps/generated": -90.43721771240234,
48
+ "logps/real": -232.0134735107422,
49
+ "loss": 0.9789,
50
+ "rewards/accuracies": 0.9624999761581421,
51
+ "rewards/generated": -0.10627230256795883,
52
+ "rewards/margins": 0.31521138548851013,
53
+ "rewards/real": 0.2089390754699707,
54
+ "step": 20
55
+ },
56
+ {
57
+ "epoch": 0.019193857965451054,
58
+ "grad_norm": 128.80078242264278,
59
+ "learning_rate": 9.554140127388536e-08,
60
+ "logits/generated": -2.639655590057373,
61
+ "logits/real": -2.623234748840332,
62
+ "logps/generated": -81.7702865600586,
63
+ "logps/real": -192.8977813720703,
64
+ "loss": 0.732,
65
+ "rewards/accuracies": 1.0,
66
+ "rewards/generated": -0.4113238453865051,
67
+ "rewards/margins": 1.0968698263168335,
68
+ "rewards/real": 0.6855460405349731,
69
+ "step": 30
70
+ },
71
+ {
72
+ "epoch": 0.025591810620601407,
73
+ "grad_norm": 53.90372491881725,
74
+ "learning_rate": 1.2738853503184713e-07,
75
+ "logits/generated": -2.754666566848755,
76
+ "logits/real": -2.771085262298584,
77
+ "logps/generated": -93.06287384033203,
78
+ "logps/real": -208.916015625,
79
+ "loss": 0.551,
80
+ "rewards/accuracies": 1.0,
81
+ "rewards/generated": -0.8528655171394348,
82
+ "rewards/margins": 2.697784423828125,
83
+ "rewards/real": 1.844918966293335,
84
+ "step": 40
85
+ },
86
+ {
87
+ "epoch": 0.03198976327575176,
88
+ "grad_norm": 36.32945064394617,
89
+ "learning_rate": 1.592356687898089e-07,
90
+ "logits/generated": -2.875619649887085,
91
+ "logits/real": -2.8736255168914795,
92
+ "logps/generated": -102.63410949707031,
93
+ "logps/real": -185.98129272460938,
94
+ "loss": 0.4387,
95
+ "rewards/accuracies": 0.987500011920929,
96
+ "rewards/generated": -1.5957601070404053,
97
+ "rewards/margins": 4.849114894866943,
98
+ "rewards/real": 3.253354549407959,
99
+ "step": 50
100
+ },
101
+ {
102
+ "epoch": 0.03838771593090211,
103
+ "grad_norm": 22.26335616683524,
104
+ "learning_rate": 1.9108280254777072e-07,
105
+ "logits/generated": -2.9478697776794434,
106
+ "logits/real": -3.007082223892212,
107
+ "logps/generated": -110.52901458740234,
108
+ "logps/real": -171.8012237548828,
109
+ "loss": 0.3865,
110
+ "rewards/accuracies": 1.0,
111
+ "rewards/generated": -1.786839485168457,
112
+ "rewards/margins": 6.540804862976074,
113
+ "rewards/real": 4.753964900970459,
114
+ "step": 60
115
+ },
116
+ {
117
+ "epoch": 0.044785668586052464,
118
+ "grad_norm": 18.72255550731988,
119
+ "learning_rate": 2.2292993630573247e-07,
120
+ "logits/generated": -2.9968416690826416,
121
+ "logits/real": -3.001485824584961,
122
+ "logps/generated": -107.23085021972656,
123
+ "logps/real": -146.6729736328125,
124
+ "loss": 0.3283,
125
+ "rewards/accuracies": 1.0,
126
+ "rewards/generated": -1.93621826171875,
127
+ "rewards/margins": 8.847390174865723,
128
+ "rewards/real": 6.911171913146973,
129
+ "step": 70
130
+ },
131
+ {
132
+ "epoch": 0.05118362124120281,
133
+ "grad_norm": 9.385567995761765,
134
+ "learning_rate": 2.5477707006369425e-07,
135
+ "logits/generated": -2.985713481903076,
136
+ "logits/real": -3.0017921924591064,
137
+ "logps/generated": -111.2695083618164,
138
+ "logps/real": -128.3047332763672,
139
+ "loss": 0.2744,
140
+ "rewards/accuracies": 1.0,
141
+ "rewards/generated": -2.9947776794433594,
142
+ "rewards/margins": 10.760424613952637,
143
+ "rewards/real": 7.765646934509277,
144
+ "step": 80
145
+ },
146
+ {
147
+ "epoch": 0.05758157389635317,
148
+ "grad_norm": 4.574434721884733,
149
+ "learning_rate": 2.86624203821656e-07,
150
+ "logits/generated": -2.93389630317688,
151
+ "logits/real": -2.9760384559631348,
152
+ "logps/generated": -124.31734466552734,
153
+ "logps/real": -127.97233581542969,
154
+ "loss": 0.269,
155
+ "rewards/accuracies": 1.0,
156
+ "rewards/generated": -3.8817825317382812,
157
+ "rewards/margins": 12.403055191040039,
158
+ "rewards/real": 8.521272659301758,
159
+ "step": 90
160
+ },
161
+ {
162
+ "epoch": 0.06397952655150352,
163
+ "grad_norm": 9.260926951901052,
164
+ "learning_rate": 3.184713375796178e-07,
165
+ "logits/generated": -2.948780059814453,
166
+ "logits/real": -2.924206495285034,
167
+ "logps/generated": -117.82643127441406,
168
+ "logps/real": -140.2669219970703,
169
+ "loss": 0.2787,
170
+ "rewards/accuracies": 1.0,
171
+ "rewards/generated": -3.5896174907684326,
172
+ "rewards/margins": 13.11004638671875,
173
+ "rewards/real": 9.520429611206055,
174
+ "step": 100
175
+ },
176
+ {
177
+ "epoch": 0.07037747920665387,
178
+ "grad_norm": 5.655236437132318,
179
+ "learning_rate": 3.5031847133757957e-07,
180
+ "logits/generated": -2.9382705688476562,
181
+ "logits/real": -2.895289421081543,
182
+ "logps/generated": -111.1765365600586,
183
+ "logps/real": -122.95256042480469,
184
+ "loss": 0.2607,
185
+ "rewards/accuracies": 1.0,
186
+ "rewards/generated": -3.181370258331299,
187
+ "rewards/margins": 12.501322746276855,
188
+ "rewards/real": 9.319952011108398,
189
+ "step": 110
190
+ },
191
+ {
192
+ "epoch": 0.07677543186180422,
193
+ "grad_norm": 10.474178085979151,
194
+ "learning_rate": 3.8216560509554143e-07,
195
+ "logits/generated": -2.9170148372650146,
196
+ "logits/real": -2.9494102001190186,
197
+ "logps/generated": -120.6019515991211,
198
+ "logps/real": -120.84870910644531,
199
+ "loss": 0.2583,
200
+ "rewards/accuracies": 1.0,
201
+ "rewards/generated": -3.854489803314209,
202
+ "rewards/margins": 13.146855354309082,
203
+ "rewards/real": 9.292365074157715,
204
+ "step": 120
205
+ },
206
+ {
207
+ "epoch": 0.08317338451695458,
208
+ "grad_norm": 3.425127673225565,
209
+ "learning_rate": 4.140127388535032e-07,
210
+ "logits/generated": -2.962573528289795,
211
+ "logits/real": -2.9198384284973145,
212
+ "logps/generated": -119.30030822753906,
213
+ "logps/real": -114.26997375488281,
214
+ "loss": 0.2475,
215
+ "rewards/accuracies": 1.0,
216
+ "rewards/generated": -4.209897041320801,
217
+ "rewards/margins": 13.073972702026367,
218
+ "rewards/real": 8.864076614379883,
219
+ "step": 130
220
+ },
221
+ {
222
+ "epoch": 0.08957133717210493,
223
+ "grad_norm": 2.8766539348394065,
224
+ "learning_rate": 4.4585987261146494e-07,
225
+ "logits/generated": -2.902782440185547,
226
+ "logits/real": -2.8874409198760986,
227
+ "logps/generated": -138.11538696289062,
228
+ "logps/real": -128.03103637695312,
229
+ "loss": 0.2566,
230
+ "rewards/accuracies": 1.0,
231
+ "rewards/generated": -5.331695556640625,
232
+ "rewards/margins": 15.117105484008789,
233
+ "rewards/real": 9.785411834716797,
234
+ "step": 140
235
+ },
236
+ {
237
+ "epoch": 0.09596928982725528,
238
+ "grad_norm": 2.928535512875351,
239
+ "learning_rate": 4.777070063694267e-07,
240
+ "logits/generated": -2.9390759468078613,
241
+ "logits/real": -2.9156928062438965,
242
+ "logps/generated": -128.9432373046875,
243
+ "logps/real": -112.00594329833984,
244
+ "loss": 0.2422,
245
+ "rewards/accuracies": 1.0,
246
+ "rewards/generated": -4.531386375427246,
247
+ "rewards/margins": 13.208353042602539,
248
+ "rewards/real": 8.676966667175293,
249
+ "step": 150
250
+ },
251
+ {
252
+ "epoch": 0.10236724248240563,
253
+ "grad_norm": 4.646412917226807,
254
+ "learning_rate": 4.989331436699858e-07,
255
+ "logits/generated": -2.907299041748047,
256
+ "logits/real": -2.8829636573791504,
257
+ "logps/generated": -147.1040802001953,
258
+ "logps/real": -130.88232421875,
259
+ "loss": 0.2552,
260
+ "rewards/accuracies": 1.0,
261
+ "rewards/generated": -5.514917373657227,
262
+ "rewards/margins": 16.280481338500977,
263
+ "rewards/real": 10.76556396484375,
264
+ "step": 160
265
+ },
266
+ {
267
+ "epoch": 0.10876519513755598,
268
+ "grad_norm": 2.67790411455109,
269
+ "learning_rate": 4.953769559032717e-07,
270
+ "logits/generated": -2.8610851764678955,
271
+ "logits/real": -2.931917667388916,
272
+ "logps/generated": -134.75003051757812,
273
+ "logps/real": -93.88365173339844,
274
+ "loss": 0.2215,
275
+ "rewards/accuracies": 1.0,
276
+ "rewards/generated": -4.963212013244629,
277
+ "rewards/margins": 13.223382949829102,
278
+ "rewards/real": 8.260170936584473,
279
+ "step": 170
280
+ },
281
+ {
282
+ "epoch": 0.11516314779270634,
283
+ "grad_norm": 3.0194042444553317,
284
+ "learning_rate": 4.918207681365576e-07,
285
+ "logits/generated": -2.8236758708953857,
286
+ "logits/real": -2.895191192626953,
287
+ "logps/generated": -128.0216522216797,
288
+ "logps/real": -105.7012939453125,
289
+ "loss": 0.2328,
290
+ "rewards/accuracies": 1.0,
291
+ "rewards/generated": -4.834359645843506,
292
+ "rewards/margins": 14.000758171081543,
293
+ "rewards/real": 9.166399002075195,
294
+ "step": 180
295
+ },
296
+ {
297
+ "epoch": 0.12156110044785669,
298
+ "grad_norm": 7.731581783616942,
299
+ "learning_rate": 4.882645803698435e-07,
300
+ "logits/generated": -2.880075693130493,
301
+ "logits/real": -2.863041400909424,
302
+ "logps/generated": -129.73480224609375,
303
+ "logps/real": -121.33485412597656,
304
+ "loss": 0.2407,
305
+ "rewards/accuracies": 0.987500011920929,
306
+ "rewards/generated": -4.882136344909668,
307
+ "rewards/margins": 15.376531600952148,
308
+ "rewards/real": 10.49439525604248,
309
+ "step": 190
310
+ },
311
+ {
312
+ "epoch": 0.12795905310300704,
313
+ "grad_norm": 6.783378094196307,
314
+ "learning_rate": 4.847083926031294e-07,
315
+ "logits/generated": -2.8695290088653564,
316
+ "logits/real": -2.863180637359619,
317
+ "logps/generated": -143.60906982421875,
318
+ "logps/real": -90.62736511230469,
319
+ "loss": 0.2262,
320
+ "rewards/accuracies": 1.0,
321
+ "rewards/generated": -5.7865214347839355,
322
+ "rewards/margins": 13.516904830932617,
323
+ "rewards/real": 7.73038387298584,
324
+ "step": 200
325
+ },
326
+ {
327
+ "epoch": 0.12795905310300704,
328
+ "eval_logits/generated": -2.833332061767578,
329
+ "eval_logits/real": -2.8265442848205566,
330
+ "eval_logps/generated": -126.3611831665039,
331
+ "eval_logps/real": -117.04948425292969,
332
+ "eval_loss": 0.2435438483953476,
333
+ "eval_rewards/accuracies": 0.9921875,
334
+ "eval_rewards/generated": -4.7644171714782715,
335
+ "eval_rewards/margins": 14.327579498291016,
336
+ "eval_rewards/real": 9.563161849975586,
337
+ "eval_runtime": 57.2388,
338
+ "eval_samples_per_second": 8.735,
339
+ "eval_steps_per_second": 0.28,
340
+ "step": 200
341
+ },
342
+ {
343
+ "epoch": 0.1343570057581574,
344
+ "grad_norm": 4.630294431619136,
345
+ "learning_rate": 4.811522048364154e-07,
346
+ "logits/generated": -2.848015546798706,
347
+ "logits/real": -2.878896474838257,
348
+ "logps/generated": -134.64739990234375,
349
+ "logps/real": -130.23609924316406,
350
+ "loss": 0.2535,
351
+ "rewards/accuracies": 1.0,
352
+ "rewards/generated": -5.333014965057373,
353
+ "rewards/margins": 16.449054718017578,
354
+ "rewards/real": 11.11603832244873,
355
+ "step": 210
356
+ },
357
+ {
358
+ "epoch": 0.14075495841330773,
359
+ "grad_norm": 3.37570780842826,
360
+ "learning_rate": 4.775960170697012e-07,
361
+ "logits/generated": -2.9035279750823975,
362
+ "logits/real": -2.8899435997009277,
363
+ "logps/generated": -159.3094024658203,
364
+ "logps/real": -124.04632568359375,
365
+ "loss": 0.243,
366
+ "rewards/accuracies": 1.0,
367
+ "rewards/generated": -6.095156669616699,
368
+ "rewards/margins": 16.67739486694336,
369
+ "rewards/real": 10.58223819732666,
370
+ "step": 220
371
+ },
372
+ {
373
+ "epoch": 0.1471529110684581,
374
+ "grad_norm": 2.451067761798054,
375
+ "learning_rate": 4.7403982930298717e-07,
376
+ "logits/generated": -2.87707257270813,
377
+ "logits/real": -2.901421546936035,
378
+ "logps/generated": -120.0963134765625,
379
+ "logps/real": -123.16734313964844,
380
+ "loss": 0.2359,
381
+ "rewards/accuracies": 1.0,
382
+ "rewards/generated": -4.565489768981934,
383
+ "rewards/margins": 15.016607284545898,
384
+ "rewards/real": 10.451117515563965,
385
+ "step": 230
386
+ },
387
+ {
388
+ "epoch": 0.15355086372360843,
389
+ "grad_norm": 2.3458265316121145,
390
+ "learning_rate": 4.7048364153627306e-07,
391
+ "logits/generated": -2.845377206802368,
392
+ "logits/real": -2.9469656944274902,
393
+ "logps/generated": -130.70401000976562,
394
+ "logps/real": -102.62498474121094,
395
+ "loss": 0.2292,
396
+ "rewards/accuracies": 1.0,
397
+ "rewards/generated": -4.917192459106445,
398
+ "rewards/margins": 13.963025093078613,
399
+ "rewards/real": 9.045831680297852,
400
+ "step": 240
401
+ },
402
+ {
403
+ "epoch": 0.1599488163787588,
404
+ "grad_norm": 2.2252235869873425,
405
+ "learning_rate": 4.66927453769559e-07,
406
+ "logits/generated": -2.814479351043701,
407
+ "logits/real": -2.79661226272583,
408
+ "logps/generated": -129.10992431640625,
409
+ "logps/real": -110.62039947509766,
410
+ "loss": 0.2418,
411
+ "rewards/accuracies": 1.0,
412
+ "rewards/generated": -4.840092182159424,
413
+ "rewards/margins": 14.007853507995605,
414
+ "rewards/real": 9.16776180267334,
415
+ "step": 250
416
+ },
417
+ {
418
+ "epoch": 0.16634676903390916,
419
+ "grad_norm": 3.2120162920530775,
420
+ "learning_rate": 4.633712660028449e-07,
421
+ "logits/generated": -2.7691586017608643,
422
+ "logits/real": -2.803241729736328,
423
+ "logps/generated": -130.2401885986328,
424
+ "logps/real": -103.78974914550781,
425
+ "loss": 0.2207,
426
+ "rewards/accuracies": 1.0,
427
+ "rewards/generated": -4.814304828643799,
428
+ "rewards/margins": 14.570037841796875,
429
+ "rewards/real": 9.755732536315918,
430
+ "step": 260
431
+ },
432
+ {
433
+ "epoch": 0.1727447216890595,
434
+ "grad_norm": 2.265151763557376,
435
+ "learning_rate": 4.5981507823613085e-07,
436
+ "logits/generated": -2.7566680908203125,
437
+ "logits/real": -2.747866153717041,
438
+ "logps/generated": -152.24049377441406,
439
+ "logps/real": -107.55720520019531,
440
+ "loss": 0.2249,
441
+ "rewards/accuracies": 0.987500011920929,
442
+ "rewards/generated": -6.19102144241333,
443
+ "rewards/margins": 15.924577713012695,
444
+ "rewards/real": 9.733556747436523,
445
+ "step": 270
446
+ },
447
+ {
448
+ "epoch": 0.17914267434420986,
449
+ "grad_norm": 3.5602666832695844,
450
+ "learning_rate": 4.562588904694168e-07,
451
+ "logits/generated": -2.7723679542541504,
452
+ "logits/real": -2.785216808319092,
453
+ "logps/generated": -128.1282958984375,
454
+ "logps/real": -111.1799545288086,
455
+ "loss": 0.2202,
456
+ "rewards/accuracies": 1.0,
457
+ "rewards/generated": -4.806519508361816,
458
+ "rewards/margins": 15.082513809204102,
459
+ "rewards/real": 10.275992393493652,
460
+ "step": 280
461
+ },
462
+ {
463
+ "epoch": 0.1855406269993602,
464
+ "grad_norm": 3.11519890787563,
465
+ "learning_rate": 4.5270270270270264e-07,
466
+ "logits/generated": -2.757876396179199,
467
+ "logits/real": -2.795422077178955,
468
+ "logps/generated": -128.11813354492188,
469
+ "logps/real": -102.88358306884766,
470
+ "loss": 0.233,
471
+ "rewards/accuracies": 0.987500011920929,
472
+ "rewards/generated": -4.833106994628906,
473
+ "rewards/margins": 14.29919147491455,
474
+ "rewards/real": 9.466082572937012,
475
+ "step": 290
476
+ },
477
+ {
478
+ "epoch": 0.19193857965451055,
479
+ "grad_norm": 3.487329720349146,
480
+ "learning_rate": 4.491465149359886e-07,
481
+ "logits/generated": -2.8109488487243652,
482
+ "logits/real": -2.7747206687927246,
483
+ "logps/generated": -129.84274291992188,
484
+ "logps/real": -106.18394470214844,
485
+ "loss": 0.2286,
486
+ "rewards/accuracies": 1.0,
487
+ "rewards/generated": -4.749394416809082,
488
+ "rewards/margins": 14.446981430053711,
489
+ "rewards/real": 9.697588920593262,
490
+ "step": 300
491
+ },
492
+ {
493
+ "epoch": 0.19833653230966092,
494
+ "grad_norm": 4.236344907363098,
495
+ "learning_rate": 4.4559032716927454e-07,
496
+ "logits/generated": -2.7962634563446045,
497
+ "logits/real": -2.86857533454895,
498
+ "logps/generated": -137.8242645263672,
499
+ "logps/real": -126.29652404785156,
500
+ "loss": 0.2454,
501
+ "rewards/accuracies": 1.0,
502
+ "rewards/generated": -5.464461803436279,
503
+ "rewards/margins": 16.495359420776367,
504
+ "rewards/real": 11.030899047851562,
505
+ "step": 310
506
+ },
507
+ {
508
+ "epoch": 0.20473448496481125,
509
+ "grad_norm": 2.0886040022985206,
510
+ "learning_rate": 4.420341394025605e-07,
511
+ "logits/generated": -2.780796766281128,
512
+ "logits/real": -2.830658197402954,
513
+ "logps/generated": -128.07977294921875,
514
+ "logps/real": -102.74151611328125,
515
+ "loss": 0.2217,
516
+ "rewards/accuracies": 0.987500011920929,
517
+ "rewards/generated": -4.719244956970215,
518
+ "rewards/margins": 13.948654174804688,
519
+ "rewards/real": 9.229410171508789,
520
+ "step": 320
521
+ },
522
+ {
523
+ "epoch": 0.21113243761996162,
524
+ "grad_norm": 2.7732179589224737,
525
+ "learning_rate": 4.384779516358463e-07,
526
+ "logits/generated": -2.771653175354004,
527
+ "logits/real": -2.719320058822632,
528
+ "logps/generated": -128.0609130859375,
529
+ "logps/real": -110.12910461425781,
530
+ "loss": 0.2298,
531
+ "rewards/accuracies": 1.0,
532
+ "rewards/generated": -4.775327682495117,
533
+ "rewards/margins": 14.936683654785156,
534
+ "rewards/real": 10.161355972290039,
535
+ "step": 330
536
+ },
537
+ {
538
+ "epoch": 0.21753039027511195,
539
+ "grad_norm": 2.4432792911747327,
540
+ "learning_rate": 4.3492176386913227e-07,
541
+ "logits/generated": -2.7587437629699707,
542
+ "logits/real": -2.802103042602539,
543
+ "logps/generated": -152.65858459472656,
544
+ "logps/real": -112.84379577636719,
545
+ "loss": 0.2312,
546
+ "rewards/accuracies": 1.0,
547
+ "rewards/generated": -6.072720527648926,
548
+ "rewards/margins": 16.01877212524414,
549
+ "rewards/real": 9.946051597595215,
550
+ "step": 340
551
+ },
552
+ {
553
+ "epoch": 0.22392834293026231,
554
+ "grad_norm": 4.160225941183556,
555
+ "learning_rate": 4.313655761024182e-07,
556
+ "logits/generated": -2.7678041458129883,
557
+ "logits/real": -2.7790327072143555,
558
+ "logps/generated": -120.19953918457031,
559
+ "logps/real": -113.22270202636719,
560
+ "loss": 0.2164,
561
+ "rewards/accuracies": 1.0,
562
+ "rewards/generated": -4.654033660888672,
563
+ "rewards/margins": 14.861625671386719,
564
+ "rewards/real": 10.207592010498047,
565
+ "step": 350
566
+ },
567
+ {
568
+ "epoch": 0.23032629558541268,
569
+ "grad_norm": 2.2403711981241106,
570
+ "learning_rate": 4.278093883357041e-07,
571
+ "logits/generated": -2.836555004119873,
572
+ "logits/real": -2.6989388465881348,
573
+ "logps/generated": -133.00775146484375,
574
+ "logps/real": -108.97412109375,
575
+ "loss": 0.2254,
576
+ "rewards/accuracies": 0.9750000238418579,
577
+ "rewards/generated": -4.826836109161377,
578
+ "rewards/margins": 14.98741340637207,
579
+ "rewards/real": 10.160576820373535,
580
+ "step": 360
581
+ },
582
+ {
583
+ "epoch": 0.236724248240563,
584
+ "grad_norm": 3.08941181412325,
585
+ "learning_rate": 4.2425320056899e-07,
586
+ "logits/generated": -2.7016754150390625,
587
+ "logits/real": -2.7019612789154053,
588
+ "logps/generated": -125.4333724975586,
589
+ "logps/real": -107.02604675292969,
590
+ "loss": 0.2184,
591
+ "rewards/accuracies": 1.0,
592
+ "rewards/generated": -4.93725061416626,
593
+ "rewards/margins": 14.800321578979492,
594
+ "rewards/real": 9.86307144165039,
595
+ "step": 370
596
+ },
597
+ {
598
+ "epoch": 0.24312220089571338,
599
+ "grad_norm": 8.811415551257076,
600
+ "learning_rate": 4.2069701280227595e-07,
601
+ "logits/generated": -2.7188172340393066,
602
+ "logits/real": -2.777225971221924,
603
+ "logps/generated": -131.77468872070312,
604
+ "logps/real": -97.81922912597656,
605
+ "loss": 0.2132,
606
+ "rewards/accuracies": 1.0,
607
+ "rewards/generated": -4.891592979431152,
608
+ "rewards/margins": 14.796887397766113,
609
+ "rewards/real": 9.905295372009277,
610
+ "step": 380
611
+ },
612
+ {
613
+ "epoch": 0.2495201535508637,
614
+ "grad_norm": 2.1656082848995992,
615
+ "learning_rate": 4.1714082503556185e-07,
616
+ "logits/generated": -2.81782865524292,
617
+ "logits/real": -2.733973264694214,
618
+ "logps/generated": -125.90080261230469,
619
+ "logps/real": -108.9613037109375,
620
+ "loss": 0.208,
621
+ "rewards/accuracies": 1.0,
622
+ "rewards/generated": -4.474228858947754,
623
+ "rewards/margins": 14.704130172729492,
624
+ "rewards/real": 10.229901313781738,
625
+ "step": 390
626
+ },
627
+ {
628
+ "epoch": 0.2559181062060141,
629
+ "grad_norm": 2.1913448139958307,
630
+ "learning_rate": 4.135846372688478e-07,
631
+ "logits/generated": -2.741250514984131,
632
+ "logits/real": -2.797882080078125,
633
+ "logps/generated": -140.27984619140625,
634
+ "logps/real": -113.54410552978516,
635
+ "loss": 0.2141,
636
+ "rewards/accuracies": 1.0,
637
+ "rewards/generated": -5.6347832679748535,
638
+ "rewards/margins": 16.515918731689453,
639
+ "rewards/real": 10.881135940551758,
640
+ "step": 400
641
+ },
642
+ {
643
+ "epoch": 0.2559181062060141,
644
+ "eval_logits/generated": -2.7752091884613037,
645
+ "eval_logits/real": -2.76131010055542,
646
+ "eval_logps/generated": -128.1855010986328,
647
+ "eval_logps/real": -113.70217895507812,
648
+ "eval_loss": 0.23566634953022003,
649
+ "eval_rewards/accuracies": 0.9921875,
650
+ "eval_rewards/generated": -4.946849822998047,
651
+ "eval_rewards/margins": 14.844742774963379,
652
+ "eval_rewards/real": 9.897892951965332,
653
+ "eval_runtime": 70.5415,
654
+ "eval_samples_per_second": 7.088,
655
+ "eval_steps_per_second": 0.227,
656
+ "step": 400
657
+ },
658
+ {
659
+ "epoch": 0.26231605886116444,
660
+ "grad_norm": 3.5382231043936563,
661
+ "learning_rate": 4.100284495021337e-07,
662
+ "logits/generated": -2.7606406211853027,
663
+ "logits/real": -2.8516087532043457,
664
+ "logps/generated": -150.7864990234375,
665
+ "logps/real": -107.80538177490234,
666
+ "loss": 0.222,
667
+ "rewards/accuracies": 1.0,
668
+ "rewards/generated": -6.30859899520874,
669
+ "rewards/margins": 16.771678924560547,
670
+ "rewards/real": 10.463083267211914,
671
+ "step": 410
672
+ },
673
+ {
674
+ "epoch": 0.2687140115163148,
675
+ "grad_norm": 2.180636822570479,
676
+ "learning_rate": 4.064722617354196e-07,
677
+ "logits/generated": -2.777144193649292,
678
+ "logits/real": -2.7791004180908203,
679
+ "logps/generated": -141.62130737304688,
680
+ "logps/real": -111.6278305053711,
681
+ "loss": 0.2179,
682
+ "rewards/accuracies": 1.0,
683
+ "rewards/generated": -5.845305442810059,
684
+ "rewards/margins": 16.107940673828125,
685
+ "rewards/real": 10.262636184692383,
686
+ "step": 420
687
+ },
688
+ {
689
+ "epoch": 0.2751119641714651,
690
+ "grad_norm": 7.826230059811001,
691
+ "learning_rate": 4.0291607396870553e-07,
692
+ "logits/generated": -2.7701034545898438,
693
+ "logits/real": -2.835702657699585,
694
+ "logps/generated": -134.1055450439453,
695
+ "logps/real": -115.15937805175781,
696
+ "loss": 0.2277,
697
+ "rewards/accuracies": 1.0,
698
+ "rewards/generated": -5.376823425292969,
699
+ "rewards/margins": 16.15007972717285,
700
+ "rewards/real": 10.773252487182617,
701
+ "step": 430
702
+ },
703
+ {
704
+ "epoch": 0.28150991682661547,
705
+ "grad_norm": 4.574173821690705,
706
+ "learning_rate": 3.993598862019915e-07,
707
+ "logits/generated": -2.7749392986297607,
708
+ "logits/real": -2.82127046585083,
709
+ "logps/generated": -133.71063232421875,
710
+ "logps/real": -103.6177978515625,
711
+ "loss": 0.2214,
712
+ "rewards/accuracies": 1.0,
713
+ "rewards/generated": -5.1843953132629395,
714
+ "rewards/margins": 14.904150009155273,
715
+ "rewards/real": 9.719755172729492,
716
+ "step": 440
717
+ },
718
+ {
719
+ "epoch": 0.28790786948176583,
720
+ "grad_norm": 2.774800684163752,
721
+ "learning_rate": 3.9580369843527737e-07,
722
+ "logits/generated": -2.744696617126465,
723
+ "logits/real": -2.7654058933258057,
724
+ "logps/generated": -157.8831024169922,
725
+ "logps/real": -103.58988952636719,
726
+ "loss": 0.2138,
727
+ "rewards/accuracies": 1.0,
728
+ "rewards/generated": -6.751193046569824,
729
+ "rewards/margins": 17.019277572631836,
730
+ "rewards/real": 10.268084526062012,
731
+ "step": 450
732
+ },
733
+ {
734
+ "epoch": 0.2943058221369162,
735
+ "grad_norm": 5.160429480281925,
736
+ "learning_rate": 3.9224751066856327e-07,
737
+ "logits/generated": -2.7339484691619873,
738
+ "logits/real": -2.685930013656616,
739
+ "logps/generated": -135.38714599609375,
740
+ "logps/real": -114.84906005859375,
741
+ "loss": 0.2204,
742
+ "rewards/accuracies": 1.0,
743
+ "rewards/generated": -5.209853649139404,
744
+ "rewards/margins": 15.920175552368164,
745
+ "rewards/real": 10.710321426391602,
746
+ "step": 460
747
+ },
748
+ {
749
+ "epoch": 0.30070377479206656,
750
+ "grad_norm": 4.706717543122211,
751
+ "learning_rate": 3.886913229018492e-07,
752
+ "logits/generated": -2.7286899089813232,
753
+ "logits/real": -2.782683849334717,
754
+ "logps/generated": -146.5428924560547,
755
+ "logps/real": -119.84122467041016,
756
+ "loss": 0.2176,
757
+ "rewards/accuracies": 1.0,
758
+ "rewards/generated": -5.839329719543457,
759
+ "rewards/margins": 16.770212173461914,
760
+ "rewards/real": 10.930883407592773,
761
+ "step": 470
762
+ },
763
+ {
764
+ "epoch": 0.30710172744721687,
765
+ "grad_norm": 2.3058413282374715,
766
+ "learning_rate": 3.851351351351351e-07,
767
+ "logits/generated": -2.7684266567230225,
768
+ "logits/real": -2.802767276763916,
769
+ "logps/generated": -138.41696166992188,
770
+ "logps/real": -113.2447738647461,
771
+ "loss": 0.2321,
772
+ "rewards/accuracies": 1.0,
773
+ "rewards/generated": -5.6439032554626465,
774
+ "rewards/margins": 15.897249221801758,
775
+ "rewards/real": 10.253345489501953,
776
+ "step": 480
777
+ },
778
+ {
779
+ "epoch": 0.31349968010236723,
780
+ "grad_norm": 3.3424369618748706,
781
+ "learning_rate": 3.8157894736842105e-07,
782
+ "logits/generated": -2.766950845718384,
783
+ "logits/real": -2.794663906097412,
784
+ "logps/generated": -123.19486236572266,
785
+ "logps/real": -98.98766326904297,
786
+ "loss": 0.2199,
787
+ "rewards/accuracies": 1.0,
788
+ "rewards/generated": -4.739457130432129,
789
+ "rewards/margins": 14.55401611328125,
790
+ "rewards/real": 9.814558029174805,
791
+ "step": 490
792
+ },
793
+ {
794
+ "epoch": 0.3198976327575176,
795
+ "grad_norm": 2.169410849214599,
796
+ "learning_rate": 3.7802275960170695e-07,
797
+ "logits/generated": -2.715301513671875,
798
+ "logits/real": -2.8804736137390137,
799
+ "logps/generated": -140.2711181640625,
800
+ "logps/real": -100.17481994628906,
801
+ "loss": 0.2348,
802
+ "rewards/accuracies": 0.987500011920929,
803
+ "rewards/generated": -5.455787658691406,
804
+ "rewards/margins": 15.202703475952148,
805
+ "rewards/real": 9.746915817260742,
806
+ "step": 500
807
+ },
808
+ {
809
+ "epoch": 0.32629558541266795,
810
+ "grad_norm": 2.04238252540844,
811
+ "learning_rate": 3.7446657183499284e-07,
812
+ "logits/generated": -2.7904624938964844,
813
+ "logits/real": -2.7905654907226562,
814
+ "logps/generated": -145.4995574951172,
815
+ "logps/real": -121.39151763916016,
816
+ "loss": 0.2262,
817
+ "rewards/accuracies": 1.0,
818
+ "rewards/generated": -5.898383140563965,
819
+ "rewards/margins": 17.675220489501953,
820
+ "rewards/real": 11.776837348937988,
821
+ "step": 510
822
+ },
823
+ {
824
+ "epoch": 0.3326935380678183,
825
+ "grad_norm": 2.4330841754647823,
826
+ "learning_rate": 3.709103840682788e-07,
827
+ "logits/generated": -2.728691577911377,
828
+ "logits/real": -2.8865561485290527,
829
+ "logps/generated": -156.2779998779297,
830
+ "logps/real": -108.7221450805664,
831
+ "loss": 0.2081,
832
+ "rewards/accuracies": 1.0,
833
+ "rewards/generated": -6.528653144836426,
834
+ "rewards/margins": 16.937610626220703,
835
+ "rewards/real": 10.408956527709961,
836
+ "step": 520
837
+ },
838
+ {
839
+ "epoch": 0.3390914907229686,
840
+ "grad_norm": 3.3742934939891174,
841
+ "learning_rate": 3.6735419630156474e-07,
842
+ "logits/generated": -2.819417953491211,
843
+ "logits/real": -2.816087245941162,
844
+ "logps/generated": -153.2522430419922,
845
+ "logps/real": -133.01565551757812,
846
+ "loss": 0.2314,
847
+ "rewards/accuracies": 1.0,
848
+ "rewards/generated": -6.505625247955322,
849
+ "rewards/margins": 18.493694305419922,
850
+ "rewards/real": 11.988069534301758,
851
+ "step": 530
852
+ },
853
+ {
854
+ "epoch": 0.345489443378119,
855
+ "grad_norm": 2.5651527495210242,
856
+ "learning_rate": 3.637980085348506e-07,
857
+ "logits/generated": -2.7839221954345703,
858
+ "logits/real": -2.799818515777588,
859
+ "logps/generated": -126.00923156738281,
860
+ "logps/real": -103.7330093383789,
861
+ "loss": 0.2194,
862
+ "rewards/accuracies": 1.0,
863
+ "rewards/generated": -4.9756646156311035,
864
+ "rewards/margins": 15.352749824523926,
865
+ "rewards/real": 10.377084732055664,
866
+ "step": 540
867
+ },
868
+ {
869
+ "epoch": 0.35188739603326935,
870
+ "grad_norm": 2.096110953513035,
871
+ "learning_rate": 3.602418207681365e-07,
872
+ "logits/generated": -2.7390735149383545,
873
+ "logits/real": -2.818681240081787,
874
+ "logps/generated": -123.09765625,
875
+ "logps/real": -97.98001861572266,
876
+ "loss": 0.211,
877
+ "rewards/accuracies": 0.987500011920929,
878
+ "rewards/generated": -4.679843902587891,
879
+ "rewards/margins": 14.732965469360352,
880
+ "rewards/real": 10.053121566772461,
881
+ "step": 550
882
+ },
883
+ {
884
+ "epoch": 0.3582853486884197,
885
+ "grad_norm": 2.3805224123873767,
886
+ "learning_rate": 3.5668563300142247e-07,
887
+ "logits/generated": -2.74855899810791,
888
+ "logits/real": -2.8764424324035645,
889
+ "logps/generated": -138.00978088378906,
890
+ "logps/real": -104.4416275024414,
891
+ "loss": 0.2092,
892
+ "rewards/accuracies": 1.0,
893
+ "rewards/generated": -5.503247261047363,
894
+ "rewards/margins": 15.430496215820312,
895
+ "rewards/real": 9.927248001098633,
896
+ "step": 560
897
+ },
898
+ {
899
+ "epoch": 0.3646833013435701,
900
+ "grad_norm": 2.9036557884699996,
901
+ "learning_rate": 3.5312944523470837e-07,
902
+ "logits/generated": -2.8142824172973633,
903
+ "logits/real": -2.8175225257873535,
904
+ "logps/generated": -138.83673095703125,
905
+ "logps/real": -105.87809753417969,
906
+ "loss": 0.2188,
907
+ "rewards/accuracies": 1.0,
908
+ "rewards/generated": -5.465325355529785,
909
+ "rewards/margins": 15.622749328613281,
910
+ "rewards/real": 10.157423973083496,
911
+ "step": 570
912
+ },
913
+ {
914
+ "epoch": 0.3710812539987204,
915
+ "grad_norm": 2.1880847814800948,
916
+ "learning_rate": 3.495732574679943e-07,
917
+ "logits/generated": -2.8173575401306152,
918
+ "logits/real": -2.8300251960754395,
919
+ "logps/generated": -149.59030151367188,
920
+ "logps/real": -106.2435302734375,
921
+ "loss": 0.2234,
922
+ "rewards/accuracies": 1.0,
923
+ "rewards/generated": -5.7458953857421875,
924
+ "rewards/margins": 16.33285140991211,
925
+ "rewards/real": 10.586955070495605,
926
+ "step": 580
927
+ },
928
+ {
929
+ "epoch": 0.37747920665387075,
930
+ "grad_norm": 2.192659377942886,
931
+ "learning_rate": 3.460170697012802e-07,
932
+ "logits/generated": -2.777217388153076,
933
+ "logits/real": -2.863020181655884,
934
+ "logps/generated": -129.68231201171875,
935
+ "logps/real": -108.95207214355469,
936
+ "loss": 0.2224,
937
+ "rewards/accuracies": 1.0,
938
+ "rewards/generated": -4.835152626037598,
939
+ "rewards/margins": 15.670806884765625,
940
+ "rewards/real": 10.835652351379395,
941
+ "step": 590
942
+ },
943
+ {
944
+ "epoch": 0.3838771593090211,
945
+ "grad_norm": 12.154950352869962,
946
+ "learning_rate": 3.424608819345661e-07,
947
+ "logits/generated": -2.823272228240967,
948
+ "logits/real": -2.7826805114746094,
949
+ "logps/generated": -117.91121673583984,
950
+ "logps/real": -111.25467681884766,
951
+ "loss": 0.2089,
952
+ "rewards/accuracies": 1.0,
953
+ "rewards/generated": -4.435954570770264,
954
+ "rewards/margins": 14.716769218444824,
955
+ "rewards/real": 10.280814170837402,
956
+ "step": 600
957
+ },
958
+ {
959
+ "epoch": 0.3838771593090211,
960
+ "eval_logits/generated": -2.7913894653320312,
961
+ "eval_logits/real": -2.798403024673462,
962
+ "eval_logps/generated": -127.67301940917969,
963
+ "eval_logps/real": -112.43653106689453,
964
+ "eval_loss": 0.23413488268852234,
965
+ "eval_rewards/accuracies": 0.9921875,
966
+ "eval_rewards/generated": -4.895601272583008,
967
+ "eval_rewards/margins": 14.920058250427246,
968
+ "eval_rewards/real": 10.024457931518555,
969
+ "eval_runtime": 70.7535,
970
+ "eval_samples_per_second": 7.067,
971
+ "eval_steps_per_second": 0.226,
972
+ "step": 600
973
+ },
974
+ {
975
+ "epoch": 0.3902751119641715,
976
+ "grad_norm": 2.0157880028714548,
977
+ "learning_rate": 3.3890469416785205e-07,
978
+ "logits/generated": -2.7878739833831787,
979
+ "logits/real": -2.886143445968628,
980
+ "logps/generated": -132.29232788085938,
981
+ "logps/real": -107.32408142089844,
982
+ "loss": 0.2185,
983
+ "rewards/accuracies": 1.0,
984
+ "rewards/generated": -5.14271354675293,
985
+ "rewards/margins": 15.367979049682617,
986
+ "rewards/real": 10.225264549255371,
987
+ "step": 610
988
+ },
989
+ {
990
+ "epoch": 0.39667306461932184,
991
+ "grad_norm": 3.4761506983717623,
992
+ "learning_rate": 3.35348506401138e-07,
993
+ "logits/generated": -2.800938129425049,
994
+ "logits/real": -2.9011588096618652,
995
+ "logps/generated": -152.92681884765625,
996
+ "logps/real": -90.30223083496094,
997
+ "loss": 0.2051,
998
+ "rewards/accuracies": 1.0,
999
+ "rewards/generated": -6.535437107086182,
1000
+ "rewards/margins": 15.690652847290039,
1001
+ "rewards/real": 9.155214309692383,
1002
+ "step": 620
1003
+ },
1004
+ {
1005
+ "epoch": 0.40307101727447214,
1006
+ "grad_norm": 2.066415328730325,
1007
+ "learning_rate": 3.3179231863442384e-07,
1008
+ "logits/generated": -2.7363858222961426,
1009
+ "logits/real": -2.9205002784729004,
1010
+ "logps/generated": -139.44544982910156,
1011
+ "logps/real": -93.95768737792969,
1012
+ "loss": 0.2054,
1013
+ "rewards/accuracies": 0.987500011920929,
1014
+ "rewards/generated": -5.3508195877075195,
1015
+ "rewards/margins": 14.589759826660156,
1016
+ "rewards/real": 9.23893928527832,
1017
+ "step": 630
1018
+ },
1019
+ {
1020
+ "epoch": 0.4094689699296225,
1021
+ "grad_norm": 2.720280779674223,
1022
+ "learning_rate": 3.282361308677098e-07,
1023
+ "logits/generated": -2.717585325241089,
1024
+ "logits/real": -2.8081445693969727,
1025
+ "logps/generated": -139.2246551513672,
1026
+ "logps/real": -95.35455322265625,
1027
+ "loss": 0.2105,
1028
+ "rewards/accuracies": 1.0,
1029
+ "rewards/generated": -5.649844169616699,
1030
+ "rewards/margins": 15.598958015441895,
1031
+ "rewards/real": 9.949111938476562,
1032
+ "step": 640
1033
+ },
1034
+ {
1035
+ "epoch": 0.41586692258477287,
1036
+ "grad_norm": 2.4467415453795534,
1037
+ "learning_rate": 3.2467994310099573e-07,
1038
+ "logits/generated": -2.746037244796753,
1039
+ "logits/real": -2.7760324478149414,
1040
+ "logps/generated": -151.04168701171875,
1041
+ "logps/real": -117.99039459228516,
1042
+ "loss": 0.228,
1043
+ "rewards/accuracies": 1.0,
1044
+ "rewards/generated": -6.320367336273193,
1045
+ "rewards/margins": 17.100696563720703,
1046
+ "rewards/real": 10.780328750610352,
1047
+ "step": 650
1048
+ },
1049
+ {
1050
+ "epoch": 0.42226487523992323,
1051
+ "grad_norm": 2.5369736094086752,
1052
+ "learning_rate": 3.211237553342817e-07,
1053
+ "logits/generated": -2.7743470668792725,
1054
+ "logits/real": -2.9016330242156982,
1055
+ "logps/generated": -169.43215942382812,
1056
+ "logps/real": -110.16139221191406,
1057
+ "loss": 0.2213,
1058
+ "rewards/accuracies": 1.0,
1059
+ "rewards/generated": -7.090481758117676,
1060
+ "rewards/margins": 17.460203170776367,
1061
+ "rewards/real": 10.369722366333008,
1062
+ "step": 660
1063
+ },
1064
+ {
1065
+ "epoch": 0.4286628278950736,
1066
+ "grad_norm": 3.940886446614688,
1067
+ "learning_rate": 3.175675675675675e-07,
1068
+ "logits/generated": -2.7578988075256348,
1069
+ "logits/real": -2.7422776222229004,
1070
+ "logps/generated": -142.6200408935547,
1071
+ "logps/real": -109.90318298339844,
1072
+ "loss": 0.2226,
1073
+ "rewards/accuracies": 1.0,
1074
+ "rewards/generated": -5.739054203033447,
1075
+ "rewards/margins": 16.534833908081055,
1076
+ "rewards/real": 10.79577922821045,
1077
+ "step": 670
1078
+ },
1079
+ {
1080
+ "epoch": 0.4350607805502239,
1081
+ "grad_norm": 1.9885549537383231,
1082
+ "learning_rate": 3.1401137980085347e-07,
1083
+ "logits/generated": -2.753906011581421,
1084
+ "logits/real": -2.8121540546417236,
1085
+ "logps/generated": -142.36412048339844,
1086
+ "logps/real": -95.9502182006836,
1087
+ "loss": 0.2135,
1088
+ "rewards/accuracies": 1.0,
1089
+ "rewards/generated": -6.0002851486206055,
1090
+ "rewards/margins": 15.877609252929688,
1091
+ "rewards/real": 9.877325057983398,
1092
+ "step": 680
1093
+ },
1094
+ {
1095
+ "epoch": 0.44145873320537427,
1096
+ "grad_norm": 2.360303864176274,
1097
+ "learning_rate": 3.104551920341394e-07,
1098
+ "logits/generated": -2.7917466163635254,
1099
+ "logits/real": -2.8809592723846436,
1100
+ "logps/generated": -128.33108520507812,
1101
+ "logps/real": -109.88875579833984,
1102
+ "loss": 0.2091,
1103
+ "rewards/accuracies": 1.0,
1104
+ "rewards/generated": -5.061340808868408,
1105
+ "rewards/margins": 15.903111457824707,
1106
+ "rewards/real": 10.841771125793457,
1107
+ "step": 690
1108
+ },
1109
+ {
1110
+ "epoch": 0.44785668586052463,
1111
+ "grad_norm": 2.2232483398700884,
1112
+ "learning_rate": 3.068990042674253e-07,
1113
+ "logits/generated": -2.7540788650512695,
1114
+ "logits/real": -2.8440656661987305,
1115
+ "logps/generated": -137.61160278320312,
1116
+ "logps/real": -110.4582290649414,
1117
+ "loss": 0.2235,
1118
+ "rewards/accuracies": 1.0,
1119
+ "rewards/generated": -5.629530906677246,
1120
+ "rewards/margins": 16.784801483154297,
1121
+ "rewards/real": 11.155269622802734,
1122
+ "step": 700
1123
+ },
1124
+ {
1125
+ "epoch": 0.454254638515675,
1126
+ "grad_norm": 2.420484904502962,
1127
+ "learning_rate": 3.033428165007112e-07,
1128
+ "logits/generated": -2.7791481018066406,
1129
+ "logits/real": -2.823207378387451,
1130
+ "logps/generated": -130.23995971679688,
1131
+ "logps/real": -104.56422424316406,
1132
+ "loss": 0.2045,
1133
+ "rewards/accuracies": 1.0,
1134
+ "rewards/generated": -5.046719551086426,
1135
+ "rewards/margins": 15.610448837280273,
1136
+ "rewards/real": 10.563729286193848,
1137
+ "step": 710
1138
+ },
1139
+ {
1140
+ "epoch": 0.46065259117082535,
1141
+ "grad_norm": 1.8814105476658205,
1142
+ "learning_rate": 2.9978662873399715e-07,
1143
+ "logits/generated": -2.754664897918701,
1144
+ "logits/real": -2.868396043777466,
1145
+ "logps/generated": -138.9653778076172,
1146
+ "logps/real": -104.86256408691406,
1147
+ "loss": 0.2139,
1148
+ "rewards/accuracies": 1.0,
1149
+ "rewards/generated": -5.563291549682617,
1150
+ "rewards/margins": 16.40620994567871,
1151
+ "rewards/real": 10.842918395996094,
1152
+ "step": 720
1153
+ },
1154
+ {
1155
+ "epoch": 0.46705054382597566,
1156
+ "grad_norm": 1.9123445490996223,
1157
+ "learning_rate": 2.9623044096728305e-07,
1158
+ "logits/generated": -2.7599892616271973,
1159
+ "logits/real": -2.836871862411499,
1160
+ "logps/generated": -143.76820373535156,
1161
+ "logps/real": -101.16252136230469,
1162
+ "loss": 0.2134,
1163
+ "rewards/accuracies": 1.0,
1164
+ "rewards/generated": -5.618950843811035,
1165
+ "rewards/margins": 16.395917892456055,
1166
+ "rewards/real": 10.776965141296387,
1167
+ "step": 730
1168
+ },
1169
+ {
1170
+ "epoch": 0.473448496481126,
1171
+ "grad_norm": 1.9685905302445135,
1172
+ "learning_rate": 2.92674253200569e-07,
1173
+ "logits/generated": -2.7747621536254883,
1174
+ "logits/real": -2.7902274131774902,
1175
+ "logps/generated": -118.72608947753906,
1176
+ "logps/real": -107.44352722167969,
1177
+ "loss": 0.2035,
1178
+ "rewards/accuracies": 1.0,
1179
+ "rewards/generated": -4.259701728820801,
1180
+ "rewards/margins": 15.15577220916748,
1181
+ "rewards/real": 10.89607048034668,
1182
+ "step": 740
1183
+ },
1184
+ {
1185
+ "epoch": 0.4798464491362764,
1186
+ "grad_norm": 2.185389542016207,
1187
+ "learning_rate": 2.8911806543385494e-07,
1188
+ "logits/generated": -2.694744348526001,
1189
+ "logits/real": -2.7872262001037598,
1190
+ "logps/generated": -117.7671890258789,
1191
+ "logps/real": -92.30455017089844,
1192
+ "loss": 0.2175,
1193
+ "rewards/accuracies": 0.987500011920929,
1194
+ "rewards/generated": -4.346129894256592,
1195
+ "rewards/margins": 13.698951721191406,
1196
+ "rewards/real": 9.352822303771973,
1197
+ "step": 750
1198
+ },
1199
+ {
1200
+ "epoch": 0.48624440179142675,
1201
+ "grad_norm": 2.0382414250965732,
1202
+ "learning_rate": 2.855618776671408e-07,
1203
+ "logits/generated": -2.7974352836608887,
1204
+ "logits/real": -2.8290600776672363,
1205
+ "logps/generated": -132.22039794921875,
1206
+ "logps/real": -104.40059661865234,
1207
+ "loss": 0.2157,
1208
+ "rewards/accuracies": 0.987500011920929,
1209
+ "rewards/generated": -5.511082649230957,
1210
+ "rewards/margins": 15.783149719238281,
1211
+ "rewards/real": 10.272066116333008,
1212
+ "step": 760
1213
+ },
1214
+ {
1215
+ "epoch": 0.4926423544465771,
1216
+ "grad_norm": 2.1388688577679016,
1217
+ "learning_rate": 2.8200568990042673e-07,
1218
+ "logits/generated": -2.7909514904022217,
1219
+ "logits/real": -2.808551073074341,
1220
+ "logps/generated": -151.1422119140625,
1221
+ "logps/real": -116.09953308105469,
1222
+ "loss": 0.2271,
1223
+ "rewards/accuracies": 1.0,
1224
+ "rewards/generated": -6.148834228515625,
1225
+ "rewards/margins": 17.4340877532959,
1226
+ "rewards/real": 11.28525447845459,
1227
+ "step": 770
1228
+ },
1229
+ {
1230
+ "epoch": 0.4990403071017274,
1231
+ "grad_norm": 2.2656673226538717,
1232
+ "learning_rate": 2.784495021337127e-07,
1233
+ "logits/generated": -2.7903616428375244,
1234
+ "logits/real": -2.8122215270996094,
1235
+ "logps/generated": -138.20358276367188,
1236
+ "logps/real": -104.3212661743164,
1237
+ "loss": 0.2234,
1238
+ "rewards/accuracies": 0.987500011920929,
1239
+ "rewards/generated": -5.353945732116699,
1240
+ "rewards/margins": 16.125553131103516,
1241
+ "rewards/real": 10.771608352661133,
1242
+ "step": 780
1243
+ },
1244
+ {
1245
+ "epoch": 0.5054382597568778,
1246
+ "grad_norm": 2.1885026864804624,
1247
+ "learning_rate": 2.7489331436699857e-07,
1248
+ "logits/generated": -2.8039913177490234,
1249
+ "logits/real": -2.7904744148254395,
1250
+ "logps/generated": -129.17857360839844,
1251
+ "logps/real": -115.23606872558594,
1252
+ "loss": 0.2302,
1253
+ "rewards/accuracies": 1.0,
1254
+ "rewards/generated": -5.161337852478027,
1255
+ "rewards/margins": 16.69219398498535,
1256
+ "rewards/real": 11.530855178833008,
1257
+ "step": 790
1258
+ },
1259
+ {
1260
+ "epoch": 0.5118362124120281,
1261
+ "grad_norm": 2.869890153550306,
1262
+ "learning_rate": 2.7133712660028446e-07,
1263
+ "logits/generated": -2.773207426071167,
1264
+ "logits/real": -2.7846627235412598,
1265
+ "logps/generated": -119.42828369140625,
1266
+ "logps/real": -115.4027099609375,
1267
+ "loss": 0.2148,
1268
+ "rewards/accuracies": 1.0,
1269
+ "rewards/generated": -4.340595722198486,
1270
+ "rewards/margins": 15.677358627319336,
1271
+ "rewards/real": 11.336763381958008,
1272
+ "step": 800
1273
+ },
1274
+ {
1275
+ "epoch": 0.5118362124120281,
1276
+ "eval_logits/generated": -2.8194947242736816,
1277
+ "eval_logits/real": -2.823777437210083,
1278
+ "eval_logps/generated": -129.62098693847656,
1279
+ "eval_logps/real": -112.27095031738281,
1280
+ "eval_loss": 0.23086096346378326,
1281
+ "eval_rewards/accuracies": 0.9921875,
1282
+ "eval_rewards/generated": -5.09039831161499,
1283
+ "eval_rewards/margins": 15.131412506103516,
1284
+ "eval_rewards/real": 10.041014671325684,
1285
+ "eval_runtime": 69.2574,
1286
+ "eval_samples_per_second": 7.219,
1287
+ "eval_steps_per_second": 0.231,
1288
+ "step": 800
1289
+ },
1290
+ {
1291
+ "epoch": 0.5182341650671785,
1292
+ "grad_norm": 2.356636174793149,
1293
+ "learning_rate": 2.677809388335704e-07,
1294
+ "logits/generated": -2.8063623905181885,
1295
+ "logits/real": -2.863706111907959,
1296
+ "logps/generated": -147.79519653320312,
1297
+ "logps/real": -103.06642150878906,
1298
+ "loss": 0.2038,
1299
+ "rewards/accuracies": 1.0,
1300
+ "rewards/generated": -5.835212707519531,
1301
+ "rewards/margins": 16.04536247253418,
1302
+ "rewards/real": 10.210151672363281,
1303
+ "step": 810
1304
+ },
1305
+ {
1306
+ "epoch": 0.5246321177223289,
1307
+ "grad_norm": 2.4379730238495885,
1308
+ "learning_rate": 2.642247510668563e-07,
1309
+ "logits/generated": -2.8116323947906494,
1310
+ "logits/real": -2.8085341453552246,
1311
+ "logps/generated": -135.0247039794922,
1312
+ "logps/real": -88.69074249267578,
1313
+ "loss": 0.2247,
1314
+ "rewards/accuracies": 1.0,
1315
+ "rewards/generated": -5.124009132385254,
1316
+ "rewards/margins": 14.631864547729492,
1317
+ "rewards/real": 9.507856369018555,
1318
+ "step": 820
1319
+ },
1320
+ {
1321
+ "epoch": 0.5310300703774792,
1322
+ "grad_norm": 2.4726280945236083,
1323
+ "learning_rate": 2.6066856330014225e-07,
1324
+ "logits/generated": -2.7714428901672363,
1325
+ "logits/real": -2.796095132827759,
1326
+ "logps/generated": -138.78175354003906,
1327
+ "logps/real": -103.1611099243164,
1328
+ "loss": 0.2042,
1329
+ "rewards/accuracies": 1.0,
1330
+ "rewards/generated": -5.705328941345215,
1331
+ "rewards/margins": 16.654027938842773,
1332
+ "rewards/real": 10.948700904846191,
1333
+ "step": 830
1334
+ },
1335
+ {
1336
+ "epoch": 0.5374280230326296,
1337
+ "grad_norm": 2.0930312705666965,
1338
+ "learning_rate": 2.5711237553342815e-07,
1339
+ "logits/generated": -2.796297311782837,
1340
+ "logits/real": -2.761157512664795,
1341
+ "logps/generated": -132.76963806152344,
1342
+ "logps/real": -101.53390502929688,
1343
+ "loss": 0.2154,
1344
+ "rewards/accuracies": 1.0,
1345
+ "rewards/generated": -5.048023223876953,
1346
+ "rewards/margins": 16.279930114746094,
1347
+ "rewards/real": 11.231904983520508,
1348
+ "step": 840
1349
+ },
1350
+ {
1351
+ "epoch": 0.5438259756877799,
1352
+ "grad_norm": 1.995146822930404,
1353
+ "learning_rate": 2.5355618776671404e-07,
1354
+ "logits/generated": -2.74373722076416,
1355
+ "logits/real": -2.8401942253112793,
1356
+ "logps/generated": -134.88999938964844,
1357
+ "logps/real": -108.44051361083984,
1358
+ "loss": 0.2085,
1359
+ "rewards/accuracies": 1.0,
1360
+ "rewards/generated": -5.090542793273926,
1361
+ "rewards/margins": 16.1528377532959,
1362
+ "rewards/real": 11.06229305267334,
1363
+ "step": 850
1364
+ },
1365
+ {
1366
+ "epoch": 0.5502239283429302,
1367
+ "grad_norm": 2.383171664852739,
1368
+ "learning_rate": 2.5e-07,
1369
+ "logits/generated": -2.734670639038086,
1370
+ "logits/real": -2.836339235305786,
1371
+ "logps/generated": -138.05905151367188,
1372
+ "logps/real": -96.95460510253906,
1373
+ "loss": 0.2,
1374
+ "rewards/accuracies": 1.0,
1375
+ "rewards/generated": -5.488851070404053,
1376
+ "rewards/margins": 15.819448471069336,
1377
+ "rewards/real": 10.330598831176758,
1378
+ "step": 860
1379
+ },
1380
+ {
1381
+ "epoch": 0.5566218809980806,
1382
+ "grad_norm": 2.20520607110751,
1383
+ "learning_rate": 2.4644381223328594e-07,
1384
+ "logits/generated": -2.7709481716156006,
1385
+ "logits/real": -2.7831263542175293,
1386
+ "logps/generated": -137.1595916748047,
1387
+ "logps/real": -114.11800384521484,
1388
+ "loss": 0.2189,
1389
+ "rewards/accuracies": 1.0,
1390
+ "rewards/generated": -5.659204959869385,
1391
+ "rewards/margins": 17.742206573486328,
1392
+ "rewards/real": 12.083002090454102,
1393
+ "step": 870
1394
+ },
1395
+ {
1396
+ "epoch": 0.5630198336532309,
1397
+ "grad_norm": 2.6353918937836847,
1398
+ "learning_rate": 2.4288762446657183e-07,
1399
+ "logits/generated": -2.708103656768799,
1400
+ "logits/real": -2.895563840866089,
1401
+ "logps/generated": -141.7598876953125,
1402
+ "logps/real": -99.23255920410156,
1403
+ "loss": 0.2069,
1404
+ "rewards/accuracies": 1.0,
1405
+ "rewards/generated": -5.729107856750488,
1406
+ "rewards/margins": 15.888574600219727,
1407
+ "rewards/real": 10.159466743469238,
1408
+ "step": 880
1409
+ },
1410
+ {
1411
+ "epoch": 0.5694177863083814,
1412
+ "grad_norm": 2.159987018302931,
1413
+ "learning_rate": 2.393314366998578e-07,
1414
+ "logits/generated": -2.8029890060424805,
1415
+ "logits/real": -2.899193048477173,
1416
+ "logps/generated": -142.26734924316406,
1417
+ "logps/real": -89.63228607177734,
1418
+ "loss": 0.2086,
1419
+ "rewards/accuracies": 1.0,
1420
+ "rewards/generated": -5.678102493286133,
1421
+ "rewards/margins": 15.533955574035645,
1422
+ "rewards/real": 9.855853080749512,
1423
+ "step": 890
1424
+ },
1425
+ {
1426
+ "epoch": 0.5758157389635317,
1427
+ "grad_norm": 2.829485679186667,
1428
+ "learning_rate": 2.3577524893314365e-07,
1429
+ "logits/generated": -2.8933000564575195,
1430
+ "logits/real": -2.8898868560791016,
1431
+ "logps/generated": -140.3587646484375,
1432
+ "logps/real": -119.44731140136719,
1433
+ "loss": 0.2126,
1434
+ "rewards/accuracies": 1.0,
1435
+ "rewards/generated": -5.4967241287231445,
1436
+ "rewards/margins": 17.632753372192383,
1437
+ "rewards/real": 12.136028289794922,
1438
+ "step": 900
1439
+ },
1440
+ {
1441
+ "epoch": 0.582213691618682,
1442
+ "grad_norm": 2.076889704737332,
1443
+ "learning_rate": 2.322190611664296e-07,
1444
+ "logits/generated": -2.783379077911377,
1445
+ "logits/real": -2.837111473083496,
1446
+ "logps/generated": -159.20602416992188,
1447
+ "logps/real": -101.35649108886719,
1448
+ "loss": 0.2152,
1449
+ "rewards/accuracies": 1.0,
1450
+ "rewards/generated": -6.505612850189209,
1451
+ "rewards/margins": 16.421142578125,
1452
+ "rewards/real": 9.91553020477295,
1453
+ "step": 910
1454
+ },
1455
+ {
1456
+ "epoch": 0.5886116442738324,
1457
+ "grad_norm": 2.9561242755966917,
1458
+ "learning_rate": 2.2866287339971549e-07,
1459
+ "logits/generated": -2.7626872062683105,
1460
+ "logits/real": -2.775630235671997,
1461
+ "logps/generated": -142.0345916748047,
1462
+ "logps/real": -105.5779037475586,
1463
+ "loss": 0.2106,
1464
+ "rewards/accuracies": 1.0,
1465
+ "rewards/generated": -5.930171012878418,
1466
+ "rewards/margins": 16.78856086730957,
1467
+ "rewards/real": 10.858389854431152,
1468
+ "step": 920
1469
+ },
1470
+ {
1471
+ "epoch": 0.5950095969289827,
1472
+ "grad_norm": 5.343288980733804,
1473
+ "learning_rate": 2.251066856330014e-07,
1474
+ "logits/generated": -2.863882064819336,
1475
+ "logits/real": -2.7736942768096924,
1476
+ "logps/generated": -146.3633575439453,
1477
+ "logps/real": -116.1565170288086,
1478
+ "loss": 0.2372,
1479
+ "rewards/accuracies": 1.0,
1480
+ "rewards/generated": -6.034173965454102,
1481
+ "rewards/margins": 17.2874755859375,
1482
+ "rewards/real": 11.253301620483398,
1483
+ "step": 930
1484
+ },
1485
+ {
1486
+ "epoch": 0.6014075495841331,
1487
+ "grad_norm": 2.1700141539005418,
1488
+ "learning_rate": 2.2155049786628733e-07,
1489
+ "logits/generated": -2.7782211303710938,
1490
+ "logits/real": -2.784595012664795,
1491
+ "logps/generated": -126.44002532958984,
1492
+ "logps/real": -98.55274200439453,
1493
+ "loss": 0.2121,
1494
+ "rewards/accuracies": 1.0,
1495
+ "rewards/generated": -4.89090633392334,
1496
+ "rewards/margins": 15.318072319030762,
1497
+ "rewards/real": 10.427166938781738,
1498
+ "step": 940
1499
+ },
1500
+ {
1501
+ "epoch": 0.6078055022392834,
1502
+ "grad_norm": 2.086474199547169,
1503
+ "learning_rate": 2.1799431009957325e-07,
1504
+ "logits/generated": -2.7844066619873047,
1505
+ "logits/real": -2.87266206741333,
1506
+ "logps/generated": -154.29930114746094,
1507
+ "logps/real": -118.48374938964844,
1508
+ "loss": 0.2242,
1509
+ "rewards/accuracies": 1.0,
1510
+ "rewards/generated": -5.924124717712402,
1511
+ "rewards/margins": 17.47787857055664,
1512
+ "rewards/real": 11.553754806518555,
1513
+ "step": 950
1514
+ },
1515
+ {
1516
+ "epoch": 0.6142034548944337,
1517
+ "grad_norm": 5.893270475169604,
1518
+ "learning_rate": 2.1443812233285914e-07,
1519
+ "logits/generated": -2.7291111946105957,
1520
+ "logits/real": -2.7862040996551514,
1521
+ "logps/generated": -128.32461547851562,
1522
+ "logps/real": -88.90164947509766,
1523
+ "loss": 0.2049,
1524
+ "rewards/accuracies": 0.987500011920929,
1525
+ "rewards/generated": -5.07694149017334,
1526
+ "rewards/margins": 14.397714614868164,
1527
+ "rewards/real": 9.320773124694824,
1528
+ "step": 960
1529
+ },
1530
+ {
1531
+ "epoch": 0.6206014075495841,
1532
+ "grad_norm": 3.4745504874973854,
1533
+ "learning_rate": 2.108819345661451e-07,
1534
+ "logits/generated": -2.7670676708221436,
1535
+ "logits/real": -2.906858205795288,
1536
+ "logps/generated": -142.1813507080078,
1537
+ "logps/real": -99.36964416503906,
1538
+ "loss": 0.2152,
1539
+ "rewards/accuracies": 1.0,
1540
+ "rewards/generated": -5.543498992919922,
1541
+ "rewards/margins": 15.908276557922363,
1542
+ "rewards/real": 10.364778518676758,
1543
+ "step": 970
1544
+ },
1545
+ {
1546
+ "epoch": 0.6269993602047345,
1547
+ "grad_norm": 1.9536154527758394,
1548
+ "learning_rate": 2.0732574679943098e-07,
1549
+ "logits/generated": -2.7182888984680176,
1550
+ "logits/real": -2.795628070831299,
1551
+ "logps/generated": -123.13966369628906,
1552
+ "logps/real": -92.99507141113281,
1553
+ "loss": 0.2048,
1554
+ "rewards/accuracies": 1.0,
1555
+ "rewards/generated": -4.866164207458496,
1556
+ "rewards/margins": 14.955586433410645,
1557
+ "rewards/real": 10.089422225952148,
1558
+ "step": 980
1559
+ },
1560
+ {
1561
+ "epoch": 0.6333973128598849,
1562
+ "grad_norm": 2.3702688907183425,
1563
+ "learning_rate": 2.0376955903271693e-07,
1564
+ "logits/generated": -2.792490243911743,
1565
+ "logits/real": -2.814042568206787,
1566
+ "logps/generated": -122.5984878540039,
1567
+ "logps/real": -119.72834777832031,
1568
+ "loss": 0.2154,
1569
+ "rewards/accuracies": 1.0,
1570
+ "rewards/generated": -4.933380603790283,
1571
+ "rewards/margins": 16.830997467041016,
1572
+ "rewards/real": 11.897615432739258,
1573
+ "step": 990
1574
+ },
1575
+ {
1576
+ "epoch": 0.6397952655150352,
1577
+ "grad_norm": 2.710438187714284,
1578
+ "learning_rate": 2.0021337126600283e-07,
1579
+ "logits/generated": -2.7410390377044678,
1580
+ "logits/real": -2.817781925201416,
1581
+ "logps/generated": -161.62539672851562,
1582
+ "logps/real": -91.64434051513672,
1583
+ "loss": 0.1994,
1584
+ "rewards/accuracies": 1.0,
1585
+ "rewards/generated": -6.895718574523926,
1586
+ "rewards/margins": 17.067646026611328,
1587
+ "rewards/real": 10.171927452087402,
1588
+ "step": 1000
1589
+ },
1590
+ {
1591
+ "epoch": 0.6397952655150352,
1592
+ "eval_logits/generated": -2.74424147605896,
1593
+ "eval_logits/real": -2.7461187839508057,
1594
+ "eval_logps/generated": -130.59327697753906,
1595
+ "eval_logps/real": -111.54972839355469,
1596
+ "eval_loss": 0.23031392693519592,
1597
+ "eval_rewards/accuracies": 0.9921875,
1598
+ "eval_rewards/generated": -5.187626838684082,
1599
+ "eval_rewards/margins": 15.300764083862305,
1600
+ "eval_rewards/real": 10.113137245178223,
1601
+ "eval_runtime": 69.5849,
1602
+ "eval_samples_per_second": 7.185,
1603
+ "eval_steps_per_second": 0.23,
1604
+ "step": 1000
1605
+ },
1606
+ {
1607
+ "epoch": 0.6461932181701855,
1608
+ "grad_norm": 2.2354079100751845,
1609
+ "learning_rate": 1.9665718349928875e-07,
1610
+ "logits/generated": -2.7386083602905273,
1611
+ "logits/real": -2.7921862602233887,
1612
+ "logps/generated": -145.14804077148438,
1613
+ "logps/real": -100.52366638183594,
1614
+ "loss": 0.1945,
1615
+ "rewards/accuracies": 1.0,
1616
+ "rewards/generated": -6.059700012207031,
1617
+ "rewards/margins": 16.424558639526367,
1618
+ "rewards/real": 10.364858627319336,
1619
+ "step": 1010
1620
+ },
1621
+ {
1622
+ "epoch": 0.6525911708253359,
1623
+ "grad_norm": 2.1844813037214124,
1624
+ "learning_rate": 1.931009957325747e-07,
1625
+ "logits/generated": -2.7453203201293945,
1626
+ "logits/real": -2.812004327774048,
1627
+ "logps/generated": -147.8553466796875,
1628
+ "logps/real": -100.11280822753906,
1629
+ "loss": 0.2096,
1630
+ "rewards/accuracies": 1.0,
1631
+ "rewards/generated": -5.994841575622559,
1632
+ "rewards/margins": 15.893636703491211,
1633
+ "rewards/real": 9.898796081542969,
1634
+ "step": 1020
1635
+ },
1636
+ {
1637
+ "epoch": 0.6589891234804862,
1638
+ "grad_norm": 2.046398564222724,
1639
+ "learning_rate": 1.895448079658606e-07,
1640
+ "logits/generated": -2.7719972133636475,
1641
+ "logits/real": -2.837329387664795,
1642
+ "logps/generated": -141.85464477539062,
1643
+ "logps/real": -95.72025299072266,
1644
+ "loss": 0.1927,
1645
+ "rewards/accuracies": 1.0,
1646
+ "rewards/generated": -5.702435493469238,
1647
+ "rewards/margins": 15.670855522155762,
1648
+ "rewards/real": 9.968420028686523,
1649
+ "step": 1030
1650
+ },
1651
+ {
1652
+ "epoch": 0.6653870761356366,
1653
+ "grad_norm": 2.2879001250497297,
1654
+ "learning_rate": 1.859886201991465e-07,
1655
+ "logits/generated": -2.708482265472412,
1656
+ "logits/real": -2.903043270111084,
1657
+ "logps/generated": -139.8241729736328,
1658
+ "logps/real": -103.22698974609375,
1659
+ "loss": 0.2013,
1660
+ "rewards/accuracies": 1.0,
1661
+ "rewards/generated": -5.350104331970215,
1662
+ "rewards/margins": 16.140804290771484,
1663
+ "rewards/real": 10.790700912475586,
1664
+ "step": 1040
1665
+ },
1666
+ {
1667
+ "epoch": 0.6717850287907869,
1668
+ "grad_norm": 4.628698290056213,
1669
+ "learning_rate": 1.8243243243243243e-07,
1670
+ "logits/generated": -2.748556137084961,
1671
+ "logits/real": -2.8339285850524902,
1672
+ "logps/generated": -148.6436004638672,
1673
+ "logps/real": -114.01179504394531,
1674
+ "loss": 0.2241,
1675
+ "rewards/accuracies": 1.0,
1676
+ "rewards/generated": -5.570124626159668,
1677
+ "rewards/margins": 17.3154354095459,
1678
+ "rewards/real": 11.745311737060547,
1679
+ "step": 1050
1680
+ },
1681
+ {
1682
+ "epoch": 0.6781829814459372,
1683
+ "grad_norm": 3.6247485539345417,
1684
+ "learning_rate": 1.7887624466571835e-07,
1685
+ "logits/generated": -2.8411951065063477,
1686
+ "logits/real": -2.7839694023132324,
1687
+ "logps/generated": -123.47579193115234,
1688
+ "logps/real": -112.1775131225586,
1689
+ "loss": 0.2218,
1690
+ "rewards/accuracies": 1.0,
1691
+ "rewards/generated": -4.763791561126709,
1692
+ "rewards/margins": 16.458126068115234,
1693
+ "rewards/real": 11.694334030151367,
1694
+ "step": 1060
1695
+ },
1696
+ {
1697
+ "epoch": 0.6845809341010877,
1698
+ "grad_norm": 2.2522203971012487,
1699
+ "learning_rate": 1.7532005689900424e-07,
1700
+ "logits/generated": -2.7332136631011963,
1701
+ "logits/real": -2.804201602935791,
1702
+ "logps/generated": -142.97860717773438,
1703
+ "logps/real": -85.4603271484375,
1704
+ "loss": 0.1947,
1705
+ "rewards/accuracies": 1.0,
1706
+ "rewards/generated": -5.868603706359863,
1707
+ "rewards/margins": 15.239709854125977,
1708
+ "rewards/real": 9.37110424041748,
1709
+ "step": 1070
1710
+ },
1711
+ {
1712
+ "epoch": 0.690978886756238,
1713
+ "grad_norm": 2.2382250321484225,
1714
+ "learning_rate": 1.717638691322902e-07,
1715
+ "logits/generated": -2.7342700958251953,
1716
+ "logits/real": -2.8574981689453125,
1717
+ "logps/generated": -151.73580932617188,
1718
+ "logps/real": -103.33231353759766,
1719
+ "loss": 0.2102,
1720
+ "rewards/accuracies": 1.0,
1721
+ "rewards/generated": -6.019370079040527,
1722
+ "rewards/margins": 16.717998504638672,
1723
+ "rewards/real": 10.698628425598145,
1724
+ "step": 1080
1725
+ },
1726
+ {
1727
+ "epoch": 0.6973768394113884,
1728
+ "grad_norm": 4.541077368968895,
1729
+ "learning_rate": 1.6820768136557609e-07,
1730
+ "logits/generated": -2.6836509704589844,
1731
+ "logits/real": -2.7782435417175293,
1732
+ "logps/generated": -140.19631958007812,
1733
+ "logps/real": -103.10017395019531,
1734
+ "loss": 0.2103,
1735
+ "rewards/accuracies": 1.0,
1736
+ "rewards/generated": -5.432912349700928,
1737
+ "rewards/margins": 16.41040802001953,
1738
+ "rewards/real": 10.977496147155762,
1739
+ "step": 1090
1740
+ },
1741
+ {
1742
+ "epoch": 0.7037747920665387,
1743
+ "grad_norm": 2.134995135769877,
1744
+ "learning_rate": 1.64651493598862e-07,
1745
+ "logits/generated": -2.700814962387085,
1746
+ "logits/real": -2.7614195346832275,
1747
+ "logps/generated": -139.90142822265625,
1748
+ "logps/real": -101.66877746582031,
1749
+ "loss": 0.2131,
1750
+ "rewards/accuracies": 0.987500011920929,
1751
+ "rewards/generated": -5.645236015319824,
1752
+ "rewards/margins": 16.219711303710938,
1753
+ "rewards/real": 10.57447624206543,
1754
+ "step": 1100
1755
+ },
1756
+ {
1757
+ "epoch": 0.710172744721689,
1758
+ "grad_norm": 2.1362568113809277,
1759
+ "learning_rate": 1.6109530583214793e-07,
1760
+ "logits/generated": -2.7405338287353516,
1761
+ "logits/real": -2.801772117614746,
1762
+ "logps/generated": -135.68980407714844,
1763
+ "logps/real": -115.05977630615234,
1764
+ "loss": 0.2147,
1765
+ "rewards/accuracies": 1.0,
1766
+ "rewards/generated": -5.29919958114624,
1767
+ "rewards/margins": 17.55141258239746,
1768
+ "rewards/real": 12.252215385437012,
1769
+ "step": 1110
1770
+ },
1771
+ {
1772
+ "epoch": 0.7165706973768394,
1773
+ "grad_norm": 2.6687487627386965,
1774
+ "learning_rate": 1.5753911806543385e-07,
1775
+ "logits/generated": -2.655336380004883,
1776
+ "logits/real": -2.716346502304077,
1777
+ "logps/generated": -138.2339630126953,
1778
+ "logps/real": -106.83113098144531,
1779
+ "loss": 0.2236,
1780
+ "rewards/accuracies": 1.0,
1781
+ "rewards/generated": -5.799440860748291,
1782
+ "rewards/margins": 16.477214813232422,
1783
+ "rewards/real": 10.677773475646973,
1784
+ "step": 1120
1785
+ },
1786
+ {
1787
+ "epoch": 0.7229686500319897,
1788
+ "grad_norm": 1.9796488466759263,
1789
+ "learning_rate": 1.5398293029871974e-07,
1790
+ "logits/generated": -2.7700233459472656,
1791
+ "logits/real": -2.719053268432617,
1792
+ "logps/generated": -138.2145233154297,
1793
+ "logps/real": -101.98434448242188,
1794
+ "loss": 0.1982,
1795
+ "rewards/accuracies": 1.0,
1796
+ "rewards/generated": -5.574553966522217,
1797
+ "rewards/margins": 15.94153118133545,
1798
+ "rewards/real": 10.366975784301758,
1799
+ "step": 1130
1800
+ },
1801
+ {
1802
+ "epoch": 0.7293666026871402,
1803
+ "grad_norm": 1.97652173855478,
1804
+ "learning_rate": 1.504267425320057e-07,
1805
+ "logits/generated": -2.749962091445923,
1806
+ "logits/real": -2.7098100185394287,
1807
+ "logps/generated": -128.74168395996094,
1808
+ "logps/real": -103.87724304199219,
1809
+ "loss": 0.203,
1810
+ "rewards/accuracies": 1.0,
1811
+ "rewards/generated": -5.09400749206543,
1812
+ "rewards/margins": 16.894054412841797,
1813
+ "rewards/real": 11.800048828125,
1814
+ "step": 1140
1815
+ },
1816
+ {
1817
+ "epoch": 0.7357645553422905,
1818
+ "grad_norm": 1.9276505695865507,
1819
+ "learning_rate": 1.4687055476529158e-07,
1820
+ "logits/generated": -2.7056586742401123,
1821
+ "logits/real": -2.84588885307312,
1822
+ "logps/generated": -132.11521911621094,
1823
+ "logps/real": -103.04747009277344,
1824
+ "loss": 0.2,
1825
+ "rewards/accuracies": 1.0,
1826
+ "rewards/generated": -5.32986307144165,
1827
+ "rewards/margins": 16.5743408203125,
1828
+ "rewards/real": 11.244477272033691,
1829
+ "step": 1150
1830
+ },
1831
+ {
1832
+ "epoch": 0.7421625079974408,
1833
+ "grad_norm": 2.4003697136809587,
1834
+ "learning_rate": 1.4331436699857753e-07,
1835
+ "logits/generated": -2.7689154148101807,
1836
+ "logits/real": -2.7181622982025146,
1837
+ "logps/generated": -128.201171875,
1838
+ "logps/real": -111.31385803222656,
1839
+ "loss": 0.2257,
1840
+ "rewards/accuracies": 1.0,
1841
+ "rewards/generated": -5.305639266967773,
1842
+ "rewards/margins": 16.98516273498535,
1843
+ "rewards/real": 11.679524421691895,
1844
+ "step": 1160
1845
+ },
1846
+ {
1847
+ "epoch": 0.7485604606525912,
1848
+ "grad_norm": 2.1662792343990898,
1849
+ "learning_rate": 1.3975817923186345e-07,
1850
+ "logits/generated": -2.7471747398376465,
1851
+ "logits/real": -2.745327949523926,
1852
+ "logps/generated": -131.89920043945312,
1853
+ "logps/real": -101.08728790283203,
1854
+ "loss": 0.2126,
1855
+ "rewards/accuracies": 0.987500011920929,
1856
+ "rewards/generated": -5.3999738693237305,
1857
+ "rewards/margins": 16.271684646606445,
1858
+ "rewards/real": 10.871710777282715,
1859
+ "step": 1170
1860
+ },
1861
+ {
1862
+ "epoch": 0.7549584133077415,
1863
+ "grad_norm": 1.9618990548812567,
1864
+ "learning_rate": 1.3620199146514935e-07,
1865
+ "logits/generated": -2.73884654045105,
1866
+ "logits/real": -2.792114734649658,
1867
+ "logps/generated": -139.20669555664062,
1868
+ "logps/real": -111.1391830444336,
1869
+ "loss": 0.2157,
1870
+ "rewards/accuracies": 1.0,
1871
+ "rewards/generated": -5.806328296661377,
1872
+ "rewards/margins": 17.363449096679688,
1873
+ "rewards/real": 11.557123184204102,
1874
+ "step": 1180
1875
+ },
1876
+ {
1877
+ "epoch": 0.7613563659628919,
1878
+ "grad_norm": 1.8565236814707453,
1879
+ "learning_rate": 1.326458036984353e-07,
1880
+ "logits/generated": -2.708221673965454,
1881
+ "logits/real": -2.8019871711730957,
1882
+ "logps/generated": -141.60244750976562,
1883
+ "logps/real": -103.365966796875,
1884
+ "loss": 0.2066,
1885
+ "rewards/accuracies": 0.987500011920929,
1886
+ "rewards/generated": -5.380032539367676,
1887
+ "rewards/margins": 15.911615371704102,
1888
+ "rewards/real": 10.53158187866211,
1889
+ "step": 1190
1890
+ },
1891
+ {
1892
+ "epoch": 0.7677543186180422,
1893
+ "grad_norm": 5.832771744417194,
1894
+ "learning_rate": 1.290896159317212e-07,
1895
+ "logits/generated": -2.7190277576446533,
1896
+ "logits/real": -2.782357692718506,
1897
+ "logps/generated": -133.46572875976562,
1898
+ "logps/real": -102.03229522705078,
1899
+ "loss": 0.2075,
1900
+ "rewards/accuracies": 1.0,
1901
+ "rewards/generated": -5.16973876953125,
1902
+ "rewards/margins": 16.539508819580078,
1903
+ "rewards/real": 11.369768142700195,
1904
+ "step": 1200
1905
+ },
1906
+ {
1907
+ "epoch": 0.7677543186180422,
1908
+ "eval_logits/generated": -2.7359671592712402,
1909
+ "eval_logits/real": -2.7372217178344727,
1910
+ "eval_logps/generated": -128.395751953125,
1911
+ "eval_logps/real": -111.5260009765625,
1912
+ "eval_loss": 0.23038950562477112,
1913
+ "eval_rewards/accuracies": 0.9921875,
1914
+ "eval_rewards/generated": -4.9678730964660645,
1915
+ "eval_rewards/margins": 15.083383560180664,
1916
+ "eval_rewards/real": 10.115509986877441,
1917
+ "eval_runtime": 69.835,
1918
+ "eval_samples_per_second": 7.16,
1919
+ "eval_steps_per_second": 0.229,
1920
+ "step": 1200
1921
+ },
1922
+ {
1923
+ "epoch": 0.7741522712731925,
1924
+ "grad_norm": 2.3644426695343914,
1925
+ "learning_rate": 1.255334281650071e-07,
1926
+ "logits/generated": -2.702448606491089,
1927
+ "logits/real": -2.849297046661377,
1928
+ "logps/generated": -133.25411987304688,
1929
+ "logps/real": -94.89964294433594,
1930
+ "loss": 0.2192,
1931
+ "rewards/accuracies": 1.0,
1932
+ "rewards/generated": -5.127745151519775,
1933
+ "rewards/margins": 15.297076225280762,
1934
+ "rewards/real": 10.169332504272461,
1935
+ "step": 1210
1936
+ },
1937
+ {
1938
+ "epoch": 0.780550223928343,
1939
+ "grad_norm": 4.2920815806287695,
1940
+ "learning_rate": 1.2197724039829303e-07,
1941
+ "logits/generated": -2.8339786529541016,
1942
+ "logits/real": -2.757995128631592,
1943
+ "logps/generated": -148.0692138671875,
1944
+ "logps/real": -121.7199478149414,
1945
+ "loss": 0.2159,
1946
+ "rewards/accuracies": 1.0,
1947
+ "rewards/generated": -6.0756330490112305,
1948
+ "rewards/margins": 18.304834365844727,
1949
+ "rewards/real": 12.229202270507812,
1950
+ "step": 1220
1951
+ },
1952
+ {
1953
+ "epoch": 0.7869481765834933,
1954
+ "grad_norm": 2.624742549146953,
1955
+ "learning_rate": 1.1842105263157894e-07,
1956
+ "logits/generated": -2.7471704483032227,
1957
+ "logits/real": -2.8416290283203125,
1958
+ "logps/generated": -147.3047637939453,
1959
+ "logps/real": -111.00262451171875,
1960
+ "loss": 0.2119,
1961
+ "rewards/accuracies": 1.0,
1962
+ "rewards/generated": -5.8865203857421875,
1963
+ "rewards/margins": 17.451772689819336,
1964
+ "rewards/real": 11.565254211425781,
1965
+ "step": 1230
1966
+ },
1967
+ {
1968
+ "epoch": 0.7933461292386437,
1969
+ "grad_norm": 2.1307989909566256,
1970
+ "learning_rate": 1.1486486486486487e-07,
1971
+ "logits/generated": -2.731872797012329,
1972
+ "logits/real": -2.8878135681152344,
1973
+ "logps/generated": -172.3035430908203,
1974
+ "logps/real": -110.19361877441406,
1975
+ "loss": 0.2202,
1976
+ "rewards/accuracies": 1.0,
1977
+ "rewards/generated": -7.301609039306641,
1978
+ "rewards/margins": 18.403263092041016,
1979
+ "rewards/real": 11.101654052734375,
1980
+ "step": 1240
1981
+ },
1982
+ {
1983
+ "epoch": 0.799744081893794,
1984
+ "grad_norm": 2.18810616018797,
1985
+ "learning_rate": 1.1130867709815078e-07,
1986
+ "logits/generated": -2.635159969329834,
1987
+ "logits/real": -2.8312065601348877,
1988
+ "logps/generated": -154.04550170898438,
1989
+ "logps/real": -104.9063949584961,
1990
+ "loss": 0.195,
1991
+ "rewards/accuracies": 1.0,
1992
+ "rewards/generated": -6.582805633544922,
1993
+ "rewards/margins": 17.949687957763672,
1994
+ "rewards/real": 11.366880416870117,
1995
+ "step": 1250
1996
+ },
1997
+ {
1998
+ "epoch": 0.8061420345489443,
1999
+ "grad_norm": 2.3243034253667,
2000
+ "learning_rate": 1.077524893314367e-07,
2001
+ "logits/generated": -2.7570877075195312,
2002
+ "logits/real": -2.676670789718628,
2003
+ "logps/generated": -132.24571228027344,
2004
+ "logps/real": -110.49552154541016,
2005
+ "loss": 0.2048,
2006
+ "rewards/accuracies": 1.0,
2007
+ "rewards/generated": -5.188769340515137,
2008
+ "rewards/margins": 16.681943893432617,
2009
+ "rewards/real": 11.49317455291748,
2010
+ "step": 1260
2011
+ },
2012
+ {
2013
+ "epoch": 0.8125399872040947,
2014
+ "grad_norm": 2.22786920352928,
2015
+ "learning_rate": 1.0419630156472262e-07,
2016
+ "logits/generated": -2.7040412425994873,
2017
+ "logits/real": -2.7337164878845215,
2018
+ "logps/generated": -129.7920379638672,
2019
+ "logps/real": -102.24781799316406,
2020
+ "loss": 0.2098,
2021
+ "rewards/accuracies": 1.0,
2022
+ "rewards/generated": -4.8722944259643555,
2023
+ "rewards/margins": 16.266225814819336,
2024
+ "rewards/real": 11.393930435180664,
2025
+ "step": 1270
2026
+ },
2027
+ {
2028
+ "epoch": 0.818937939859245,
2029
+ "grad_norm": 2.000001121079601,
2030
+ "learning_rate": 1.0064011379800854e-07,
2031
+ "logits/generated": -2.724949359893799,
2032
+ "logits/real": -2.7215514183044434,
2033
+ "logps/generated": -146.55990600585938,
2034
+ "logps/real": -114.5284423828125,
2035
+ "loss": 0.2211,
2036
+ "rewards/accuracies": 1.0,
2037
+ "rewards/generated": -5.995248794555664,
2038
+ "rewards/margins": 17.621685028076172,
2039
+ "rewards/real": 11.626437187194824,
2040
+ "step": 1280
2041
+ },
2042
+ {
2043
+ "epoch": 0.8253358925143954,
2044
+ "grad_norm": 2.220536708583519,
2045
+ "learning_rate": 9.708392603129445e-08,
2046
+ "logits/generated": -2.6592986583709717,
2047
+ "logits/real": -2.766054391860962,
2048
+ "logps/generated": -141.36831665039062,
2049
+ "logps/real": -96.46757507324219,
2050
+ "loss": 0.2112,
2051
+ "rewards/accuracies": 1.0,
2052
+ "rewards/generated": -5.908102989196777,
2053
+ "rewards/margins": 16.13393783569336,
2054
+ "rewards/real": 10.225834846496582,
2055
+ "step": 1290
2056
+ },
2057
+ {
2058
+ "epoch": 0.8317338451695457,
2059
+ "grad_norm": 1.8458396373240298,
2060
+ "learning_rate": 9.352773826458037e-08,
2061
+ "logits/generated": -2.7686662673950195,
2062
+ "logits/real": -2.6808652877807617,
2063
+ "logps/generated": -147.9801788330078,
2064
+ "logps/real": -109.27278900146484,
2065
+ "loss": 0.2098,
2066
+ "rewards/accuracies": 1.0,
2067
+ "rewards/generated": -6.193856716156006,
2068
+ "rewards/margins": 17.859333038330078,
2069
+ "rewards/real": 11.66547679901123,
2070
+ "step": 1300
2071
+ },
2072
+ {
2073
+ "epoch": 0.838131797824696,
2074
+ "grad_norm": 2.2735605764303304,
2075
+ "learning_rate": 8.997155049786629e-08,
2076
+ "logits/generated": -2.791935443878174,
2077
+ "logits/real": -2.7119717597961426,
2078
+ "logps/generated": -136.2453155517578,
2079
+ "logps/real": -101.1696548461914,
2080
+ "loss": 0.2048,
2081
+ "rewards/accuracies": 1.0,
2082
+ "rewards/generated": -5.072253227233887,
2083
+ "rewards/margins": 15.385726928710938,
2084
+ "rewards/real": 10.313471794128418,
2085
+ "step": 1310
2086
+ },
2087
+ {
2088
+ "epoch": 0.8445297504798465,
2089
+ "grad_norm": 7.320467978107467,
2090
+ "learning_rate": 8.64153627311522e-08,
2091
+ "logits/generated": -2.716169834136963,
2092
+ "logits/real": -2.7271907329559326,
2093
+ "logps/generated": -142.0037384033203,
2094
+ "logps/real": -96.90984344482422,
2095
+ "loss": 0.2039,
2096
+ "rewards/accuracies": 1.0,
2097
+ "rewards/generated": -5.719296455383301,
2098
+ "rewards/margins": 16.5465087890625,
2099
+ "rewards/real": 10.827211380004883,
2100
+ "step": 1320
2101
+ },
2102
+ {
2103
+ "epoch": 0.8509277031349968,
2104
+ "grad_norm": 2.1090442333698927,
2105
+ "learning_rate": 8.285917496443812e-08,
2106
+ "logits/generated": -2.7019193172454834,
2107
+ "logits/real": -2.7879014015197754,
2108
+ "logps/generated": -131.5638885498047,
2109
+ "logps/real": -103.13008880615234,
2110
+ "loss": 0.2036,
2111
+ "rewards/accuracies": 1.0,
2112
+ "rewards/generated": -5.254150390625,
2113
+ "rewards/margins": 15.576313972473145,
2114
+ "rewards/real": 10.322164535522461,
2115
+ "step": 1330
2116
+ },
2117
+ {
2118
+ "epoch": 0.8573256557901472,
2119
+ "grad_norm": 1.9764553854622655,
2120
+ "learning_rate": 7.930298719772404e-08,
2121
+ "logits/generated": -2.7741990089416504,
2122
+ "logits/real": -2.786623954772949,
2123
+ "logps/generated": -128.60775756835938,
2124
+ "logps/real": -95.55816650390625,
2125
+ "loss": 0.2071,
2126
+ "rewards/accuracies": 1.0,
2127
+ "rewards/generated": -4.881311893463135,
2128
+ "rewards/margins": 15.2656888961792,
2129
+ "rewards/real": 10.384378433227539,
2130
+ "step": 1340
2131
+ },
2132
+ {
2133
+ "epoch": 0.8637236084452975,
2134
+ "grad_norm": 2.4449760593366765,
2135
+ "learning_rate": 7.574679943100994e-08,
2136
+ "logits/generated": -2.7581117153167725,
2137
+ "logits/real": -2.8711161613464355,
2138
+ "logps/generated": -159.32579040527344,
2139
+ "logps/real": -127.8974838256836,
2140
+ "loss": 0.2075,
2141
+ "rewards/accuracies": 1.0,
2142
+ "rewards/generated": -6.493968963623047,
2143
+ "rewards/margins": 19.491052627563477,
2144
+ "rewards/real": 12.997082710266113,
2145
+ "step": 1350
2146
+ },
2147
+ {
2148
+ "epoch": 0.8701215611004478,
2149
+ "grad_norm": 1.8244220811357807,
2150
+ "learning_rate": 7.219061166429587e-08,
2151
+ "logits/generated": -2.740255832672119,
2152
+ "logits/real": -2.8236019611358643,
2153
+ "logps/generated": -140.54568481445312,
2154
+ "logps/real": -108.64134216308594,
2155
+ "loss": 0.1994,
2156
+ "rewards/accuracies": 1.0,
2157
+ "rewards/generated": -5.692780494689941,
2158
+ "rewards/margins": 16.945453643798828,
2159
+ "rewards/real": 11.25267219543457,
2160
+ "step": 1360
2161
+ },
2162
+ {
2163
+ "epoch": 0.8765195137555982,
2164
+ "grad_norm": 2.9020057472557528,
2165
+ "learning_rate": 6.863442389758179e-08,
2166
+ "logits/generated": -2.748126745223999,
2167
+ "logits/real": -2.682790517807007,
2168
+ "logps/generated": -129.32846069335938,
2169
+ "logps/real": -92.95919036865234,
2170
+ "loss": 0.2034,
2171
+ "rewards/accuracies": 1.0,
2172
+ "rewards/generated": -5.053739547729492,
2173
+ "rewards/margins": 15.391085624694824,
2174
+ "rewards/real": 10.337347030639648,
2175
+ "step": 1370
2176
+ },
2177
+ {
2178
+ "epoch": 0.8829174664107485,
2179
+ "grad_norm": 2.342066981273815,
2180
+ "learning_rate": 6.507823613086771e-08,
2181
+ "logits/generated": -2.734999179840088,
2182
+ "logits/real": -2.705516815185547,
2183
+ "logps/generated": -126.626708984375,
2184
+ "logps/real": -108.821044921875,
2185
+ "loss": 0.2083,
2186
+ "rewards/accuracies": 1.0,
2187
+ "rewards/generated": -5.249380111694336,
2188
+ "rewards/margins": 17.1163387298584,
2189
+ "rewards/real": 11.866960525512695,
2190
+ "step": 1380
2191
+ },
2192
+ {
2193
+ "epoch": 0.889315419065899,
2194
+ "grad_norm": 2.2461978230592665,
2195
+ "learning_rate": 6.152204836415363e-08,
2196
+ "logits/generated": -2.715066432952881,
2197
+ "logits/real": -2.795833110809326,
2198
+ "logps/generated": -136.6764373779297,
2199
+ "logps/real": -112.36259460449219,
2200
+ "loss": 0.219,
2201
+ "rewards/accuracies": 1.0,
2202
+ "rewards/generated": -5.541347026824951,
2203
+ "rewards/margins": 17.154422760009766,
2204
+ "rewards/real": 11.613073348999023,
2205
+ "step": 1390
2206
+ },
2207
+ {
2208
+ "epoch": 0.8957133717210493,
2209
+ "grad_norm": 1.9643094015838993,
2210
+ "learning_rate": 5.796586059743954e-08,
2211
+ "logits/generated": -2.7037057876586914,
2212
+ "logits/real": -2.892324447631836,
2213
+ "logps/generated": -144.58990478515625,
2214
+ "logps/real": -93.6194839477539,
2215
+ "loss": 0.1961,
2216
+ "rewards/accuracies": 1.0,
2217
+ "rewards/generated": -5.912665843963623,
2218
+ "rewards/margins": 16.394804000854492,
2219
+ "rewards/real": 10.482137680053711,
2220
+ "step": 1400
2221
+ },
2222
+ {
2223
+ "epoch": 0.8957133717210493,
2224
+ "eval_logits/generated": -2.731992244720459,
2225
+ "eval_logits/real": -2.7331748008728027,
2226
+ "eval_logps/generated": -128.72311401367188,
2227
+ "eval_logps/real": -111.41729736328125,
2228
+ "eval_loss": 0.22951382398605347,
2229
+ "eval_rewards/accuracies": 0.9921875,
2230
+ "eval_rewards/generated": -5.000611305236816,
2231
+ "eval_rewards/margins": 15.126992225646973,
2232
+ "eval_rewards/real": 10.12637996673584,
2233
+ "eval_runtime": 68.2753,
2234
+ "eval_samples_per_second": 7.323,
2235
+ "eval_steps_per_second": 0.234,
2236
+ "step": 1400
2237
+ },
2238
+ {
2239
+ "epoch": 0.9021113243761996,
2240
+ "grad_norm": 1.9508599183671798,
2241
+ "learning_rate": 5.4409672830725456e-08,
2242
+ "logits/generated": -2.771930694580078,
2243
+ "logits/real": -2.9047772884368896,
2244
+ "logps/generated": -157.674072265625,
2245
+ "logps/real": -95.12882232666016,
2246
+ "loss": 0.2091,
2247
+ "rewards/accuracies": 1.0,
2248
+ "rewards/generated": -6.542889595031738,
2249
+ "rewards/margins": 16.641141891479492,
2250
+ "rewards/real": 10.098252296447754,
2251
+ "step": 1410
2252
+ },
2253
+ {
2254
+ "epoch": 0.90850927703135,
2255
+ "grad_norm": 8.200787208344204,
2256
+ "learning_rate": 5.0853485064011376e-08,
2257
+ "logits/generated": -2.743286609649658,
2258
+ "logits/real": -2.764103412628174,
2259
+ "logps/generated": -124.41139221191406,
2260
+ "logps/real": -113.6160659790039,
2261
+ "loss": 0.2089,
2262
+ "rewards/accuracies": 1.0,
2263
+ "rewards/generated": -4.869805812835693,
2264
+ "rewards/margins": 16.73537826538086,
2265
+ "rewards/real": 11.865571975708008,
2266
+ "step": 1420
2267
+ },
2268
+ {
2269
+ "epoch": 0.9149072296865003,
2270
+ "grad_norm": 4.459379575662199,
2271
+ "learning_rate": 4.72972972972973e-08,
2272
+ "logits/generated": -2.746967077255249,
2273
+ "logits/real": -2.726330280303955,
2274
+ "logps/generated": -127.21881103515625,
2275
+ "logps/real": -105.5604248046875,
2276
+ "loss": 0.2142,
2277
+ "rewards/accuracies": 1.0,
2278
+ "rewards/generated": -4.854151248931885,
2279
+ "rewards/margins": 16.145641326904297,
2280
+ "rewards/real": 11.291491508483887,
2281
+ "step": 1430
2282
+ },
2283
+ {
2284
+ "epoch": 0.9213051823416507,
2285
+ "grad_norm": 2.74872128438021,
2286
+ "learning_rate": 4.374110953058322e-08,
2287
+ "logits/generated": -2.7054824829101562,
2288
+ "logits/real": -2.7788712978363037,
2289
+ "logps/generated": -141.21192932128906,
2290
+ "logps/real": -107.06086730957031,
2291
+ "loss": 0.2159,
2292
+ "rewards/accuracies": 1.0,
2293
+ "rewards/generated": -5.830003261566162,
2294
+ "rewards/margins": 17.112892150878906,
2295
+ "rewards/real": 11.282888412475586,
2296
+ "step": 1440
2297
+ },
2298
+ {
2299
+ "epoch": 0.927703134996801,
2300
+ "grad_norm": 2.4246159341403537,
2301
+ "learning_rate": 4.018492176386913e-08,
2302
+ "logits/generated": -2.6539907455444336,
2303
+ "logits/real": -2.773902177810669,
2304
+ "logps/generated": -130.4512939453125,
2305
+ "logps/real": -101.6545181274414,
2306
+ "loss": 0.2101,
2307
+ "rewards/accuracies": 1.0,
2308
+ "rewards/generated": -4.960906028747559,
2309
+ "rewards/margins": 15.910059928894043,
2310
+ "rewards/real": 10.949153900146484,
2311
+ "step": 1450
2312
+ },
2313
+ {
2314
+ "epoch": 0.9341010876519513,
2315
+ "grad_norm": 1.8673176828484437,
2316
+ "learning_rate": 3.6628733997155046e-08,
2317
+ "logits/generated": -2.7114219665527344,
2318
+ "logits/real": -2.8677616119384766,
2319
+ "logps/generated": -130.7792510986328,
2320
+ "logps/real": -88.69828796386719,
2321
+ "loss": 0.1885,
2322
+ "rewards/accuracies": 1.0,
2323
+ "rewards/generated": -5.1922407150268555,
2324
+ "rewards/margins": 14.514605522155762,
2325
+ "rewards/real": 9.32236385345459,
2326
+ "step": 1460
2327
+ },
2328
+ {
2329
+ "epoch": 0.9404990403071017,
2330
+ "grad_norm": 2.167805338937868,
2331
+ "learning_rate": 3.3072546230440967e-08,
2332
+ "logits/generated": -2.709365129470825,
2333
+ "logits/real": -2.7279558181762695,
2334
+ "logps/generated": -129.8989715576172,
2335
+ "logps/real": -96.73854064941406,
2336
+ "loss": 0.1966,
2337
+ "rewards/accuracies": 1.0,
2338
+ "rewards/generated": -5.115813255310059,
2339
+ "rewards/margins": 15.31916332244873,
2340
+ "rewards/real": 10.203348159790039,
2341
+ "step": 1470
2342
+ },
2343
+ {
2344
+ "epoch": 0.946896992962252,
2345
+ "grad_norm": 2.343424293796162,
2346
+ "learning_rate": 2.9516358463726884e-08,
2347
+ "logits/generated": -2.6958348751068115,
2348
+ "logits/real": -2.7627506256103516,
2349
+ "logps/generated": -132.286376953125,
2350
+ "logps/real": -85.90814971923828,
2351
+ "loss": 0.2066,
2352
+ "rewards/accuracies": 1.0,
2353
+ "rewards/generated": -5.348180294036865,
2354
+ "rewards/margins": 14.917572021484375,
2355
+ "rewards/real": 9.569391250610352,
2356
+ "step": 1480
2357
+ },
2358
+ {
2359
+ "epoch": 0.9532949456174025,
2360
+ "grad_norm": 2.443526667812465,
2361
+ "learning_rate": 2.59601706970128e-08,
2362
+ "logits/generated": -2.731065273284912,
2363
+ "logits/real": -2.722964286804199,
2364
+ "logps/generated": -144.53329467773438,
2365
+ "logps/real": -111.11639404296875,
2366
+ "loss": 0.2125,
2367
+ "rewards/accuracies": 1.0,
2368
+ "rewards/generated": -5.713242530822754,
2369
+ "rewards/margins": 17.423105239868164,
2370
+ "rewards/real": 11.70986270904541,
2371
+ "step": 1490
2372
+ },
2373
+ {
2374
+ "epoch": 0.9596928982725528,
2375
+ "grad_norm": 2.5802622540203726,
2376
+ "learning_rate": 2.240398293029872e-08,
2377
+ "logits/generated": -2.7666499614715576,
2378
+ "logits/real": -2.7990810871124268,
2379
+ "logps/generated": -151.97225952148438,
2380
+ "logps/real": -102.18070983886719,
2381
+ "loss": 0.2062,
2382
+ "rewards/accuracies": 1.0,
2383
+ "rewards/generated": -6.25309419631958,
2384
+ "rewards/margins": 17.280641555786133,
2385
+ "rewards/real": 11.027546882629395,
2386
+ "step": 1500
2387
+ },
2388
+ {
2389
+ "epoch": 0.9660908509277031,
2390
+ "grad_norm": 4.900620511000788,
2391
+ "learning_rate": 1.8847795163584636e-08,
2392
+ "logits/generated": -2.7653520107269287,
2393
+ "logits/real": -2.737842082977295,
2394
+ "logps/generated": -132.54611206054688,
2395
+ "logps/real": -105.3329086303711,
2396
+ "loss": 0.2044,
2397
+ "rewards/accuracies": 1.0,
2398
+ "rewards/generated": -5.18030309677124,
2399
+ "rewards/margins": 16.7701358795166,
2400
+ "rewards/real": 11.58983325958252,
2401
+ "step": 1510
2402
+ },
2403
+ {
2404
+ "epoch": 0.9724888035828535,
2405
+ "grad_norm": 2.2165972838393935,
2406
+ "learning_rate": 1.5291607396870554e-08,
2407
+ "logits/generated": -2.709165334701538,
2408
+ "logits/real": -2.7248358726501465,
2409
+ "logps/generated": -131.63613891601562,
2410
+ "logps/real": -111.87278747558594,
2411
+ "loss": 0.2149,
2412
+ "rewards/accuracies": 1.0,
2413
+ "rewards/generated": -5.262539863586426,
2414
+ "rewards/margins": 17.233844757080078,
2415
+ "rewards/real": 11.971303939819336,
2416
+ "step": 1520
2417
+ },
2418
+ {
2419
+ "epoch": 0.9788867562380038,
2420
+ "grad_norm": 4.970086199198337,
2421
+ "learning_rate": 1.1735419630156473e-08,
2422
+ "logits/generated": -2.7099928855895996,
2423
+ "logits/real": -2.7440731525421143,
2424
+ "logps/generated": -127.85652160644531,
2425
+ "logps/real": -98.89935302734375,
2426
+ "loss": 0.1979,
2427
+ "rewards/accuracies": 1.0,
2428
+ "rewards/generated": -4.844382286071777,
2429
+ "rewards/margins": 15.250204086303711,
2430
+ "rewards/real": 10.4058198928833,
2431
+ "step": 1530
2432
+ },
2433
+ {
2434
+ "epoch": 0.9852847088931542,
2435
+ "grad_norm": 2.031754041409757,
2436
+ "learning_rate": 8.179231863442388e-09,
2437
+ "logits/generated": -2.6747050285339355,
2438
+ "logits/real": -2.6660659313201904,
2439
+ "logps/generated": -139.36756896972656,
2440
+ "logps/real": -103.53269958496094,
2441
+ "loss": 0.2106,
2442
+ "rewards/accuracies": 1.0,
2443
+ "rewards/generated": -5.578815460205078,
2444
+ "rewards/margins": 16.89048194885254,
2445
+ "rewards/real": 11.311666488647461,
2446
+ "step": 1540
2447
+ },
2448
+ {
2449
+ "epoch": 0.9916826615483045,
2450
+ "grad_norm": 2.2877158724102564,
2451
+ "learning_rate": 4.623044096728307e-09,
2452
+ "logits/generated": -2.660313606262207,
2453
+ "logits/real": -2.855464220046997,
2454
+ "logps/generated": -145.4078826904297,
2455
+ "logps/real": -106.03314208984375,
2456
+ "loss": 0.2083,
2457
+ "rewards/accuracies": 1.0,
2458
+ "rewards/generated": -5.917939186096191,
2459
+ "rewards/margins": 17.319114685058594,
2460
+ "rewards/real": 11.40117359161377,
2461
+ "step": 1550
2462
+ },
2463
+ {
2464
+ "epoch": 0.9980806142034548,
2465
+ "grad_norm": 2.427910151920255,
2466
+ "learning_rate": 1.0668563300142248e-09,
2467
+ "logits/generated": -2.697697401046753,
2468
+ "logits/real": -2.7648138999938965,
2469
+ "logps/generated": -132.71572875976562,
2470
+ "logps/real": -100.33382415771484,
2471
+ "loss": 0.2061,
2472
+ "rewards/accuracies": 1.0,
2473
+ "rewards/generated": -4.841822624206543,
2474
+ "rewards/margins": 15.7789306640625,
2475
+ "rewards/real": 10.937108039855957,
2476
+ "step": 1560
2477
+ },
2478
+ {
2479
+ "epoch": 1.0,
2480
+ "step": 1563,
2481
+ "total_flos": 0.0,
2482
+ "train_loss": 0.2369839982275618,
2483
+ "train_runtime": 14627.5004,
2484
+ "train_samples_per_second": 3.418,
2485
+ "train_steps_per_second": 0.107
2486
+ }
2487
+ ],
2488
+ "logging_steps": 10,
2489
+ "max_steps": 1563,
2490
+ "num_input_tokens_seen": 0,
2491
+ "num_train_epochs": 1,
2492
+ "save_steps": 500,
2493
+ "stateful_callbacks": {
2494
+ "TrainerControl": {
2495
+ "args": {
2496
+ "should_epoch_stop": false,
2497
+ "should_evaluate": false,
2498
+ "should_log": false,
2499
+ "should_save": true,
2500
+ "should_training_stop": true
2501
+ },
2502
+ "attributes": {}
2503
+ }
2504
+ },
2505
+ "total_flos": 0.0,
2506
+ "train_batch_size": 8,
2507
+ "trial_name": null,
2508
+ "trial_params": null
2509
+ }