mnoukhov committed on
Commit
d4fbdad
1 Parent(s): b5cfc68

Training in progress, step 252, checkpoint

Browse files
checkpoint-252/adapter_config.json CHANGED
@@ -20,10 +20,10 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "dense_4h_to_h",
24
  "query_key_value",
25
- "dense",
26
- "dense_h_to_4h"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "dense_h_to_4h",
24
  "dense_4h_to_h",
25
  "query_key_value",
26
+ "dense"
 
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
checkpoint-252/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbd62e42b19c59ed514333838ba090f7e306aa508856a1ec5f15755f8de0bc91
3
  size 25192592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77dafc85bc2620e59612b35d70ad335f0904786e6b6bd08ab47d1ab414ff40ed
3
  size 25192592
checkpoint-252/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bafbacbdff4fd0f3d2b7442288b4eeb33475142089923784e4b7d922b553da4a
3
  size 50493050
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3b83517807e8ec5f7da79c29749c3a3ba25e45c5f4176966a6c252218d63ccc
3
  size 50493050
checkpoint-252/trainer_state.json CHANGED
@@ -10,96 +10,96 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.2016,
13
- "eval_logps/chosen": -106.34315490722656,
14
  "eval_logps/ref_chosen": -70.265625,
15
  "eval_logps/ref_rejected": -63.51186752319336,
16
- "eval_logps/rejected": -106.34315490722656,
17
- "eval_loss": 0.6557361483573914,
18
- "eval_rewards/accuracies": 0.6162024772678456,
19
- "eval_rewards/chosen": -1.8038759231567383,
20
- "eval_rewards/margins": 0.1433669477701187,
21
- "eval_rewards/rejected": -1.9472428560256958,
22
- "eval_runtime": 2599.3441,
23
- "eval_samples_per_second": 32.24,
24
- "eval_steps_per_second": 4.03,
25
  "step": 63
26
  },
27
  {
28
  "epoch": 0.32,
29
- "grad_norm": 5.536309242248535,
30
  "learning_rate": 2.3016987391917016e-05,
31
- "logps/chosen": -187.26882934570312,
32
- "logps/ref_chosen": -199.84158325195312,
33
- "logps/ref_rejected": -140.63331604003906,
34
- "logps/rejected": -187.26882934570312,
35
- "loss": 0.4864,
36
- "rewards/accuracies": 0.7975000143051147,
37
- "rewards/chosen": 0.6286371350288391,
38
- "rewards/margins": 0.6605393886566162,
39
- "rewards/rejected": -0.031902212649583817,
40
  "step": 100
41
  },
42
  {
43
  "epoch": 0.4032,
44
- "eval_logps/chosen": -123.7810287475586,
45
  "eval_logps/ref_chosen": -70.265625,
46
  "eval_logps/ref_rejected": -63.51186752319336,
47
- "eval_logps/rejected": -123.7810287475586,
48
- "eval_loss": 0.6727969646453857,
49
- "eval_rewards/accuracies": 0.5971814515166702,
50
- "eval_rewards/chosen": -2.6757709980010986,
51
- "eval_rewards/margins": 0.16077223420143127,
52
- "eval_rewards/rejected": -2.836543321609497,
53
- "eval_runtime": 2597.2346,
54
- "eval_samples_per_second": 32.266,
55
- "eval_steps_per_second": 4.034,
56
  "step": 126
57
  },
58
  {
59
  "epoch": 0.6048,
60
- "eval_logps/chosen": -136.31578063964844,
61
  "eval_logps/ref_chosen": -70.265625,
62
  "eval_logps/ref_rejected": -63.51186752319336,
63
- "eval_logps/rejected": -136.31578063964844,
64
- "eval_loss": 0.683315098285675,
65
- "eval_rewards/accuracies": 0.5913224028066156,
66
- "eval_rewards/chosen": -3.3025076389312744,
67
- "eval_rewards/margins": 0.17308557033538818,
68
- "eval_rewards/rejected": -3.475592851638794,
69
- "eval_runtime": 2597.1774,
70
- "eval_samples_per_second": 32.267,
71
- "eval_steps_per_second": 4.034,
72
  "step": 189
73
  },
74
  {
75
  "epoch": 0.64,
76
- "grad_norm": 6.213208198547363,
77
  "learning_rate": 8.569611578954186e-06,
78
- "logps/chosen": -181.8887176513672,
79
- "logps/ref_chosen": -198.8422088623047,
80
- "logps/ref_rejected": -139.47769165039062,
81
- "logps/rejected": -181.8887176513672,
82
- "loss": 0.3802,
83
- "rewards/accuracies": 0.8515625,
84
- "rewards/chosen": 0.8476755023002625,
85
- "rewards/margins": 1.189441204071045,
86
- "rewards/rejected": -0.3417656719684601,
87
  "step": 200
88
  },
89
  {
90
  "epoch": 0.8064,
91
- "eval_logps/chosen": -139.97152709960938,
92
  "eval_logps/ref_chosen": -70.265625,
93
  "eval_logps/ref_rejected": -63.51186752319336,
94
- "eval_logps/rejected": -139.97152709960938,
95
- "eval_loss": 0.6932100653648376,
96
- "eval_rewards/accuracies": 0.5844013269373046,
97
- "eval_rewards/chosen": -3.4852941036224365,
98
- "eval_rewards/margins": 0.16311165690422058,
99
- "eval_rewards/rejected": -3.6484062671661377,
100
- "eval_runtime": 2597.2713,
101
- "eval_samples_per_second": 32.265,
102
- "eval_steps_per_second": 4.033,
103
  "step": 252
104
  }
105
  ],
 
10
  "log_history": [
11
  {
12
  "epoch": 0.2016,
13
+ "eval_logps/chosen": -70.4515609741211,
14
  "eval_logps/ref_chosen": -70.265625,
15
  "eval_logps/ref_rejected": -63.51186752319336,
16
+ "eval_logps/rejected": -70.4515609741211,
17
+ "eval_loss": 0.700020968914032,
18
+ "eval_rewards/accuracies": 0.5043316388630343,
19
+ "eval_rewards/chosen": -0.009296582080423832,
20
+ "eval_rewards/margins": -0.0003258216893300414,
21
+ "eval_rewards/rejected": -0.00897076167166233,
22
+ "eval_runtime": 2598.2788,
23
+ "eval_samples_per_second": 32.253,
24
+ "eval_steps_per_second": 4.032,
25
  "step": 63
26
  },
27
  {
28
  "epoch": 0.32,
29
+ "grad_norm": 7.734374523162842,
30
  "learning_rate": 2.3016987391917016e-05,
31
+ "logps/chosen": -164.50128173828125,
32
+ "logps/ref_chosen": -159.2122802734375,
33
+ "logps/ref_rejected": -181.2625732421875,
34
+ "logps/rejected": -164.50128173828125,
35
+ "loss": 0.6682,
36
+ "rewards/accuracies": 0.5696874856948853,
37
+ "rewards/chosen": -0.26445069909095764,
38
+ "rewards/margins": 0.10393363237380981,
39
+ "rewards/rejected": -0.36838433146476746,
40
  "step": 100
41
  },
42
  {
43
  "epoch": 0.4032,
44
+ "eval_logps/chosen": -70.71819305419922,
45
  "eval_logps/ref_chosen": -70.265625,
46
  "eval_logps/ref_rejected": -63.51186752319336,
47
+ "eval_logps/rejected": -70.71819305419922,
48
+ "eval_loss": 0.6713600158691406,
49
+ "eval_rewards/accuracies": 0.5957017732273693,
50
+ "eval_rewards/chosen": -0.022628214210271835,
51
+ "eval_rewards/margins": 0.06350255757570267,
52
+ "eval_rewards/rejected": -0.0861307755112648,
53
+ "eval_runtime": 2598.0044,
54
+ "eval_samples_per_second": 32.256,
55
+ "eval_steps_per_second": 4.032,
56
  "step": 126
57
  },
58
  {
59
  "epoch": 0.6048,
60
+ "eval_logps/chosen": -71.90546417236328,
61
  "eval_logps/ref_chosen": -70.265625,
62
  "eval_logps/ref_rejected": -63.51186752319336,
63
+ "eval_logps/rejected": -71.90546417236328,
64
+ "eval_loss": 0.6622863411903381,
65
+ "eval_rewards/accuracies": 0.6141738860647717,
66
+ "eval_rewards/chosen": -0.08199150860309601,
67
+ "eval_rewards/margins": 0.08873386681079865,
68
+ "eval_rewards/rejected": -0.17072536051273346,
69
+ "eval_runtime": 2596.18,
70
+ "eval_samples_per_second": 32.279,
71
+ "eval_steps_per_second": 4.035,
72
  "step": 189
73
  },
74
  {
75
  "epoch": 0.64,
76
+ "grad_norm": 5.594386100769043,
77
  "learning_rate": 8.569611578954186e-06,
78
+ "logps/chosen": -161.8042755126953,
79
+ "logps/ref_chosen": -158.50271606445312,
80
+ "logps/ref_rejected": -179.81715393066406,
81
+ "logps/rejected": -161.8042755126953,
82
+ "loss": 0.6423,
83
+ "rewards/accuracies": 0.6114062666893005,
84
+ "rewards/chosen": -0.16507746279239655,
85
+ "rewards/margins": 0.20611368119716644,
86
+ "rewards/rejected": -0.371191143989563,
87
  "step": 200
88
  },
89
  {
90
  "epoch": 0.8064,
91
+ "eval_logps/chosen": -72.42017364501953,
92
  "eval_logps/ref_chosen": -70.265625,
93
  "eval_logps/ref_rejected": -63.51186752319336,
94
+ "eval_logps/rejected": -72.42017364501953,
95
+ "eval_loss": 0.6550462245941162,
96
+ "eval_rewards/accuracies": 0.6268585475286986,
97
+ "eval_rewards/chosen": -0.10772793740034103,
98
+ "eval_rewards/margins": 0.10725179314613342,
99
+ "eval_rewards/rejected": -0.21497976779937744,
100
+ "eval_runtime": 2596.7824,
101
+ "eval_samples_per_second": 32.271,
102
+ "eval_steps_per_second": 4.034,
103
  "step": 252
104
  }
105
  ],
checkpoint-252/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46ff4287a15bff07546a9b59f8721b8272bf4ace050683003790d085d41bfcd5
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6686dc40b6db461a3b988241b5bc24010b480d0524615f6ccf956bf69c70fdf
3
  size 5176