timewanderer committed on
Commit
6df7fb6
1 Parent(s): 15a6189

Training in progress, step 500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47fa5bdd9ebe645b2aa80700c20cbf7176f0e6e274d2b1ea405cf63ea345869d
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:391187adb9deb662903fc270652f066549287835d564aa5a92416b302df77e71
3
  size 268290900
run-1/checkpoint-2544/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69eb47dc893c05ba7230c6a876fade5c74cf600f71b3fb1c999a915cf4a8f07f
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e906c7638b0248c9885b5f3e966cf1e8f0476b771b3a47b75273ae1e9f50563
3
  size 268290900
run-1/checkpoint-2544/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dad5182a3f14ff6ef0da349116975eeb7b9e4efc16dbb4371e39b50b0d6f0795
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2edac191b883f41f9cb0a2016ff82c05343fa1b1f4144984dea88336b70bab2
3
  size 536643898
run-1/checkpoint-2544/trainer_state.json CHANGED
@@ -10,100 +10,100 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5858064516129032,
14
- "eval_loss": 0.20023050904273987,
15
- "eval_runtime": 5.4304,
16
- "eval_samples_per_second": 570.856,
17
- "eval_steps_per_second": 11.97,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.5323805212974548,
23
  "learning_rate": 1.606918238993711e-05,
24
- "loss": 0.3211,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.817741935483871,
30
- "eval_loss": 0.09780898690223694,
31
- "eval_runtime": 5.6251,
32
- "eval_samples_per_second": 551.101,
33
- "eval_steps_per_second": 11.555,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.8761290322580645,
39
- "eval_loss": 0.06598751991987228,
40
- "eval_runtime": 5.3523,
41
- "eval_samples_per_second": 579.185,
42
- "eval_steps_per_second": 12.144,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.45211926102638245,
48
  "learning_rate": 1.2138364779874214e-05,
49
- "loss": 0.1134,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_accuracy": 0.8970967741935484,
55
- "eval_loss": 0.05154659226536751,
56
- "eval_runtime": 5.4338,
57
- "eval_samples_per_second": 570.506,
58
- "eval_steps_per_second": 11.962,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
- "grad_norm": 0.31171318888664246,
64
  "learning_rate": 8.207547169811321e-06,
65
- "loss": 0.0736,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 5.0,
70
  "eval_accuracy": 0.9051612903225806,
71
- "eval_loss": 0.04292130842804909,
72
- "eval_runtime": 5.5921,
73
- "eval_samples_per_second": 554.351,
74
- "eval_steps_per_second": 11.623,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
- "eval_accuracy": 0.9090322580645162,
80
- "eval_loss": 0.03817291185259819,
81
- "eval_runtime": 5.3716,
82
- "eval_samples_per_second": 577.105,
83
- "eval_steps_per_second": 12.101,
84
  "step": 1908
85
  },
86
  {
87
  "epoch": 6.289308176100629,
88
- "grad_norm": 0.2767144441604614,
89
  "learning_rate": 4.276729559748428e-06,
90
- "loss": 0.0589,
91
  "step": 2000
92
  },
93
  {
94
  "epoch": 7.0,
95
- "eval_accuracy": 0.9135483870967742,
96
- "eval_loss": 0.03585055470466614,
97
- "eval_runtime": 5.5797,
98
- "eval_samples_per_second": 555.585,
99
- "eval_steps_per_second": 11.649,
100
  "step": 2226
101
  },
102
  {
103
  "epoch": 7.861635220125786,
104
- "grad_norm": 0.27693912386894226,
105
  "learning_rate": 3.459119496855346e-07,
106
- "loss": 0.0529,
107
  "step": 2500
108
  }
109
  ],
@@ -128,8 +128,8 @@
128
  "train_batch_size": 48,
129
  "trial_name": null,
130
  "trial_params": {
131
- "alpha": 0.1709217232718393,
132
  "num_train_epochs": 8,
133
- "temperature": 11
134
  }
135
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.5829032258064516,
14
+ "eval_loss": 0.1985393762588501,
15
+ "eval_runtime": 5.6203,
16
+ "eval_samples_per_second": 551.572,
17
+ "eval_steps_per_second": 11.565,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.5288612246513367,
23
  "learning_rate": 1.606918238993711e-05,
24
+ "loss": 0.3184,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8174193548387096,
30
+ "eval_loss": 0.09726663678884506,
31
+ "eval_runtime": 6.0438,
32
+ "eval_samples_per_second": 512.92,
33
+ "eval_steps_per_second": 10.755,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8764516129032258,
39
+ "eval_loss": 0.06577406078577042,
40
+ "eval_runtime": 5.6348,
41
+ "eval_samples_per_second": 550.155,
42
+ "eval_steps_per_second": 11.535,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.44873112440109253,
48
  "learning_rate": 1.2138364779874214e-05,
49
+ "loss": 0.1127,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "eval_accuracy": 0.8964516129032258,
55
+ "eval_loss": 0.05149334296584129,
56
+ "eval_runtime": 5.7578,
57
+ "eval_samples_per_second": 538.401,
58
+ "eval_steps_per_second": 11.289,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
+ "grad_norm": 0.30973368883132935,
64
  "learning_rate": 8.207547169811321e-06,
65
+ "loss": 0.0733,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 5.0,
70
  "eval_accuracy": 0.9051612903225806,
71
+ "eval_loss": 0.04290274158120155,
72
+ "eval_runtime": 5.9031,
73
+ "eval_samples_per_second": 525.15,
74
+ "eval_steps_per_second": 11.011,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
+ "eval_accuracy": 0.9093548387096774,
80
+ "eval_loss": 0.038180265575647354,
81
+ "eval_runtime": 5.6503,
82
+ "eval_samples_per_second": 548.642,
83
+ "eval_steps_per_second": 11.504,
84
  "step": 1908
85
  },
86
  {
87
  "epoch": 6.289308176100629,
88
+ "grad_norm": 0.27582159638404846,
89
  "learning_rate": 4.276729559748428e-06,
90
+ "loss": 0.0587,
91
  "step": 2000
92
  },
93
  {
94
  "epoch": 7.0,
95
+ "eval_accuracy": 0.9141935483870968,
96
+ "eval_loss": 0.03587044030427933,
97
+ "eval_runtime": 5.962,
98
+ "eval_samples_per_second": 519.958,
99
+ "eval_steps_per_second": 10.902,
100
  "step": 2226
101
  },
102
  {
103
  "epoch": 7.861635220125786,
104
+ "grad_norm": 0.275245726108551,
105
  "learning_rate": 3.459119496855346e-07,
106
+ "loss": 0.0528,
107
  "step": 2500
108
  }
109
  ],
 
128
  "train_batch_size": 48,
129
  "trial_name": null,
130
  "trial_params": {
131
+ "alpha": 0.819089504077056,
132
  "num_train_epochs": 8,
133
+ "temperature": 12
134
  }
135
  }
run-1/checkpoint-2544/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78c9f8065f3863022051fc0cf21ad46e0468d9988de813731a8591806be5f3c9
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ad3761ee6c049b5f54cddb21d4403152bfaafdbfc2a13b71993419e99660838
3
  size 5240
run-2/checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33620587b37c196969d3f412f74ef041dcfeee8258db5933dc3c4540e70b1e43
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:391187adb9deb662903fc270652f066549287835d564aa5a92416b302df77e71
3
  size 268290900
run-2/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bab979622485a1e70f82a64c3ec4aae99b0262391ec447afad3b7eae33df5ce
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc13f9b00992b49e7dfe94e9d1a650da3af781a61a99f9328ba16c032c275b8d
3
  size 536643898
run-2/checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98041bd7cae455426e290a1a0ee683bd5dd30893f7451fec3a464ae8995b17e4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04366f62f8f88f5a8265df59adb051b320463277845db80e7fa43f13110c18c9
3
  size 1064
run-2/checkpoint-500/trainer_state.json CHANGED
@@ -10,25 +10,25 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5887096774193549,
14
- "eval_loss": 0.19886387884616852,
15
- "eval_runtime": 5.1582,
16
- "eval_samples_per_second": 600.982,
17
- "eval_steps_per_second": 12.601,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.5294517874717712,
23
- "learning_rate": 1.650593990216632e-05,
24
- "loss": 0.3203,
25
  "step": 500
26
  }
27
  ],
28
  "logging_steps": 500,
29
- "max_steps": 2862,
30
  "num_input_tokens_seen": 0,
31
- "num_train_epochs": 9,
32
  "save_steps": 500,
33
  "stateful_callbacks": {
34
  "TrainerControl": {
@@ -46,8 +46,8 @@
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
- "alpha": 0.38078945785669316,
50
- "num_train_epochs": 9,
51
- "temperature": 11
52
  }
53
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.5796774193548387,
14
+ "eval_loss": 0.1907225400209427,
15
+ "eval_runtime": 5.6367,
16
+ "eval_samples_per_second": 549.965,
17
+ "eval_steps_per_second": 11.532,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.5174282193183899,
23
+ "learning_rate": 1.685534591194969e-05,
24
+ "loss": 0.3083,
25
  "step": 500
26
  }
27
  ],
28
  "logging_steps": 500,
29
+ "max_steps": 3180,
30
  "num_input_tokens_seen": 0,
31
+ "num_train_epochs": 10,
32
  "save_steps": 500,
33
  "stateful_callbacks": {
34
  "TrainerControl": {
 
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
+ "alpha": 0.4529889909540463,
50
+ "num_train_epochs": 10,
51
+ "temperature": 17
52
  }
53
  }
run-2/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a815016dfca6056394449b7eec53142e869a6e9ff738e6f9557bb4382010c025
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:721f120a8e258fc8c48320ec67ae2a387816a0061225dcba3c70a5da25825846
3
  size 5240
runs/Oct11_19-31-02_821d3e23518d/events.out.tfevents.1728678216.821d3e23518d.3094.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b40e04c0d9c558cbe125410be24b7e46355152735bdb62eee3c77377f159c32
3
+ size 13299
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ad3761ee6c049b5f54cddb21d4403152bfaafdbfc2a13b71993419e99660838
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:721f120a8e258fc8c48320ec67ae2a387816a0061225dcba3c70a5da25825846
3
  size 5240