timewanderer commited on
Commit
bcb2eeb
1 Parent(s): f762356

Training in progress, step 2226

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:744965fc651adbb182875f98c309e15905b940ddfa5ac317af007d18b2f3085e
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a461fedb48bd3ba08cf0db08a34f253e9782b2d81e15c6fa47b4f754bb7a3c2
3
  size 268290900
run-2/checkpoint-2000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:159a38a62caabed9e1210e59e61e37481121dd41cf4c689d9dd1ff3a76ab2258
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffec1b640d7fef2ebc929e35aee46eb1e69a1c047f7259f54793032dfcc12be8
3
  size 268290900
run-2/checkpoint-2000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d104b09460b4032759fd9392322e2f598820f8336681511fe529d62c7badd4ca
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc1f174ac7936bbb2b1ae74026852de9c65642eaa3130e2e8c0ef7f9371e11d6
3
  size 536643898
run-2/checkpoint-2000/trainer_state.json CHANGED
@@ -10,84 +10,84 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6445161290322581,
14
- "eval_loss": 0.29249975085258484,
15
- "eval_runtime": 5.4218,
16
- "eval_samples_per_second": 571.762,
17
- "eval_steps_per_second": 11.989,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.6633772850036621,
23
  "learning_rate": 1.550763701707098e-05,
24
- "loss": 0.4626,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.8380645161290322,
30
- "eval_loss": 0.12248263508081436,
31
- "eval_runtime": 5.3956,
32
- "eval_samples_per_second": 574.542,
33
- "eval_steps_per_second": 12.047,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.8887096774193548,
39
- "eval_loss": 0.07343784719705582,
40
- "eval_runtime": 5.2977,
41
- "eval_samples_per_second": 585.157,
42
- "eval_steps_per_second": 12.269,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.59869384765625,
48
  "learning_rate": 1.101527403414196e-05,
49
- "loss": 0.1438,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_accuracy": 0.9045161290322581,
55
- "eval_loss": 0.05444410815834999,
56
- "eval_runtime": 5.5714,
57
- "eval_samples_per_second": 556.417,
58
- "eval_steps_per_second": 11.667,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
- "grad_norm": 0.4075869023799896,
64
  "learning_rate": 6.522911051212939e-06,
65
- "loss": 0.0845,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 5.0,
70
- "eval_accuracy": 0.9141935483870968,
71
- "eval_loss": 0.045045968145132065,
72
- "eval_runtime": 5.3274,
73
- "eval_samples_per_second": 581.894,
74
- "eval_steps_per_second": 12.201,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
- "eval_accuracy": 0.9190322580645162,
80
- "eval_loss": 0.04080257937312126,
81
- "eval_runtime": 5.3427,
82
- "eval_samples_per_second": 580.23,
83
- "eval_steps_per_second": 12.166,
84
  "step": 1908
85
  },
86
  {
87
  "epoch": 6.289308176100629,
88
- "grad_norm": 0.352282851934433,
89
  "learning_rate": 2.0305480682839176e-06,
90
- "loss": 0.0669,
91
  "step": 2000
92
  }
93
  ],
@@ -112,8 +112,8 @@
112
  "train_batch_size": 48,
113
  "trial_name": null,
114
  "trial_params": {
115
- "alpha": 0.444035914041536,
116
  "num_train_epochs": 7,
117
- "temperature": 3
118
  }
119
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.603225806451613,
14
+ "eval_loss": 0.23406817018985748,
15
+ "eval_runtime": 5.4802,
16
+ "eval_samples_per_second": 565.673,
17
+ "eval_steps_per_second": 11.861,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.57925945520401,
23
  "learning_rate": 1.550763701707098e-05,
24
+ "loss": 0.3716,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8309677419354838,
30
+ "eval_loss": 0.1094982922077179,
31
+ "eval_runtime": 5.8681,
32
+ "eval_samples_per_second": 528.276,
33
+ "eval_steps_per_second": 11.077,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8819354838709678,
39
+ "eval_loss": 0.07135984301567078,
40
+ "eval_runtime": 5.4437,
41
+ "eval_samples_per_second": 569.468,
42
+ "eval_steps_per_second": 11.94,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.5091490149497986,
48
  "learning_rate": 1.101527403414196e-05,
49
+ "loss": 0.1274,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "eval_accuracy": 0.8980645161290323,
55
+ "eval_loss": 0.055049341171979904,
56
+ "eval_runtime": 5.4797,
57
+ "eval_samples_per_second": 565.721,
58
+ "eval_steps_per_second": 11.862,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
+ "grad_norm": 0.3538859486579895,
64
  "learning_rate": 6.522911051212939e-06,
65
+ "loss": 0.0809,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 5.0,
70
+ "eval_accuracy": 0.9061290322580645,
71
+ "eval_loss": 0.04594043269753456,
72
+ "eval_runtime": 5.6864,
73
+ "eval_samples_per_second": 545.158,
74
+ "eval_steps_per_second": 11.431,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
+ "eval_accuracy": 0.9090322580645162,
80
+ "eval_loss": 0.04156717658042908,
81
+ "eval_runtime": 5.586,
82
+ "eval_samples_per_second": 554.96,
83
+ "eval_steps_per_second": 11.636,
84
  "step": 1908
85
  },
86
  {
87
  "epoch": 6.289308176100629,
88
+ "grad_norm": 0.31923583149909973,
89
  "learning_rate": 2.0305480682839176e-06,
90
+ "loss": 0.0654,
91
  "step": 2000
92
  }
93
  ],
 
112
  "train_batch_size": 48,
113
  "trial_name": null,
114
  "trial_params": {
115
+ "alpha": 0.49068870611003523,
116
  "num_train_epochs": 7,
117
+ "temperature": 5
118
  }
119
  }
run-2/checkpoint-2000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22d5df12fd1120947d14fad0a0388bc240150e4fa0277e30f926f1ec12ddb8e9
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8759fe239e3e09af377de9d8ac9ed677e8c3fe7b8d3bf99d5396a42501d32db2
3
  size 5240
run-2/checkpoint-2226/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bde7c96d3a8960631be6fa8967080af250062b2283875ab44aeb97fc14d426d5
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a461fedb48bd3ba08cf0db08a34f253e9782b2d81e15c6fa47b4f754bb7a3c2
3
  size 268290900
run-2/checkpoint-2226/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af1ad9d9d7e7e4ebe5e7086535145004be6d4299c50840889d61a06edae7e922
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9b9a634ce49f802f3a40ab00a30a40ec119bdd38806e4f1dda97675bfdca962
3
  size 536643898
run-2/checkpoint-2226/trainer_state.json CHANGED
@@ -10,84 +10,84 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6445161290322581,
14
- "eval_loss": 0.29249975085258484,
15
- "eval_runtime": 5.4218,
16
- "eval_samples_per_second": 571.762,
17
- "eval_steps_per_second": 11.989,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.6633772850036621,
23
  "learning_rate": 1.550763701707098e-05,
24
- "loss": 0.4626,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.8380645161290322,
30
- "eval_loss": 0.12248263508081436,
31
- "eval_runtime": 5.3956,
32
- "eval_samples_per_second": 574.542,
33
- "eval_steps_per_second": 12.047,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.8887096774193548,
39
- "eval_loss": 0.07343784719705582,
40
- "eval_runtime": 5.2977,
41
- "eval_samples_per_second": 585.157,
42
- "eval_steps_per_second": 12.269,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.59869384765625,
48
  "learning_rate": 1.101527403414196e-05,
49
- "loss": 0.1438,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_accuracy": 0.9045161290322581,
55
- "eval_loss": 0.05444410815834999,
56
- "eval_runtime": 5.5714,
57
- "eval_samples_per_second": 556.417,
58
- "eval_steps_per_second": 11.667,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
- "grad_norm": 0.4075869023799896,
64
  "learning_rate": 6.522911051212939e-06,
65
- "loss": 0.0845,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 5.0,
70
- "eval_accuracy": 0.9141935483870968,
71
- "eval_loss": 0.045045968145132065,
72
- "eval_runtime": 5.3274,
73
- "eval_samples_per_second": 581.894,
74
- "eval_steps_per_second": 12.201,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
- "eval_accuracy": 0.9190322580645162,
80
- "eval_loss": 0.04080257937312126,
81
- "eval_runtime": 5.3427,
82
- "eval_samples_per_second": 580.23,
83
- "eval_steps_per_second": 12.166,
84
  "step": 1908
85
  },
86
  {
87
  "epoch": 6.289308176100629,
88
- "grad_norm": 0.352282851934433,
89
  "learning_rate": 2.0305480682839176e-06,
90
- "loss": 0.0669,
91
  "step": 2000
92
  }
93
  ],
@@ -112,8 +112,8 @@
112
  "train_batch_size": 48,
113
  "trial_name": null,
114
  "trial_params": {
115
- "alpha": 0.444035914041536,
116
  "num_train_epochs": 7,
117
- "temperature": 3
118
  }
119
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.603225806451613,
14
+ "eval_loss": 0.23406817018985748,
15
+ "eval_runtime": 5.4802,
16
+ "eval_samples_per_second": 565.673,
17
+ "eval_steps_per_second": 11.861,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.57925945520401,
23
  "learning_rate": 1.550763701707098e-05,
24
+ "loss": 0.3716,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8309677419354838,
30
+ "eval_loss": 0.1094982922077179,
31
+ "eval_runtime": 5.8681,
32
+ "eval_samples_per_second": 528.276,
33
+ "eval_steps_per_second": 11.077,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8819354838709678,
39
+ "eval_loss": 0.07135984301567078,
40
+ "eval_runtime": 5.4437,
41
+ "eval_samples_per_second": 569.468,
42
+ "eval_steps_per_second": 11.94,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.5091490149497986,
48
  "learning_rate": 1.101527403414196e-05,
49
+ "loss": 0.1274,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "eval_accuracy": 0.8980645161290323,
55
+ "eval_loss": 0.055049341171979904,
56
+ "eval_runtime": 5.4797,
57
+ "eval_samples_per_second": 565.721,
58
+ "eval_steps_per_second": 11.862,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
+ "grad_norm": 0.3538859486579895,
64
  "learning_rate": 6.522911051212939e-06,
65
+ "loss": 0.0809,
66
  "step": 1500
67
  },
68
  {
69
  "epoch": 5.0,
70
+ "eval_accuracy": 0.9061290322580645,
71
+ "eval_loss": 0.04594043269753456,
72
+ "eval_runtime": 5.6864,
73
+ "eval_samples_per_second": 545.158,
74
+ "eval_steps_per_second": 11.431,
75
  "step": 1590
76
  },
77
  {
78
  "epoch": 6.0,
79
+ "eval_accuracy": 0.9090322580645162,
80
+ "eval_loss": 0.04156717658042908,
81
+ "eval_runtime": 5.586,
82
+ "eval_samples_per_second": 554.96,
83
+ "eval_steps_per_second": 11.636,
84
  "step": 1908
85
  },
86
  {
87
  "epoch": 6.289308176100629,
88
+ "grad_norm": 0.31923583149909973,
89
  "learning_rate": 2.0305480682839176e-06,
90
+ "loss": 0.0654,
91
  "step": 2000
92
  }
93
  ],
 
112
  "train_batch_size": 48,
113
  "trial_name": null,
114
  "trial_params": {
115
+ "alpha": 0.49068870611003523,
116
  "num_train_epochs": 7,
117
+ "temperature": 5
118
  }
119
  }
run-2/checkpoint-2226/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22d5df12fd1120947d14fad0a0388bc240150e4fa0277e30f926f1ec12ddb8e9
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8759fe239e3e09af377de9d8ac9ed677e8c3fe7b8d3bf99d5396a42501d32db2
3
  size 5240
runs/Oct12_06-40-39_b76c1be2ae55/events.out.tfevents.1728717352.b76c1be2ae55.1423.3 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d40e1bffd20d44f6d7d196bd10584be1460641c398a0457ce17732b1f4af602
3
- size 15223
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2db09c995ccbc13b91f82d14a76ef099521d2714c6dbf6bd493238eb824d4952
3
+ size 15900