timewanderer committed on
Commit
b4653c5
1 Parent(s): 483b62c

Training in progress, step 1500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ced52e0685875b8d675e97aaf647b1f9ccce4784eb0c6a6f8c8641ca10b9da5
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebd8f296a86cd8022eec6263fe9e1abf4ee83487f9bbcae7b7ade8d89d337d1a
3
  size 268290900
run-4/checkpoint-1000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:866a5f62148435f94b3a1364b88a97b7dcf18eedf77829138812f80eb018ad2a
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60b838338a2308724453de7476d9b1c3bba2249c23a350f295167211e17d9424
3
  size 268290900
run-4/checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:313fb3854975931f25c0c54840393d72028b1ef9815be13b182f9d4b40f2a329
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63d428f737fbba0a4c7235dc0b71a8b84d4fa763e538391547e0e9460ca8ca2d
3
  size 536643898
run-4/checkpoint-1000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f182501c34e4ea3ebc7617d27edab7e1367582b147e518cd90295ec7f2eaa0f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60ef01273aaa599804d51a93c6c0c61874ebd50ab9e135a21aab5f7cd0e6487a
3
  size 1064
run-4/checkpoint-1000/trainer_state.json CHANGED
@@ -10,50 +10,50 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6325806451612903,
14
- "eval_loss": 0.3024460971355438,
15
- "eval_runtime": 5.4182,
16
- "eval_samples_per_second": 572.151,
17
- "eval_steps_per_second": 11.997,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.6661854386329651,
23
- "learning_rate": 1.371069182389937e-05,
24
- "loss": 0.4696,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.8274193548387097,
30
- "eval_loss": 0.13584715127944946,
31
- "eval_runtime": 5.4989,
32
- "eval_samples_per_second": 563.751,
33
- "eval_steps_per_second": 11.821,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.8712903225806452,
39
- "eval_loss": 0.08720238506793976,
40
- "eval_runtime": 5.4432,
41
- "eval_samples_per_second": 569.518,
42
- "eval_steps_per_second": 11.941,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.6423004865646362,
48
- "learning_rate": 7.421383647798742e-06,
49
- "loss": 0.1584,
50
  "step": 1000
51
  }
52
  ],
53
  "logging_steps": 500,
54
- "max_steps": 1590,
55
  "num_input_tokens_seen": 0,
56
- "num_train_epochs": 5,
57
  "save_steps": 500,
58
  "stateful_callbacks": {
59
  "TrainerControl": {
@@ -71,8 +71,8 @@
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
- "alpha": 0.41539799624272716,
75
- "num_train_epochs": 5,
76
  "temperature": 3
77
  }
78
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6406451612903226,
14
+ "eval_loss": 0.2967454195022583,
15
+ "eval_runtime": 5.4866,
16
+ "eval_samples_per_second": 565.017,
17
+ "eval_steps_per_second": 11.847,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.6655566096305847,
23
+ "learning_rate": 1.4758909853249476e-05,
24
+ "loss": 0.4655,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8335483870967741,
30
+ "eval_loss": 0.12780845165252686,
31
+ "eval_runtime": 5.7922,
32
+ "eval_samples_per_second": 535.206,
33
+ "eval_steps_per_second": 11.222,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8851612903225806,
39
+ "eval_loss": 0.07867012172937393,
40
+ "eval_runtime": 5.484,
41
+ "eval_samples_per_second": 565.28,
42
+ "eval_steps_per_second": 11.853,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.6149155497550964,
48
+ "learning_rate": 9.517819706498952e-06,
49
+ "loss": 0.1495,
50
  "step": 1000
51
  }
52
  ],
53
  "logging_steps": 500,
54
+ "max_steps": 1908,
55
  "num_input_tokens_seen": 0,
56
+ "num_train_epochs": 6,
57
  "save_steps": 500,
58
  "stateful_callbacks": {
59
  "TrainerControl": {
 
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
+ "alpha": 0.6003183502544485,
75
+ "num_train_epochs": 6,
76
  "temperature": 3
77
  }
78
  }
run-4/checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42fcda7114a3cb0f210fff0e0a799f161a86990fe91444d8130cb28445b06005
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39e2230760e0d209a41a97071088ff5c736b18829fcf11d0286149c73a45dbed
3
  size 5240
run-4/checkpoint-1500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40578156a126d8bc2399e1d908fa81069a4992a0bc195381dd64b7d3e04f3de9
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebd8f296a86cd8022eec6263fe9e1abf4ee83487f9bbcae7b7ade8d89d337d1a
3
  size 268290900
run-4/checkpoint-1500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ce27375ff8342917b293709988fb26d89a19c91bdf65ed569fea01e2ded88e9
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b91d84f1993b5b55c784280f6726bf11abeb203b23b2d8be5149fcac0dea9f4c
3
  size 536643898
run-4/checkpoint-1500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91d97f7c1be085852e2ffd2fe2f8a493a080e2b2858e2f3baf642fe214e035a0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55c8d3ce0734337fc0c187ca5543b4c70ca45d996531f199209b3a0c2a798109
3
  size 1064
run-4/checkpoint-1500/trainer_state.json CHANGED
@@ -10,66 +10,66 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6325806451612903,
14
- "eval_loss": 0.3024460971355438,
15
- "eval_runtime": 5.4182,
16
- "eval_samples_per_second": 572.151,
17
- "eval_steps_per_second": 11.997,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.6661854386329651,
23
- "learning_rate": 1.371069182389937e-05,
24
- "loss": 0.4696,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.8274193548387097,
30
- "eval_loss": 0.13584715127944946,
31
- "eval_runtime": 5.4989,
32
- "eval_samples_per_second": 563.751,
33
- "eval_steps_per_second": 11.821,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.8712903225806452,
39
- "eval_loss": 0.08720238506793976,
40
- "eval_runtime": 5.4432,
41
- "eval_samples_per_second": 569.518,
42
- "eval_steps_per_second": 11.941,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.6423004865646362,
48
- "learning_rate": 7.421383647798742e-06,
49
- "loss": 0.1584,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_accuracy": 0.8945161290322581,
55
- "eval_loss": 0.06926823407411575,
56
- "eval_runtime": 5.4202,
57
- "eval_samples_per_second": 571.937,
58
- "eval_steps_per_second": 11.992,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
- "grad_norm": 0.44296959042549133,
64
- "learning_rate": 1.1320754716981133e-06,
65
- "loss": 0.1029,
66
  "step": 1500
67
  }
68
  ],
69
  "logging_steps": 500,
70
- "max_steps": 1590,
71
  "num_input_tokens_seen": 0,
72
- "num_train_epochs": 5,
73
  "save_steps": 500,
74
  "stateful_callbacks": {
75
  "TrainerControl": {
@@ -87,8 +87,8 @@
87
  "train_batch_size": 48,
88
  "trial_name": null,
89
  "trial_params": {
90
- "alpha": 0.41539799624272716,
91
- "num_train_epochs": 5,
92
  "temperature": 3
93
  }
94
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6406451612903226,
14
+ "eval_loss": 0.2967454195022583,
15
+ "eval_runtime": 5.4866,
16
+ "eval_samples_per_second": 565.017,
17
+ "eval_steps_per_second": 11.847,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.6655566096305847,
23
+ "learning_rate": 1.4758909853249476e-05,
24
+ "loss": 0.4655,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8335483870967741,
30
+ "eval_loss": 0.12780845165252686,
31
+ "eval_runtime": 5.7922,
32
+ "eval_samples_per_second": 535.206,
33
+ "eval_steps_per_second": 11.222,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8851612903225806,
39
+ "eval_loss": 0.07867012172937393,
40
+ "eval_runtime": 5.484,
41
+ "eval_samples_per_second": 565.28,
42
+ "eval_steps_per_second": 11.853,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.6149155497550964,
48
+ "learning_rate": 9.517819706498952e-06,
49
+ "loss": 0.1495,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "eval_accuracy": 0.9,
55
+ "eval_loss": 0.05978764593601227,
56
+ "eval_runtime": 5.547,
57
+ "eval_samples_per_second": 558.865,
58
+ "eval_steps_per_second": 11.718,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
+ "grad_norm": 0.43114015460014343,
64
+ "learning_rate": 4.276729559748428e-06,
65
+ "loss": 0.0911,
66
  "step": 1500
67
  }
68
  ],
69
  "logging_steps": 500,
70
+ "max_steps": 1908,
71
  "num_input_tokens_seen": 0,
72
+ "num_train_epochs": 6,
73
  "save_steps": 500,
74
  "stateful_callbacks": {
75
  "TrainerControl": {
 
87
  "train_batch_size": 48,
88
  "trial_name": null,
89
  "trial_params": {
90
+ "alpha": 0.6003183502544485,
91
+ "num_train_epochs": 6,
92
  "temperature": 3
93
  }
94
  }
run-4/checkpoint-1500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42fcda7114a3cb0f210fff0e0a799f161a86990fe91444d8130cb28445b06005
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39e2230760e0d209a41a97071088ff5c736b18829fcf11d0286149c73a45dbed
3
  size 5240
runs/Oct12_06-40-39_b76c1be2ae55/events.out.tfevents.1728718423.b76c1be2ae55.1423.5 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db198a5f0e14ccda4309965ca889bf39e9b67c3eab3499a77df07f843e475e4b
3
- size 14155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f732651deb890d44c0204f14e20053d5c4cbb6fa7e12cb3d59061a197906d16
3
+ size 15366