timewanderer committed
Commit b4eccb7
1 Parent(s): 16395c2

Training in progress, step 1500

model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:76cadb93027e5ec338e6724a0b24825a1fc09abf2eef793105f9414585309730
+ oid sha256:3e3dfa7be2ce14e7f0ea5acdcda3076bbb825e51fff60e158046ec8811a22473
  size 268290900
run-3/checkpoint-1000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e7a84ca12312755d63f3d6cb7c3f98591fb856de7edceb500316fb8b01dec93a
+ oid sha256:d269238e659920d175ea8529919070513eb475cc8cc4981d046f748105493cb2
  size 268290900
run-3/checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:274859b63376561cadd84391124a2f6182fd6726468ee5dda887a00a719a88f4
+ oid sha256:87d8bcd0279b17a6b825fa8fcd97f96b19c502400a65143d1d153cf56a92b506
  size 536643898
run-3/checkpoint-1000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9f182501c34e4ea3ebc7617d27edab7e1367582b147e518cd90295ec7f2eaa0f
+ oid sha256:c794bc4c67ef18245dd516031ce405ab557e4d551d225d8dd1e1abc0f2be8e33
  size 1064
run-3/checkpoint-1000/trainer_state.json CHANGED
@@ -10,50 +10,50 @@
  "log_history": [
  {
  "epoch": 1.0,
- "eval_accuracy": 0.5574193548387096,
- "eval_loss": 0.19900532066822052,
- "eval_runtime": 5.3984,
- "eval_samples_per_second": 574.243,
- "eval_steps_per_second": 12.041,
+ "eval_accuracy": 0.6129032258064516,
+ "eval_loss": 0.22857815027236938,
+ "eval_runtime": 5.1291,
+ "eval_samples_per_second": 604.398,
+ "eval_steps_per_second": 12.673,
  "step": 318
  },
  {
  "epoch": 1.5723270440251573,
- "grad_norm": 0.5346085429191589,
- "learning_rate": 1.371069182389937e-05,
- "loss": 0.3123,
+ "grad_norm": 0.5715988874435425,
+ "learning_rate": 1.685534591194969e-05,
+ "loss": 0.368,
  "step": 500
  },
  {
  "epoch": 2.0,
- "eval_accuracy": 0.7993548387096774,
- "eval_loss": 0.1044413223862648,
- "eval_runtime": 5.6691,
- "eval_samples_per_second": 546.825,
- "eval_steps_per_second": 11.466,
+ "eval_accuracy": 0.8406451612903226,
+ "eval_loss": 0.10308429598808289,
+ "eval_runtime": 5.3537,
+ "eval_samples_per_second": 579.034,
+ "eval_steps_per_second": 12.141,
  "step": 636
  },
  {
  "epoch": 3.0,
- "eval_accuracy": 0.853225806451613,
- "eval_loss": 0.07598130404949188,
- "eval_runtime": 5.3839,
- "eval_samples_per_second": 575.791,
- "eval_steps_per_second": 12.073,
+ "eval_accuracy": 0.8909677419354839,
+ "eval_loss": 0.06482071429491043,
+ "eval_runtime": 5.1876,
+ "eval_samples_per_second": 597.576,
+ "eval_steps_per_second": 12.53,
  "step": 954
  },
  {
  "epoch": 3.1446540880503147,
- "grad_norm": 0.4651307165622711,
- "learning_rate": 7.421383647798742e-06,
- "loss": 0.1202,
+ "grad_norm": 0.4861523509025574,
+ "learning_rate": 1.371069182389937e-05,
+ "loss": 0.1205,
  "step": 1000
  }
  ],
  "logging_steps": 500,
- "max_steps": 1590,
+ "max_steps": 3180,
  "num_input_tokens_seen": 0,
- "num_train_epochs": 5,
+ "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
  "TrainerControl": {
@@ -71,8 +71,8 @@
  "train_batch_size": 48,
  "trial_name": null,
  "trial_params": {
- "alpha": 0.2168496843217581,
- "num_train_epochs": 5,
- "temperature": 20
+ "alpha": 0.34170044466363136,
+ "num_train_epochs": 10,
+ "temperature": 5
  }
  }
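Note: the trial_params above (alpha, temperature, num_train_epochs) are consistent with a knowledge-distillation hyperparameter search. The training code itself is not part of this commit, so the following is only a minimal sketch, assuming a standard soft-target distillation objective; the function name and the exact weighting convention are illustrative assumptions, not this run's implementation.

# Hedged sketch: how an `alpha`/`temperature` pair is commonly combined
# in a distillation loss. Not taken from this repository.
import torch
import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, labels,
                      alpha=0.34170044466363136, temperature=5.0):
    # Hard-label cross-entropy on the student's own predictions.
    ce = F.cross_entropy(student_logits, labels)
    # Soft-target KL divergence against the teacher, softened by the temperature
    # and rescaled by temperature**2 to keep gradient magnitudes comparable.
    kl = F.kl_div(
        F.log_softmax(student_logits / temperature, dim=-1),
        F.softmax(teacher_logits / temperature, dim=-1),
        reduction="batchmean",
    ) * (temperature ** 2)
    # `alpha` balances the two terms; the convention used by this run is unknown.
    return alpha * ce + (1.0 - alpha) * kl

# Example with dummy tensors (batch of 4, binary classification):
student = torch.randn(4, 2)
teacher = torch.randn(4, 2)
labels = torch.tensor([0, 1, 1, 0])
print(distillation_loss(student, teacher, labels))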
run-3/checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e06cae4b29f817ae50e2d0e86cf0bfd0dcec8273319eafd13e35e5529d6b14e2
+ oid sha256:13f3c53667f2d0c994b1e0580a4240dd5f1920dd508edcd4cd8ca7ddb067f7f3
  size 5240
run-3/checkpoint-1500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:15d568dbca52ab4915bb81ee9bf99f287c33ed2785910941e8a2d5b6054c7f7b
+ oid sha256:3e3dfa7be2ce14e7f0ea5acdcda3076bbb825e51fff60e158046ec8811a22473
  size 268290900
run-3/checkpoint-1500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3a63704a30df7ca6ce1af5a393053db96871cf5e2ce3e3aea3257a30bac82654
+ oid sha256:578039ccc687432b658165aa96f3b0f2e843d55f7e0de1398b1ee0f672a39638
  size 536643898
run-3/checkpoint-1500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:91d97f7c1be085852e2ffd2fe2f8a493a080e2b2858e2f3baf642fe214e035a0
+ oid sha256:71453465aad25f4c5a0a948496c64b1f74df850abda497954afe3695c00756ee
  size 1064
run-3/checkpoint-1500/trainer_state.json CHANGED
@@ -10,66 +10,66 @@
  "log_history": [
  {
  "epoch": 1.0,
- "eval_accuracy": 0.5574193548387096,
- "eval_loss": 0.19900532066822052,
- "eval_runtime": 5.3984,
- "eval_samples_per_second": 574.243,
- "eval_steps_per_second": 12.041,
+ "eval_accuracy": 0.6129032258064516,
+ "eval_loss": 0.22857815027236938,
+ "eval_runtime": 5.1291,
+ "eval_samples_per_second": 604.398,
+ "eval_steps_per_second": 12.673,
  "step": 318
  },
  {
  "epoch": 1.5723270440251573,
- "grad_norm": 0.5346085429191589,
- "learning_rate": 1.371069182389937e-05,
- "loss": 0.3123,
+ "grad_norm": 0.5715988874435425,
+ "learning_rate": 1.685534591194969e-05,
+ "loss": 0.368,
  "step": 500
  },
  {
  "epoch": 2.0,
- "eval_accuracy": 0.7993548387096774,
- "eval_loss": 0.1044413223862648,
- "eval_runtime": 5.6691,
- "eval_samples_per_second": 546.825,
- "eval_steps_per_second": 11.466,
+ "eval_accuracy": 0.8406451612903226,
+ "eval_loss": 0.10308429598808289,
+ "eval_runtime": 5.3537,
+ "eval_samples_per_second": 579.034,
+ "eval_steps_per_second": 12.141,
  "step": 636
  },
  {
  "epoch": 3.0,
- "eval_accuracy": 0.853225806451613,
- "eval_loss": 0.07598130404949188,
- "eval_runtime": 5.3839,
- "eval_samples_per_second": 575.791,
- "eval_steps_per_second": 12.073,
+ "eval_accuracy": 0.8909677419354839,
+ "eval_loss": 0.06482071429491043,
+ "eval_runtime": 5.1876,
+ "eval_samples_per_second": 597.576,
+ "eval_steps_per_second": 12.53,
  "step": 954
  },
  {
  "epoch": 3.1446540880503147,
- "grad_norm": 0.4651307165622711,
- "learning_rate": 7.421383647798742e-06,
- "loss": 0.1202,
+ "grad_norm": 0.4861523509025574,
+ "learning_rate": 1.371069182389937e-05,
+ "loss": 0.1205,
  "step": 1000
  },
  {
  "epoch": 4.0,
- "eval_accuracy": 0.8751612903225806,
- "eval_loss": 0.0643545612692833,
- "eval_runtime": 5.4223,
- "eval_samples_per_second": 571.715,
- "eval_steps_per_second": 11.988,
+ "eval_accuracy": 0.9032258064516129,
+ "eval_loss": 0.04816382750868797,
+ "eval_runtime": 5.1502,
+ "eval_samples_per_second": 601.919,
+ "eval_steps_per_second": 12.621,
  "step": 1272
  },
  {
  "epoch": 4.716981132075472,
- "grad_norm": 0.3457496464252472,
- "learning_rate": 1.1320754716981133e-06,
- "loss": 0.0873,
+ "grad_norm": 0.3236384689807892,
+ "learning_rate": 1.0566037735849058e-05,
+ "loss": 0.0727,
  "step": 1500
  }
  ],
  "logging_steps": 500,
- "max_steps": 1590,
+ "max_steps": 3180,
  "num_input_tokens_seen": 0,
- "num_train_epochs": 5,
+ "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
  "TrainerControl": {
@@ -87,8 +87,8 @@
  "train_batch_size": 48,
  "trial_name": null,
  "trial_params": {
- "alpha": 0.2168496843217581,
- "num_train_epochs": 5,
- "temperature": 20
+ "alpha": 0.34170044466363136,
+ "num_train_epochs": 10,
+ "temperature": 5
  }
  }
run-3/checkpoint-1500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e06cae4b29f817ae50e2d0e86cf0bfd0dcec8273319eafd13e35e5529d6b14e2
+ oid sha256:13f3c53667f2d0c994b1e0580a4240dd5f1920dd508edcd4cd8ca7ddb067f7f3
  size 5240
run-3/checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:de522ad9da2147f730c9bef13d580578dd5291d37418f02b528d6bd5928be7ca
+ oid sha256:1ca6d89d77b30dc89001d0a2e7380a4c048d3716c2663a0e1d21881704782ad6
  size 268290900
run-3/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:38652956e0755a366a7180402ae805cc004533fa33ad338d15dba9ccde09ef16
+ oid sha256:44a088b20b0694a4c988c166bcf8fbc1124a32ad577ee6ba35273769aa011959
  size 536643898
run-3/checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9e1264523e958cf7990dc5f42d876cc12129475c4603804cf66868aaf25c2c24
+ oid sha256:04366f62f8f88f5a8265df59adb051b320463277845db80e7fa43f13110c18c9
  size 1064
run-3/checkpoint-500/trainer_state.json CHANGED
@@ -10,25 +10,25 @@
  "log_history": [
  {
  "epoch": 1.0,
- "eval_accuracy": 0.5574193548387096,
- "eval_loss": 0.19900532066822052,
- "eval_runtime": 5.3984,
- "eval_samples_per_second": 574.243,
- "eval_steps_per_second": 12.041,
+ "eval_accuracy": 0.6129032258064516,
+ "eval_loss": 0.22857815027236938,
+ "eval_runtime": 5.1291,
+ "eval_samples_per_second": 604.398,
+ "eval_steps_per_second": 12.673,
  "step": 318
  },
  {
  "epoch": 1.5723270440251573,
- "grad_norm": 0.5346085429191589,
- "learning_rate": 1.371069182389937e-05,
- "loss": 0.3123,
+ "grad_norm": 0.5715988874435425,
+ "learning_rate": 1.685534591194969e-05,
+ "loss": 0.368,
  "step": 500
  }
  ],
  "logging_steps": 500,
- "max_steps": 1590,
+ "max_steps": 3180,
  "num_input_tokens_seen": 0,
- "num_train_epochs": 5,
+ "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
  "TrainerControl": {
@@ -46,8 +46,8 @@
  "train_batch_size": 48,
  "trial_name": null,
  "trial_params": {
- "alpha": 0.2168496843217581,
- "num_train_epochs": 5,
- "temperature": 20
+ "alpha": 0.34170044466363136,
+ "num_train_epochs": 10,
+ "temperature": 5
  }
  }
run-3/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e06cae4b29f817ae50e2d0e86cf0bfd0dcec8273319eafd13e35e5529d6b14e2
+ oid sha256:13f3c53667f2d0c994b1e0580a4240dd5f1920dd508edcd4cd8ca7ddb067f7f3
  size 5240
runs/Oct11_14-59-09_984d9c1bc9e1/events.out.tfevents.1728660671.984d9c1bc9e1.1239.4 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb8ec65ff4fb66f0f721c336cad45260b11c987cd5c668b7899b2a95142003a3
+ size 15224
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e09a3826ab4ffc76c9981d2ccde475f4c95a27c3ed7ffae8225ce76a3cd34eeb
+ oid sha256:13f3c53667f2d0c994b1e0580a4240dd5f1920dd508edcd4cd8ca7ddb067f7f3
  size 5240