timewanderer commited on
Commit
e4eb77d
1 Parent(s): 0b573dc

Training in progress, step 1500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:342948fd45a1d89a95c5f4f1bdfc6bdd5c2b76834819d9a7fd29b55a99531948
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dfbfcd7ae1710dc78f86029d19911aebcbf6263be42f5e1fad4d67a84af60e3
3
  size 268290900
run-3/checkpoint-1000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d269238e659920d175ea8529919070513eb475cc8cc4981d046f748105493cb2
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dc069a706a506b11ab20901dc7bba81d30aebb240bc31a6eb93d221d4377cad
3
  size 268290900
run-3/checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87d8bcd0279b17a6b825fa8fcd97f96b19c502400a65143d1d153cf56a92b506
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3489d76896528cdab6dfbb9d57397fdb9e2e3ba0a8a9e7273c16fbbdaffdafb9
3
  size 536643898
run-3/checkpoint-1000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c794bc4c67ef18245dd516031ce405ab557e4d551d225d8dd1e1abc0f2be8e33
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60ef01273aaa599804d51a93c6c0c61874ebd50ab9e135a21aab5f7cd0e6487a
3
  size 1064
run-3/checkpoint-1000/trainer_state.json CHANGED
@@ -10,50 +10,50 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6129032258064516,
14
- "eval_loss": 0.22857815027236938,
15
- "eval_runtime": 5.1291,
16
- "eval_samples_per_second": 604.398,
17
- "eval_steps_per_second": 12.673,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.5715988874435425,
23
- "learning_rate": 1.685534591194969e-05,
24
- "loss": 0.368,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.8406451612903226,
30
- "eval_loss": 0.10308429598808289,
31
- "eval_runtime": 5.3537,
32
- "eval_samples_per_second": 579.034,
33
- "eval_steps_per_second": 12.141,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.8909677419354839,
39
- "eval_loss": 0.06482071429491043,
40
- "eval_runtime": 5.1876,
41
- "eval_samples_per_second": 597.576,
42
- "eval_steps_per_second": 12.53,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.4861523509025574,
48
- "learning_rate": 1.371069182389937e-05,
49
- "loss": 0.1205,
50
  "step": 1000
51
  }
52
  ],
53
  "logging_steps": 500,
54
- "max_steps": 3180,
55
  "num_input_tokens_seen": 0,
56
- "num_train_epochs": 10,
57
  "save_steps": 500,
58
  "stateful_callbacks": {
59
  "TrainerControl": {
@@ -71,8 +71,8 @@
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
- "alpha": 0.34170044466363136,
75
- "num_train_epochs": 10,
76
- "temperature": 5
77
  }
78
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.5796774193548387,
14
+ "eval_loss": 0.20986367762088776,
15
+ "eval_runtime": 5.1673,
16
+ "eval_samples_per_second": 599.927,
17
+ "eval_steps_per_second": 12.579,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.5460540056228638,
23
+ "learning_rate": 1.4758909853249476e-05,
24
+ "loss": 0.332,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8129032258064516,
30
+ "eval_loss": 0.1050410121679306,
31
+ "eval_runtime": 5.3586,
32
+ "eval_samples_per_second": 578.505,
33
+ "eval_steps_per_second": 12.13,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8709677419354839,
39
+ "eval_loss": 0.07279336452484131,
40
+ "eval_runtime": 5.2066,
41
+ "eval_samples_per_second": 595.399,
42
+ "eval_steps_per_second": 12.484,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.4723931849002838,
48
+ "learning_rate": 9.517819706498952e-06,
49
+ "loss": 0.1214,
50
  "step": 1000
51
  }
52
  ],
53
  "logging_steps": 500,
54
+ "max_steps": 1908,
55
  "num_input_tokens_seen": 0,
56
+ "num_train_epochs": 6,
57
  "save_steps": 500,
58
  "stateful_callbacks": {
59
  "TrainerControl": {
 
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
+ "alpha": 0.9907493566825466,
75
+ "num_train_epochs": 6,
76
+ "temperature": 9
77
  }
78
  }
run-3/checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13f3c53667f2d0c994b1e0580a4240dd5f1920dd508edcd4cd8ca7ddb067f7f3
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a85143060eb3d68597fa3be18a06447136e3eefcb180aa3b82a2a8463f5f692a
3
  size 5240
run-3/checkpoint-1500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e3dfa7be2ce14e7f0ea5acdcda3076bbb825e51fff60e158046ec8811a22473
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dfbfcd7ae1710dc78f86029d19911aebcbf6263be42f5e1fad4d67a84af60e3
3
  size 268290900
run-3/checkpoint-1500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:578039ccc687432b658165aa96f3b0f2e843d55f7e0de1398b1ee0f672a39638
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36c4fbd6b433cb76cfb9c82d6958f6d74f64d6bee7012fa72823d27b3cea8048
3
  size 536643898
run-3/checkpoint-1500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71453465aad25f4c5a0a948496c64b1f74df850abda497954afe3695c00756ee
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55c8d3ce0734337fc0c187ca5543b4c70ca45d996531f199209b3a0c2a798109
3
  size 1064
run-3/checkpoint-1500/trainer_state.json CHANGED
@@ -10,66 +10,66 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6129032258064516,
14
- "eval_loss": 0.22857815027236938,
15
- "eval_runtime": 5.1291,
16
- "eval_samples_per_second": 604.398,
17
- "eval_steps_per_second": 12.673,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.5715988874435425,
23
- "learning_rate": 1.685534591194969e-05,
24
- "loss": 0.368,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.8406451612903226,
30
- "eval_loss": 0.10308429598808289,
31
- "eval_runtime": 5.3537,
32
- "eval_samples_per_second": 579.034,
33
- "eval_steps_per_second": 12.141,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.8909677419354839,
39
- "eval_loss": 0.06482071429491043,
40
- "eval_runtime": 5.1876,
41
- "eval_samples_per_second": 597.576,
42
- "eval_steps_per_second": 12.53,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
- "grad_norm": 0.4861523509025574,
48
- "learning_rate": 1.371069182389937e-05,
49
- "loss": 0.1205,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_accuracy": 0.9032258064516129,
55
- "eval_loss": 0.04816382750868797,
56
- "eval_runtime": 5.1502,
57
- "eval_samples_per_second": 601.919,
58
- "eval_steps_per_second": 12.621,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
- "grad_norm": 0.3236384689807892,
64
- "learning_rate": 1.0566037735849058e-05,
65
- "loss": 0.0727,
66
  "step": 1500
67
  }
68
  ],
69
  "logging_steps": 500,
70
- "max_steps": 3180,
71
  "num_input_tokens_seen": 0,
72
- "num_train_epochs": 10,
73
  "save_steps": 500,
74
  "stateful_callbacks": {
75
  "TrainerControl": {
@@ -87,8 +87,8 @@
87
  "train_batch_size": 48,
88
  "trial_name": null,
89
  "trial_params": {
90
- "alpha": 0.34170044466363136,
91
- "num_train_epochs": 10,
92
- "temperature": 5
93
  }
94
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.5796774193548387,
14
+ "eval_loss": 0.20986367762088776,
15
+ "eval_runtime": 5.1673,
16
+ "eval_samples_per_second": 599.927,
17
+ "eval_steps_per_second": 12.579,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.5460540056228638,
23
+ "learning_rate": 1.4758909853249476e-05,
24
+ "loss": 0.332,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8129032258064516,
30
+ "eval_loss": 0.1050410121679306,
31
+ "eval_runtime": 5.3586,
32
+ "eval_samples_per_second": 578.505,
33
+ "eval_steps_per_second": 12.13,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8709677419354839,
39
+ "eval_loss": 0.07279336452484131,
40
+ "eval_runtime": 5.2066,
41
+ "eval_samples_per_second": 595.399,
42
+ "eval_steps_per_second": 12.484,
43
  "step": 954
44
  },
45
  {
46
  "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.4723931849002838,
48
+ "learning_rate": 9.517819706498952e-06,
49
+ "loss": 0.1214,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "eval_accuracy": 0.892258064516129,
55
+ "eval_loss": 0.058892734348773956,
56
+ "eval_runtime": 5.2068,
57
+ "eval_samples_per_second": 595.377,
58
+ "eval_steps_per_second": 12.484,
59
  "step": 1272
60
  },
61
  {
62
  "epoch": 4.716981132075472,
63
+ "grad_norm": 0.3437352180480957,
64
+ "learning_rate": 4.276729559748428e-06,
65
+ "loss": 0.0826,
66
  "step": 1500
67
  }
68
  ],
69
  "logging_steps": 500,
70
+ "max_steps": 1908,
71
  "num_input_tokens_seen": 0,
72
+ "num_train_epochs": 6,
73
  "save_steps": 500,
74
  "stateful_callbacks": {
75
  "TrainerControl": {
 
87
  "train_batch_size": 48,
88
  "trial_name": null,
89
  "trial_params": {
90
+ "alpha": 0.9907493566825466,
91
+ "num_train_epochs": 6,
92
+ "temperature": 9
93
  }
94
  }
run-3/checkpoint-1500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13f3c53667f2d0c994b1e0580a4240dd5f1920dd508edcd4cd8ca7ddb067f7f3
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a85143060eb3d68597fa3be18a06447136e3eefcb180aa3b82a2a8463f5f692a
3
  size 5240
runs/Oct11_17-03-17_c179252869f4/events.out.tfevents.1728668441.c179252869f4.732.4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d895f31b6e2f5f4ed03220d73decd50b20bd2ab120fd016eb69b2b17969214f
3
- size 13832
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f711337f1493283db26ffcc6126a461fc92f5e327516a1d3f66c75798ab4e5b
3
+ size 15366