krishna-exe commited on
Commit
b64e1e2
1 Parent(s): f86e3ca

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_accuracy": 0.8685015290519877,
4
- "eval_loss": 0.34957513213157654,
5
- "eval_runtime": 2.9329,
6
- "eval_samples_per_second": 111.492,
7
- "eval_steps_per_second": 3.751,
8
- "total_flos": 2.2838958959910912e+17,
9
- "train_loss": 0.41367715683536255,
10
- "train_runtime": 173.7772,
11
- "train_samples_per_second": 50.703,
12
- "train_steps_per_second": 0.397
13
  }
 
1
  {
2
+ "epoch": 4.938271604938271,
3
+ "eval_accuracy": 0.9477351916376306,
4
+ "eval_loss": 0.11578787863254547,
5
+ "eval_runtime": 2.7445,
6
+ "eval_samples_per_second": 104.573,
7
+ "eval_steps_per_second": 6.559,
8
+ "total_flos": 3.1727957353537536e+17,
9
+ "train_loss": 0.41540566325187683,
10
+ "train_runtime": 248.6508,
11
+ "train_samples_per_second": 51.94,
12
+ "train_steps_per_second": 0.804
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "eval_accuracy": 0.8685015290519877,
4
- "eval_loss": 0.34957513213157654,
5
- "eval_runtime": 2.9329,
6
- "eval_samples_per_second": 111.492,
7
- "eval_steps_per_second": 3.751
8
  }
 
1
  {
2
+ "epoch": 4.938271604938271,
3
+ "eval_accuracy": 0.9477351916376306,
4
+ "eval_loss": 0.11578787863254547,
5
+ "eval_runtime": 2.7445,
6
+ "eval_samples_per_second": 104.573,
7
+ "eval_steps_per_second": 6.559
8
  }
runs/Oct10_11-22-23_e11ecca18f17/events.out.tfevents.1728559702.e11ecca18f17.609.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06fbe26e63fdc02be19f4f2eabd2f1523e7ae6faeeb6890b302671a6b7c14b13
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 3.0,
3
- "total_flos": 2.2838958959910912e+17,
4
- "train_loss": 0.41367715683536255,
5
- "train_runtime": 173.7772,
6
- "train_samples_per_second": 50.703,
7
- "train_steps_per_second": 0.397
8
  }
 
1
  {
2
+ "epoch": 4.938271604938271,
3
+ "total_flos": 3.1727957353537536e+17,
4
+ "train_loss": 0.41540566325187683,
5
+ "train_runtime": 248.6508,
6
+ "train_samples_per_second": 51.94,
7
+ "train_steps_per_second": 0.804
8
  }
trainer_state.json CHANGED
@@ -1,96 +1,212 @@
1
  {
2
- "best_metric": 0.8685015290519877,
3
- "best_model_checkpoint": "brain-tumor-classification/checkpoint-69",
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
- "global_step": 69,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.43478260869565216,
13
- "grad_norm": 7.908116340637207,
14
- "learning_rate": 4.7580645161290326e-05,
15
- "loss": 0.623,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.8695652173913043,
20
- "grad_norm": 10.378320693969727,
21
- "learning_rate": 3.951612903225806e-05,
22
- "loss": 0.5239,
23
  "step": 20
24
  },
25
  {
26
- "epoch": 1.0,
27
- "eval_accuracy": 0.7889908256880734,
28
- "eval_loss": 0.5283120274543762,
29
- "eval_runtime": 3.5122,
30
- "eval_samples_per_second": 93.103,
31
- "eval_steps_per_second": 3.132,
32
- "step": 23
33
  },
34
  {
35
- "epoch": 1.3043478260869565,
36
- "grad_norm": 5.665172100067139,
37
- "learning_rate": 3.1451612903225806e-05,
38
- "loss": 0.4202,
39
- "step": 30
40
  },
41
  {
42
- "epoch": 1.7391304347826086,
43
- "grad_norm": 9.73236083984375,
44
- "learning_rate": 2.338709677419355e-05,
45
- "loss": 0.3716,
 
 
46
  "step": 40
47
  },
48
  {
49
- "epoch": 2.0,
50
- "eval_accuracy": 0.8409785932721713,
51
- "eval_loss": 0.3933815658092499,
52
- "eval_runtime": 2.873,
53
- "eval_samples_per_second": 113.816,
54
- "eval_steps_per_second": 3.829,
55
- "step": 46
56
- },
57
- {
58
- "epoch": 2.1739130434782608,
59
- "grad_norm": 8.678784370422363,
60
- "learning_rate": 1.5322580645161292e-05,
61
- "loss": 0.3549,
62
  "step": 50
63
  },
64
  {
65
- "epoch": 2.608695652173913,
66
- "grad_norm": 5.479146957397461,
67
- "learning_rate": 7.258064516129033e-06,
68
- "loss": 0.2964,
69
  "step": 60
70
  },
71
  {
72
- "epoch": 3.0,
73
- "eval_accuracy": 0.8685015290519877,
74
- "eval_loss": 0.34957513213157654,
75
- "eval_runtime": 3.2055,
76
- "eval_samples_per_second": 102.012,
77
- "eval_steps_per_second": 3.432,
78
- "step": 69
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  },
80
  {
81
- "epoch": 3.0,
82
- "step": 69,
83
- "total_flos": 2.2838958959910912e+17,
84
- "train_loss": 0.41367715683536255,
85
- "train_runtime": 173.7772,
86
- "train_samples_per_second": 50.703,
87
- "train_steps_per_second": 0.397
88
  }
89
  ],
90
  "logging_steps": 10,
91
- "max_steps": 69,
92
  "num_input_tokens_seen": 0,
93
- "num_train_epochs": 3,
94
  "save_steps": 500,
95
  "stateful_callbacks": {
96
  "TrainerControl": {
@@ -104,8 +220,8 @@
104
  "attributes": {}
105
  }
106
  },
107
- "total_flos": 2.2838958959910912e+17,
108
- "train_batch_size": 32,
109
  "trial_name": null,
110
  "trial_params": null
111
  }
 
1
  {
2
+ "best_metric": 0.9477351916376306,
3
+ "best_model_checkpoint": "brain-tumor-classification/checkpoint-200",
4
+ "epoch": 4.938271604938271,
5
  "eval_steps": 500,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.24691358024691357,
13
+ "grad_norm": 11.865983009338379,
14
+ "learning_rate": 2.5e-05,
15
+ "loss": 1.335,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.49382716049382713,
20
+ "grad_norm": 14.611420631408691,
21
+ "learning_rate": 5e-05,
22
+ "loss": 1.0649,
23
  "step": 20
24
  },
25
  {
26
+ "epoch": 0.7407407407407407,
27
+ "grad_norm": 14.365787506103516,
28
+ "learning_rate": 4.722222222222222e-05,
29
+ "loss": 0.7611,
30
+ "step": 30
 
 
31
  },
32
  {
33
+ "epoch": 0.9876543209876543,
34
+ "grad_norm": 16.896533966064453,
35
+ "learning_rate": 4.4444444444444447e-05,
36
+ "loss": 0.5761,
37
+ "step": 40
38
  },
39
  {
40
+ "epoch": 0.9876543209876543,
41
+ "eval_accuracy": 0.8432055749128919,
42
+ "eval_loss": 0.41130325198173523,
43
+ "eval_runtime": 2.4201,
44
+ "eval_samples_per_second": 118.592,
45
+ "eval_steps_per_second": 7.438,
46
  "step": 40
47
  },
48
  {
49
+ "epoch": 1.2345679012345678,
50
+ "grad_norm": 9.885971069335938,
51
+ "learning_rate": 4.166666666666667e-05,
52
+ "loss": 0.5027,
 
 
 
 
 
 
 
 
 
53
  "step": 50
54
  },
55
  {
56
+ "epoch": 1.4814814814814814,
57
+ "grad_norm": 19.173418045043945,
58
+ "learning_rate": 3.888888888888889e-05,
59
+ "loss": 0.4857,
60
  "step": 60
61
  },
62
  {
63
+ "epoch": 1.7283950617283952,
64
+ "grad_norm": 10.684455871582031,
65
+ "learning_rate": 3.611111111111111e-05,
66
+ "loss": 0.4252,
67
+ "step": 70
68
+ },
69
+ {
70
+ "epoch": 1.9753086419753085,
71
+ "grad_norm": 11.967300415039062,
72
+ "learning_rate": 3.3333333333333335e-05,
73
+ "loss": 0.3871,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 2.0,
78
+ "eval_accuracy": 0.9024390243902439,
79
+ "eval_loss": 0.25702860951423645,
80
+ "eval_runtime": 3.0171,
81
+ "eval_samples_per_second": 95.125,
82
+ "eval_steps_per_second": 5.966,
83
+ "step": 81
84
+ },
85
+ {
86
+ "epoch": 2.2222222222222223,
87
+ "grad_norm": 7.449192047119141,
88
+ "learning_rate": 3.055555555555556e-05,
89
+ "loss": 0.294,
90
+ "step": 90
91
+ },
92
+ {
93
+ "epoch": 2.4691358024691357,
94
+ "grad_norm": 15.003212928771973,
95
+ "learning_rate": 2.777777777777778e-05,
96
+ "loss": 0.3583,
97
+ "step": 100
98
+ },
99
+ {
100
+ "epoch": 2.7160493827160495,
101
+ "grad_norm": 12.026302337646484,
102
+ "learning_rate": 2.5e-05,
103
+ "loss": 0.2584,
104
+ "step": 110
105
+ },
106
+ {
107
+ "epoch": 2.962962962962963,
108
+ "grad_norm": 10.30452823638916,
109
+ "learning_rate": 2.2222222222222223e-05,
110
+ "loss": 0.2586,
111
+ "step": 120
112
+ },
113
+ {
114
+ "epoch": 2.9876543209876543,
115
+ "eval_accuracy": 0.9407665505226481,
116
+ "eval_loss": 0.1910204291343689,
117
+ "eval_runtime": 2.8228,
118
+ "eval_samples_per_second": 101.672,
119
+ "eval_steps_per_second": 6.377,
120
+ "step": 121
121
+ },
122
+ {
123
+ "epoch": 3.2098765432098766,
124
+ "grad_norm": 9.547262191772461,
125
+ "learning_rate": 1.9444444444444445e-05,
126
+ "loss": 0.1965,
127
+ "step": 130
128
+ },
129
+ {
130
+ "epoch": 3.45679012345679,
131
+ "grad_norm": 8.193156242370605,
132
+ "learning_rate": 1.6666666666666667e-05,
133
+ "loss": 0.2272,
134
+ "step": 140
135
+ },
136
+ {
137
+ "epoch": 3.7037037037037037,
138
+ "grad_norm": 6.964083671569824,
139
+ "learning_rate": 1.388888888888889e-05,
140
+ "loss": 0.237,
141
+ "step": 150
142
+ },
143
+ {
144
+ "epoch": 3.950617283950617,
145
+ "grad_norm": 15.732324600219727,
146
+ "learning_rate": 1.1111111111111112e-05,
147
+ "loss": 0.2164,
148
+ "step": 160
149
+ },
150
+ {
151
+ "epoch": 4.0,
152
+ "eval_accuracy": 0.9442508710801394,
153
+ "eval_loss": 0.13123387098312378,
154
+ "eval_runtime": 2.4291,
155
+ "eval_samples_per_second": 118.153,
156
+ "eval_steps_per_second": 7.41,
157
+ "step": 162
158
+ },
159
+ {
160
+ "epoch": 4.197530864197531,
161
+ "grad_norm": 7.277712821960449,
162
+ "learning_rate": 8.333333333333334e-06,
163
+ "loss": 0.1614,
164
+ "step": 170
165
+ },
166
+ {
167
+ "epoch": 4.444444444444445,
168
+ "grad_norm": 13.366209030151367,
169
+ "learning_rate": 5.555555555555556e-06,
170
+ "loss": 0.1978,
171
+ "step": 180
172
+ },
173
+ {
174
+ "epoch": 4.6913580246913575,
175
+ "grad_norm": 9.085039138793945,
176
+ "learning_rate": 2.777777777777778e-06,
177
+ "loss": 0.189,
178
+ "step": 190
179
+ },
180
+ {
181
+ "epoch": 4.938271604938271,
182
+ "grad_norm": 7.736137866973877,
183
+ "learning_rate": 0.0,
184
+ "loss": 0.1757,
185
+ "step": 200
186
+ },
187
+ {
188
+ "epoch": 4.938271604938271,
189
+ "eval_accuracy": 0.9477351916376306,
190
+ "eval_loss": 0.11578787863254547,
191
+ "eval_runtime": 2.7368,
192
+ "eval_samples_per_second": 104.868,
193
+ "eval_steps_per_second": 6.577,
194
+ "step": 200
195
  },
196
  {
197
+ "epoch": 4.938271604938271,
198
+ "step": 200,
199
+ "total_flos": 3.1727957353537536e+17,
200
+ "train_loss": 0.41540566325187683,
201
+ "train_runtime": 248.6508,
202
+ "train_samples_per_second": 51.94,
203
+ "train_steps_per_second": 0.804
204
  }
205
  ],
206
  "logging_steps": 10,
207
+ "max_steps": 200,
208
  "num_input_tokens_seen": 0,
209
+ "num_train_epochs": 5,
210
  "save_steps": 500,
211
  "stateful_callbacks": {
212
  "TrainerControl": {
 
220
  "attributes": {}
221
  }
222
  },
223
+ "total_flos": 3.1727957353537536e+17,
224
+ "train_batch_size": 16,
225
  "trial_name": null,
226
  "trial_params": null
227
  }