krishna-exe committed on
Commit
d8990d4
1 Parent(s): eccf90f

Training in progress, epoch 0

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 4.938271604938271,
3
- "eval_accuracy": 0.9477351916376306,
4
- "eval_loss": 0.11578787863254547,
5
- "eval_runtime": 2.7445,
6
- "eval_samples_per_second": 104.573,
7
- "eval_steps_per_second": 6.559,
8
  "total_flos": 3.1727957353537536e+17,
9
- "train_loss": 0.41540566325187683,
10
- "train_runtime": 248.6508,
11
- "train_samples_per_second": 51.94,
12
- "train_steps_per_second": 0.804
13
  }
 
1
  {
2
  "epoch": 4.938271604938271,
3
+ "eval_accuracy": 0.9368950832365467,
4
+ "eval_loss": 0.16935844719409943,
5
+ "eval_runtime": 25.4894,
6
+ "eval_samples_per_second": 101.336,
7
+ "eval_steps_per_second": 6.356,
8
  "total_flos": 3.1727957353537536e+17,
9
+ "train_loss": 0.4196511161327362,
10
+ "train_runtime": 246.9642,
11
+ "train_samples_per_second": 52.295,
12
+ "train_steps_per_second": 0.81
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.938271604938271,
3
- "eval_accuracy": 0.9477351916376306,
4
- "eval_loss": 0.11578787863254547,
5
- "eval_runtime": 2.7445,
6
- "eval_samples_per_second": 104.573,
7
- "eval_steps_per_second": 6.559
8
  }
 
1
  {
2
  "epoch": 4.938271604938271,
3
+ "eval_accuracy": 0.9547038327526133,
4
+ "eval_loss": 0.12142720818519592,
5
+ "eval_runtime": 2.5284,
6
+ "eval_samples_per_second": 113.511,
7
+ "eval_steps_per_second": 7.119
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3066a6576511f8b1883aff008c864835f27728376c417b4e508e6b0db03602f
3
  size 110348984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d741f951e705b7ffa0e757168f7ac730981b0e25bf7c8a91391460719849e34
3
  size 110348984
runs/Oct17_04-14-12_05a7a6f58592/events.out.tfevents.1729138717.05a7a6f58592.1707.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66e53f1758ef5ff11dc93dac058e52d0c707f49c1188eb6bc2e971f24210601e
3
+ size 1057
runs/Oct17_04-44-59_05a7a6f58592/events.out.tfevents.1729140379.05a7a6f58592.1707.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:417035168be3584c04a8684f64e4ddd6945d2541033bb9d11c01b15789908fc4
3
+ size 6581
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.938271604938271,
3
- "total_flos": 3.1727957353537536e+17,
4
- "train_loss": 0.41540566325187683,
5
- "train_runtime": 248.6508,
6
- "train_samples_per_second": 51.94,
7
- "train_steps_per_second": 0.804
8
  }
 
1
  {
2
  "epoch": 4.938271604938271,
3
+ "eval_accuracy": 0.9368950832365467,
4
+ "eval_loss": 0.16935844719409943,
5
+ "eval_runtime": 25.4894,
6
+ "eval_samples_per_second": 101.336,
7
+ "eval_steps_per_second": 6.356
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.9477351916376306,
3
  "best_model_checkpoint": "brain-tumor-classification/checkpoint-200",
4
  "epoch": 4.938271604938271,
5
  "eval_steps": 500,
@@ -10,197 +10,224 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.24691358024691357,
13
- "grad_norm": 11.865983009338379,
14
  "learning_rate": 2.5e-05,
15
- "loss": 1.335,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.49382716049382713,
20
- "grad_norm": 14.611420631408691,
21
  "learning_rate": 5e-05,
22
- "loss": 1.0649,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.7407407407407407,
27
- "grad_norm": 14.365787506103516,
28
  "learning_rate": 4.722222222222222e-05,
29
- "loss": 0.7611,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.9876543209876543,
34
- "grad_norm": 16.896533966064453,
35
  "learning_rate": 4.4444444444444447e-05,
36
- "loss": 0.5761,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.9876543209876543,
41
- "eval_accuracy": 0.8432055749128919,
42
- "eval_loss": 0.41130325198173523,
43
- "eval_runtime": 2.4201,
44
- "eval_samples_per_second": 118.592,
45
- "eval_steps_per_second": 7.438,
46
  "step": 40
47
  },
48
  {
49
  "epoch": 1.2345679012345678,
50
- "grad_norm": 9.885971069335938,
51
  "learning_rate": 4.166666666666667e-05,
52
- "loss": 0.5027,
53
  "step": 50
54
  },
55
  {
56
  "epoch": 1.4814814814814814,
57
- "grad_norm": 19.173418045043945,
58
  "learning_rate": 3.888888888888889e-05,
59
- "loss": 0.4857,
60
  "step": 60
61
  },
62
  {
63
  "epoch": 1.7283950617283952,
64
- "grad_norm": 10.684455871582031,
65
  "learning_rate": 3.611111111111111e-05,
66
- "loss": 0.4252,
67
  "step": 70
68
  },
69
  {
70
  "epoch": 1.9753086419753085,
71
- "grad_norm": 11.967300415039062,
72
  "learning_rate": 3.3333333333333335e-05,
73
- "loss": 0.3871,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 2.0,
78
- "eval_accuracy": 0.9024390243902439,
79
- "eval_loss": 0.25702860951423645,
80
- "eval_runtime": 3.0171,
81
- "eval_samples_per_second": 95.125,
82
- "eval_steps_per_second": 5.966,
83
  "step": 81
84
  },
85
  {
86
  "epoch": 2.2222222222222223,
87
- "grad_norm": 7.449192047119141,
88
  "learning_rate": 3.055555555555556e-05,
89
- "loss": 0.294,
90
  "step": 90
91
  },
92
  {
93
  "epoch": 2.4691358024691357,
94
- "grad_norm": 15.003212928771973,
95
  "learning_rate": 2.777777777777778e-05,
96
- "loss": 0.3583,
97
  "step": 100
98
  },
99
  {
100
  "epoch": 2.7160493827160495,
101
- "grad_norm": 12.026302337646484,
102
  "learning_rate": 2.5e-05,
103
- "loss": 0.2584,
104
  "step": 110
105
  },
106
  {
107
  "epoch": 2.962962962962963,
108
- "grad_norm": 10.30452823638916,
109
  "learning_rate": 2.2222222222222223e-05,
110
- "loss": 0.2586,
111
  "step": 120
112
  },
113
  {
114
  "epoch": 2.9876543209876543,
115
- "eval_accuracy": 0.9407665505226481,
116
- "eval_loss": 0.1910204291343689,
117
- "eval_runtime": 2.8228,
118
- "eval_samples_per_second": 101.672,
119
- "eval_steps_per_second": 6.377,
120
  "step": 121
121
  },
122
  {
123
  "epoch": 3.2098765432098766,
124
- "grad_norm": 9.547262191772461,
125
  "learning_rate": 1.9444444444444445e-05,
126
- "loss": 0.1965,
127
  "step": 130
128
  },
129
  {
130
  "epoch": 3.45679012345679,
131
- "grad_norm": 8.193156242370605,
132
  "learning_rate": 1.6666666666666667e-05,
133
- "loss": 0.2272,
134
  "step": 140
135
  },
136
  {
137
  "epoch": 3.7037037037037037,
138
- "grad_norm": 6.964083671569824,
139
  "learning_rate": 1.388888888888889e-05,
140
- "loss": 0.237,
141
  "step": 150
142
  },
143
  {
144
  "epoch": 3.950617283950617,
145
- "grad_norm": 15.732324600219727,
146
  "learning_rate": 1.1111111111111112e-05,
147
- "loss": 0.2164,
148
  "step": 160
149
  },
150
  {
151
  "epoch": 4.0,
152
  "eval_accuracy": 0.9442508710801394,
153
- "eval_loss": 0.13123387098312378,
154
- "eval_runtime": 2.4291,
155
- "eval_samples_per_second": 118.153,
156
- "eval_steps_per_second": 7.41,
157
  "step": 162
158
  },
159
  {
160
  "epoch": 4.197530864197531,
161
- "grad_norm": 7.277712821960449,
162
  "learning_rate": 8.333333333333334e-06,
163
- "loss": 0.1614,
164
  "step": 170
165
  },
166
  {
167
  "epoch": 4.444444444444445,
168
- "grad_norm": 13.366209030151367,
169
  "learning_rate": 5.555555555555556e-06,
170
- "loss": 0.1978,
171
  "step": 180
172
  },
173
  {
174
  "epoch": 4.6913580246913575,
175
- "grad_norm": 9.085039138793945,
176
  "learning_rate": 2.777777777777778e-06,
177
- "loss": 0.189,
178
  "step": 190
179
  },
180
  {
181
  "epoch": 4.938271604938271,
182
- "grad_norm": 7.736137866973877,
183
  "learning_rate": 0.0,
184
- "loss": 0.1757,
185
  "step": 200
186
  },
187
  {
188
  "epoch": 4.938271604938271,
189
- "eval_accuracy": 0.9477351916376306,
190
- "eval_loss": 0.11578787863254547,
191
- "eval_runtime": 2.7368,
192
- "eval_samples_per_second": 104.868,
193
- "eval_steps_per_second": 6.577,
194
  "step": 200
195
  },
196
  {
197
  "epoch": 4.938271604938271,
198
  "step": 200,
199
  "total_flos": 3.1727957353537536e+17,
200
- "train_loss": 0.41540566325187683,
201
- "train_runtime": 248.6508,
202
- "train_samples_per_second": 51.94,
203
- "train_steps_per_second": 0.804
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  }
205
  ],
206
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.9547038327526133,
3
  "best_model_checkpoint": "brain-tumor-classification/checkpoint-200",
4
  "epoch": 4.938271604938271,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.24691358024691357,
13
+ "grad_norm": 8.300398826599121,
14
  "learning_rate": 2.5e-05,
15
+ "loss": 1.3599,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.49382716049382713,
20
+ "grad_norm": 11.234930992126465,
21
  "learning_rate": 5e-05,
22
+ "loss": 1.0545,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.7407407407407407,
27
+ "grad_norm": 12.957039833068848,
28
  "learning_rate": 4.722222222222222e-05,
29
+ "loss": 0.7446,
30
  "step": 30
31
  },
32
  {
33
  "epoch": 0.9876543209876543,
34
+ "grad_norm": 13.77059268951416,
35
  "learning_rate": 4.4444444444444447e-05,
36
+ "loss": 0.6001,
37
  "step": 40
38
  },
39
  {
40
  "epoch": 0.9876543209876543,
41
+ "eval_accuracy": 0.8466898954703833,
42
+ "eval_loss": 0.4058869779109955,
43
+ "eval_runtime": 2.3913,
44
+ "eval_samples_per_second": 120.016,
45
+ "eval_steps_per_second": 7.527,
46
  "step": 40
47
  },
48
  {
49
  "epoch": 1.2345679012345678,
50
+ "grad_norm": 12.874049186706543,
51
  "learning_rate": 4.166666666666667e-05,
52
+ "loss": 0.4598,
53
  "step": 50
54
  },
55
  {
56
  "epoch": 1.4814814814814814,
57
+ "grad_norm": 15.899703979492188,
58
  "learning_rate": 3.888888888888889e-05,
59
+ "loss": 0.4362,
60
  "step": 60
61
  },
62
  {
63
  "epoch": 1.7283950617283952,
64
+ "grad_norm": 14.211153030395508,
65
  "learning_rate": 3.611111111111111e-05,
66
+ "loss": 0.4013,
67
  "step": 70
68
  },
69
  {
70
  "epoch": 1.9753086419753085,
71
+ "grad_norm": 14.76016902923584,
72
  "learning_rate": 3.3333333333333335e-05,
73
+ "loss": 0.3615,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 2.0,
78
+ "eval_accuracy": 0.9128919860627178,
79
+ "eval_loss": 0.2405817061662674,
80
+ "eval_runtime": 4.5171,
81
+ "eval_samples_per_second": 63.537,
82
+ "eval_steps_per_second": 3.985,
83
  "step": 81
84
  },
85
  {
86
  "epoch": 2.2222222222222223,
87
+ "grad_norm": 10.028989791870117,
88
  "learning_rate": 3.055555555555556e-05,
89
+ "loss": 0.3202,
90
  "step": 90
91
  },
92
  {
93
  "epoch": 2.4691358024691357,
94
+ "grad_norm": 8.175374984741211,
95
  "learning_rate": 2.777777777777778e-05,
96
+ "loss": 0.2769,
97
  "step": 100
98
  },
99
  {
100
  "epoch": 2.7160493827160495,
101
+ "grad_norm": 7.94071102142334,
102
  "learning_rate": 2.5e-05,
103
+ "loss": 0.2848,
104
  "step": 110
105
  },
106
  {
107
  "epoch": 2.962962962962963,
108
+ "grad_norm": 14.77879524230957,
109
  "learning_rate": 2.2222222222222223e-05,
110
+ "loss": 0.2619,
111
  "step": 120
112
  },
113
  {
114
  "epoch": 2.9876543209876543,
115
+ "eval_accuracy": 0.9372822299651568,
116
+ "eval_loss": 0.18160930275917053,
117
+ "eval_runtime": 2.3852,
118
+ "eval_samples_per_second": 120.323,
119
+ "eval_steps_per_second": 7.546,
120
  "step": 121
121
  },
122
  {
123
  "epoch": 3.2098765432098766,
124
+ "grad_norm": 5.213917255401611,
125
  "learning_rate": 1.9444444444444445e-05,
126
+ "loss": 0.2809,
127
  "step": 130
128
  },
129
  {
130
  "epoch": 3.45679012345679,
131
+ "grad_norm": 9.913806915283203,
132
  "learning_rate": 1.6666666666666667e-05,
133
+ "loss": 0.2239,
134
  "step": 140
135
  },
136
  {
137
  "epoch": 3.7037037037037037,
138
+ "grad_norm": 9.552595138549805,
139
  "learning_rate": 1.388888888888889e-05,
140
+ "loss": 0.2331,
141
  "step": 150
142
  },
143
  {
144
  "epoch": 3.950617283950617,
145
+ "grad_norm": 13.436369895935059,
146
  "learning_rate": 1.1111111111111112e-05,
147
+ "loss": 0.2262,
148
  "step": 160
149
  },
150
  {
151
  "epoch": 4.0,
152
  "eval_accuracy": 0.9442508710801394,
153
+ "eval_loss": 0.14013153314590454,
154
+ "eval_runtime": 2.3881,
155
+ "eval_samples_per_second": 120.178,
156
+ "eval_steps_per_second": 7.537,
157
  "step": 162
158
  },
159
  {
160
  "epoch": 4.197530864197531,
161
+ "grad_norm": 5.0693039894104,
162
  "learning_rate": 8.333333333333334e-06,
163
+ "loss": 0.2334,
164
  "step": 170
165
  },
166
  {
167
  "epoch": 4.444444444444445,
168
+ "grad_norm": 9.17518424987793,
169
  "learning_rate": 5.555555555555556e-06,
170
+ "loss": 0.2374,
171
  "step": 180
172
  },
173
  {
174
  "epoch": 4.6913580246913575,
175
+ "grad_norm": 7.441468715667725,
176
  "learning_rate": 2.777777777777778e-06,
177
+ "loss": 0.2173,
178
  "step": 190
179
  },
180
  {
181
  "epoch": 4.938271604938271,
182
+ "grad_norm": 3.2817890644073486,
183
  "learning_rate": 0.0,
184
+ "loss": 0.179,
185
  "step": 200
186
  },
187
  {
188
  "epoch": 4.938271604938271,
189
+ "eval_accuracy": 0.9547038327526133,
190
+ "eval_loss": 0.12142720818519592,
191
+ "eval_runtime": 3.0543,
192
+ "eval_samples_per_second": 93.966,
193
+ "eval_steps_per_second": 5.893,
194
  "step": 200
195
  },
196
  {
197
  "epoch": 4.938271604938271,
198
  "step": 200,
199
  "total_flos": 3.1727957353537536e+17,
200
+ "train_loss": 0.4196511161327362,
201
+ "train_runtime": 246.9642,
202
+ "train_samples_per_second": 52.295,
203
+ "train_steps_per_second": 0.81
204
+ },
205
+ {
206
+ "epoch": 4.938271604938271,
207
+ "eval_accuracy": 0.9547038327526133,
208
+ "eval_loss": 0.12142720818519592,
209
+ "eval_runtime": 2.5284,
210
+ "eval_samples_per_second": 113.511,
211
+ "eval_steps_per_second": 7.119,
212
+ "step": 200
213
+ },
214
+ {
215
+ "epoch": 4.938271604938271,
216
+ "eval_accuracy": 0.9407665505226481,
217
+ "eval_loss": 0.16431476175785065,
218
+ "eval_runtime": 26.8879,
219
+ "eval_samples_per_second": 96.066,
220
+ "eval_steps_per_second": 6.025,
221
+ "step": 200
222
+ },
223
+ {
224
+ "epoch": 4.938271604938271,
225
+ "eval_accuracy": 0.9368950832365467,
226
+ "eval_loss": 0.16935844719409943,
227
+ "eval_runtime": 25.4894,
228
+ "eval_samples_per_second": 101.336,
229
+ "eval_steps_per_second": 6.356,
230
+ "step": 200
231
  }
232
  ],
233
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78ef78e2dad428c020819d09e1b5c0caf3c6c577718dbcd3c1403929d220398d
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a88135003244a417dc1864deba9655d21aa0384efab8f780d6cd82423fa59d4
3
  size 5240