krishna-exe commited on
Commit
d4b0c27
1 Parent(s): 6f552d9

Training in progress, epoch 0

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 4.938271604938271,
3
- "eval_accuracy": 0.9156020131629887,
4
- "eval_loss": 0.22556395828723907,
5
- "eval_runtime": 22.7546,
6
- "eval_samples_per_second": 113.516,
7
- "eval_steps_per_second": 3.56,
8
- "total_flos": 3.1727957353537536e+17,
9
- "train_loss": 0.4883076953887939,
10
- "train_runtime": 265.3223,
11
- "train_samples_per_second": 48.677,
12
- "train_steps_per_second": 0.377
13
  }
 
1
  {
2
+ "epoch": 9.876543209876543,
3
+ "eval_accuracy": 0.9721254355400697,
4
+ "eval_loss": 0.07933783531188965,
5
+ "eval_runtime": 2.8381,
6
+ "eval_samples_per_second": 101.125,
7
+ "eval_steps_per_second": 3.171,
8
+ "total_flos": 6.343354306682266e+17,
9
+ "train_loss": 0.2855896496772766,
10
+ "train_runtime": 464.8772,
11
+ "train_samples_per_second": 55.563,
12
+ "train_steps_per_second": 0.43
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.938271604938271,
3
- "eval_accuracy": 0.9372822299651568,
4
- "eval_loss": 0.1715514212846756,
5
- "eval_runtime": 2.7579,
6
- "eval_samples_per_second": 104.066,
7
- "eval_steps_per_second": 3.263
8
  }
 
1
  {
2
+ "epoch": 9.876543209876543,
3
+ "eval_accuracy": 0.9721254355400697,
4
+ "eval_loss": 0.07933783531188965,
5
+ "eval_runtime": 2.8381,
6
+ "eval_samples_per_second": 101.125,
7
+ "eval_steps_per_second": 3.171
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3352dce405fa7c3c45c175e3e1842a8d57b38370adbce10cc4b4e93a0b69c011
3
  size 110348984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:449c359d823337a57fae41d2b957610a181ccb985b72a6108f12fa811ee9fe48
3
  size 110348984
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.938271604938271,
3
- "eval_accuracy": 0.9156020131629887,
4
- "eval_loss": 0.22556395828723907,
5
- "eval_runtime": 22.7546,
6
- "eval_samples_per_second": 113.516,
7
- "eval_steps_per_second": 3.56
8
  }
 
1
  {
2
+ "epoch": 9.876543209876543,
3
+ "eval_accuracy": 0.9519938056523423,
4
+ "eval_loss": 0.1284445822238922,
5
+ "eval_runtime": 22.1837,
6
+ "eval_samples_per_second": 116.437,
7
+ "eval_steps_per_second": 3.651
8
  }
trainer_state.json CHANGED
@@ -1,160 +1,223 @@
1
  {
2
- "best_metric": 0.9372822299651568,
3
- "best_model_checkpoint": "brain-tumor-classification/checkpoint-81",
4
- "epoch": 4.938271604938271,
5
  "eval_steps": 500,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
- {
12
- "epoch": 0.49382716049382713,
13
- "grad_norm": 11.951680183410645,
14
- "learning_rate": 5e-05,
15
- "loss": 1.2238,
16
- "step": 10
17
- },
18
  {
19
  "epoch": 0.9876543209876543,
20
- "grad_norm": 11.546021461486816,
21
- "learning_rate": 4.4444444444444447e-05,
22
- "loss": 0.8509,
23
  "step": 20
24
  },
25
  {
26
  "epoch": 0.9876543209876543,
27
- "eval_accuracy": 0.8466898954703833,
28
- "eval_loss": 0.5305207967758179,
29
- "eval_runtime": 2.7821,
30
- "eval_samples_per_second": 103.16,
31
- "eval_steps_per_second": 3.235,
32
  "step": 20
33
  },
34
- {
35
- "epoch": 1.4814814814814814,
36
- "grad_norm": 13.172475814819336,
37
- "learning_rate": 3.888888888888889e-05,
38
- "loss": 0.5721,
39
- "step": 30
40
- },
41
  {
42
  "epoch": 1.9753086419753085,
43
- "grad_norm": 7.819632530212402,
44
- "learning_rate": 3.3333333333333335e-05,
45
- "loss": 0.4478,
46
  "step": 40
47
  },
48
  {
49
  "epoch": 1.9753086419753085,
50
- "eval_accuracy": 0.9094076655052264,
51
- "eval_loss": 0.3092304468154907,
52
- "eval_runtime": 2.5037,
53
- "eval_samples_per_second": 114.631,
54
- "eval_steps_per_second": 3.595,
55
  "step": 40
56
  },
57
- {
58
- "epoch": 2.4691358024691357,
59
- "grad_norm": 10.437914848327637,
60
- "learning_rate": 2.777777777777778e-05,
61
- "loss": 0.3668,
62
- "step": 50
63
- },
64
  {
65
  "epoch": 2.962962962962963,
66
- "grad_norm": 9.35677433013916,
67
- "learning_rate": 2.2222222222222223e-05,
68
- "loss": 0.3313,
69
  "step": 60
70
  },
71
  {
72
  "epoch": 2.962962962962963,
73
- "eval_accuracy": 0.9233449477351916,
74
- "eval_loss": 0.24220147728919983,
75
- "eval_runtime": 2.4542,
76
- "eval_samples_per_second": 116.944,
77
- "eval_steps_per_second": 3.667,
78
  "step": 60
79
  },
80
  {
81
- "epoch": 3.45679012345679,
82
- "grad_norm": 8.183242797851562,
83
- "learning_rate": 1.6666666666666667e-05,
84
- "loss": 0.2979,
85
- "step": 70
86
- },
87
- {
88
- "epoch": 3.950617283950617,
89
- "grad_norm": 8.208118438720703,
90
- "learning_rate": 1.1111111111111112e-05,
91
- "loss": 0.2777,
92
- "step": 80
93
  },
94
  {
95
  "epoch": 4.0,
96
- "eval_accuracy": 0.9372822299651568,
97
- "eval_loss": 0.1715514212846756,
98
- "eval_runtime": 2.7133,
99
- "eval_samples_per_second": 105.776,
100
- "eval_steps_per_second": 3.317,
101
  "step": 81
102
  },
103
  {
104
- "epoch": 4.444444444444445,
105
- "grad_norm": 8.672185897827148,
106
- "learning_rate": 5.555555555555556e-06,
107
- "loss": 0.2684,
108
- "step": 90
109
  },
110
  {
111
- "epoch": 4.938271604938271,
112
- "grad_norm": 10.231292724609375,
113
- "learning_rate": 0.0,
114
- "loss": 0.2465,
115
- "step": 100
 
 
116
  },
117
  {
118
- "epoch": 4.938271604938271,
119
- "eval_accuracy": 0.9372822299651568,
120
- "eval_loss": 0.16363371908664703,
121
- "eval_runtime": 2.7577,
122
- "eval_samples_per_second": 104.074,
123
- "eval_steps_per_second": 3.264,
124
- "step": 100
125
  },
126
  {
127
- "epoch": 4.938271604938271,
128
- "step": 100,
129
- "total_flos": 3.1727957353537536e+17,
130
- "train_loss": 0.4883076953887939,
131
- "train_runtime": 265.3223,
132
- "train_samples_per_second": 48.677,
133
- "train_steps_per_second": 0.377
134
  },
135
  {
136
- "epoch": 4.938271604938271,
137
- "eval_accuracy": 0.9372822299651568,
138
- "eval_loss": 0.1715514212846756,
139
- "eval_runtime": 2.7579,
140
- "eval_samples_per_second": 104.066,
141
- "eval_steps_per_second": 3.263,
142
- "step": 100
143
- },
144
- {
145
- "epoch": 4.938271604938271,
146
- "eval_accuracy": 0.9156020131629887,
147
- "eval_loss": 0.22556395828723907,
148
- "eval_runtime": 22.7546,
149
- "eval_samples_per_second": 113.516,
150
- "eval_steps_per_second": 3.56,
151
- "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  }
153
  ],
154
  "logging_steps": 10,
155
- "max_steps": 100,
156
  "num_input_tokens_seen": 0,
157
- "num_train_epochs": 5,
158
  "save_steps": 500,
159
  "stateful_callbacks": {
160
  "TrainerControl": {
@@ -168,7 +231,7 @@
168
  "attributes": {}
169
  }
170
  },
171
- "total_flos": 3.1727957353537536e+17,
172
  "train_batch_size": 32,
173
  "trial_name": null,
174
  "trial_params": null
 
1
  {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 9.876543209876543,
5
  "eval_steps": 500,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
11
  {
12
  "epoch": 0.9876543209876543,
13
+ "grad_norm": 8.28109359741211,
14
+ "learning_rate": 4.5e-05,
15
+ "loss": 0.8062,
16
  "step": 20
17
  },
18
  {
19
  "epoch": 0.9876543209876543,
20
+ "eval_accuracy": 0.818815331010453,
21
+ "eval_loss": 0.4432358145713806,
22
+ "eval_runtime": 2.3752,
23
+ "eval_samples_per_second": 120.833,
24
+ "eval_steps_per_second": 3.789,
25
  "step": 20
26
  },
 
 
 
 
 
 
 
27
  {
28
  "epoch": 1.9753086419753085,
29
+ "grad_norm": 9.780616760253906,
30
+ "learning_rate": 4e-05,
31
+ "loss": 0.4153,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 1.9753086419753085,
36
+ "eval_accuracy": 0.8536585365853658,
37
+ "eval_loss": 0.3407208323478699,
38
+ "eval_runtime": 2.4373,
39
+ "eval_samples_per_second": 117.752,
40
+ "eval_steps_per_second": 3.693,
41
  "step": 40
42
  },
 
 
 
 
 
 
 
43
  {
44
  "epoch": 2.962962962962963,
45
+ "grad_norm": 6.261844635009766,
46
+ "learning_rate": 3.5e-05,
47
+ "loss": 0.3213,
48
  "step": 60
49
  },
50
  {
51
  "epoch": 2.962962962962963,
52
+ "eval_accuracy": 0.9372822299651568,
53
+ "eval_loss": 0.1876000016927719,
54
+ "eval_runtime": 2.4811,
55
+ "eval_samples_per_second": 115.675,
56
+ "eval_steps_per_second": 3.627,
57
  "step": 60
58
  },
59
  {
60
+ "epoch": 4.0,
61
+ "grad_norm": 10.462350845336914,
62
+ "learning_rate": 2.975e-05,
63
+ "loss": 0.2633,
64
+ "step": 81
 
 
 
 
 
 
 
65
  },
66
  {
67
  "epoch": 4.0,
68
+ "eval_accuracy": 0.9442508710801394,
69
+ "eval_loss": 0.15536989271640778,
70
+ "eval_runtime": 2.8656,
71
+ "eval_samples_per_second": 100.154,
72
+ "eval_steps_per_second": 3.141,
73
  "step": 81
74
  },
75
  {
76
+ "epoch": 4.987654320987654,
77
+ "grad_norm": 6.831620693206787,
78
+ "learning_rate": 2.4750000000000002e-05,
79
+ "loss": 0.2201,
80
+ "step": 101
81
  },
82
  {
83
+ "epoch": 4.987654320987654,
84
+ "eval_accuracy": 0.9547038327526133,
85
+ "eval_loss": 0.13280798494815826,
86
+ "eval_runtime": 2.4786,
87
+ "eval_samples_per_second": 115.792,
88
+ "eval_steps_per_second": 3.631,
89
+ "step": 101
90
  },
91
  {
92
+ "epoch": 5.9753086419753085,
93
+ "grad_norm": 8.320969581604004,
94
+ "learning_rate": 1.9750000000000002e-05,
95
+ "loss": 0.2087,
96
+ "step": 121
 
 
97
  },
98
  {
99
+ "epoch": 5.9753086419753085,
100
+ "eval_accuracy": 0.9721254355400697,
101
+ "eval_loss": 0.08554696291685104,
102
+ "eval_runtime": 2.6901,
103
+ "eval_samples_per_second": 106.686,
104
+ "eval_steps_per_second": 3.346,
105
+ "step": 121
106
  },
107
  {
108
+ "epoch": 6.962962962962963,
109
+ "grad_norm": 5.462257385253906,
110
+ "learning_rate": 1.475e-05,
111
+ "loss": 0.1797,
112
+ "step": 141
113
+ },
114
+ {
115
+ "epoch": 6.962962962962963,
116
+ "eval_accuracy": 0.9442508710801394,
117
+ "eval_loss": 0.12809309363365173,
118
+ "eval_runtime": 2.4948,
119
+ "eval_samples_per_second": 115.041,
120
+ "eval_steps_per_second": 3.608,
121
+ "step": 141
122
+ },
123
+ {
124
+ "epoch": 8.0,
125
+ "grad_norm": 6.069087982177734,
126
+ "learning_rate": 9.5e-06,
127
+ "loss": 0.1478,
128
+ "step": 162
129
+ },
130
+ {
131
+ "epoch": 8.0,
132
+ "eval_accuracy": 0.9721254355400697,
133
+ "eval_loss": 0.08397921919822693,
134
+ "eval_runtime": 2.5152,
135
+ "eval_samples_per_second": 114.108,
136
+ "eval_steps_per_second": 3.578,
137
+ "step": 162
138
+ },
139
+ {
140
+ "epoch": 8.987654320987655,
141
+ "grad_norm": 12.428985595703125,
142
+ "learning_rate": 4.5e-06,
143
+ "loss": 0.1545,
144
+ "step": 182
145
+ },
146
+ {
147
+ "epoch": 8.987654320987655,
148
+ "eval_accuracy": 0.9686411149825784,
149
+ "eval_loss": 0.08367497473955154,
150
+ "eval_runtime": 2.4429,
151
+ "eval_samples_per_second": 117.485,
152
+ "eval_steps_per_second": 3.684,
153
+ "step": 182
154
+ },
155
+ {
156
+ "epoch": 9.876543209876543,
157
+ "grad_norm": 2.292888641357422,
158
+ "learning_rate": 0.0,
159
+ "loss": 0.1315,
160
+ "step": 200
161
+ },
162
+ {
163
+ "epoch": 9.876543209876543,
164
+ "eval_accuracy": 0.9721254355400697,
165
+ "eval_loss": 0.07933783531188965,
166
+ "eval_runtime": 2.7805,
167
+ "eval_samples_per_second": 103.217,
168
+ "eval_steps_per_second": 3.237,
169
+ "step": 200
170
+ },
171
+ {
172
+ "epoch": 9.876543209876543,
173
+ "step": 200,
174
+ "total_flos": 6.343354306682266e+17,
175
+ "train_loss": 0.2855896496772766,
176
+ "train_runtime": 464.8772,
177
+ "train_samples_per_second": 55.563,
178
+ "train_steps_per_second": 0.43
179
+ },
180
+ {
181
+ "epoch": 9.876543209876543,
182
+ "eval_accuracy": 0.9562524196670538,
183
+ "eval_loss": 0.11971130222082138,
184
+ "eval_runtime": 24.8934,
185
+ "eval_samples_per_second": 103.763,
186
+ "eval_steps_per_second": 3.254,
187
+ "step": 200
188
+ },
189
+ {
190
+ "epoch": 9.876543209876543,
191
+ "eval_accuracy": 0.9562524196670538,
192
+ "eval_loss": 0.12554492056369781,
193
+ "eval_runtime": 27.9535,
194
+ "eval_samples_per_second": 92.403,
195
+ "eval_steps_per_second": 2.898,
196
+ "step": 200
197
+ },
198
+ {
199
+ "epoch": 9.876543209876543,
200
+ "eval_accuracy": 0.9721254355400697,
201
+ "eval_loss": 0.07933783531188965,
202
+ "eval_runtime": 2.5048,
203
+ "eval_samples_per_second": 114.582,
204
+ "eval_steps_per_second": 3.593,
205
+ "step": 200
206
+ },
207
+ {
208
+ "epoch": 9.876543209876543,
209
+ "eval_accuracy": 0.9519938056523423,
210
+ "eval_loss": 0.1284445822238922,
211
+ "eval_runtime": 22.1837,
212
+ "eval_samples_per_second": 116.437,
213
+ "eval_steps_per_second": 3.651,
214
+ "step": 200
215
  }
216
  ],
217
  "logging_steps": 10,
218
+ "max_steps": 200,
219
  "num_input_tokens_seen": 0,
220
+ "num_train_epochs": 10,
221
  "save_steps": 500,
222
  "stateful_callbacks": {
223
  "TrainerControl": {
 
231
  "attributes": {}
232
  }
233
  },
234
+ "total_flos": 6.343354306682266e+17,
235
  "train_batch_size": 32,
236
  "trial_name": null,
237
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c2d94641c05a8dc4c54effcc8b8b87b4826b5db844cbebecb5ec09a94ca76ce
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07f5f871bd369d923261f3adab95a749dfd8062b1c7d23a971796b6ca6ca54c9
3
  size 5176