krishna-exe commited on
Commit
e750e3c
1 Parent(s): 6c3fcdd

Training in progress, epoch 0

Browse files
Files changed (4) hide show
  1. all_results.json +6 -11
  2. model.safetensors +1 -1
  3. train_results.json +6 -6
  4. trainer_state.json +62 -178
all_results.json CHANGED
@@ -1,13 +1,8 @@
1
  {
2
- "epoch": 9.876543209876543,
3
- "eval_accuracy": 0.9721254355400697,
4
- "eval_loss": 0.07933783531188965,
5
- "eval_runtime": 2.8381,
6
- "eval_samples_per_second": 101.125,
7
- "eval_steps_per_second": 3.171,
8
- "total_flos": 6.343354306682266e+17,
9
- "train_loss": 0.2855896496772766,
10
- "train_runtime": 464.8772,
11
- "train_samples_per_second": 55.563,
12
- "train_steps_per_second": 0.43
13
  }
 
1
  {
2
+ "epoch": 4.938271604938271,
3
+ "total_flos": 3.1727957353537536e+17,
4
+ "train_loss": 0.5141408157348633,
5
+ "train_runtime": 269.7071,
6
+ "train_samples_per_second": 47.885,
7
+ "train_steps_per_second": 0.371
 
 
 
 
 
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:064f351ddf553d481d1a221ebc9733692dde775d77133e068606f6ddf69ecfed
3
  size 110348984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5671ef3cdb7c0d6b39d5800b57e58f8c339a64fc660900287c5c8926e7490cb
3
  size 110348984
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.876543209876543,
3
- "eval_accuracy": 0.9519938056523423,
4
- "eval_loss": 0.1284445822238922,
5
- "eval_runtime": 22.1837,
6
- "eval_samples_per_second": 116.437,
7
- "eval_steps_per_second": 3.651
8
  }
 
1
  {
2
+ "epoch": 4.938271604938271,
3
+ "total_flos": 3.1727957353537536e+17,
4
+ "train_loss": 0.5141408157348633,
5
+ "train_runtime": 269.7071,
6
+ "train_samples_per_second": 47.885,
7
+ "train_steps_per_second": 0.371
8
  }
trainer_state.json CHANGED
@@ -1,223 +1,107 @@
1
  {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 9.876543209876543,
5
  "eval_steps": 500,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.9876543209876543,
13
- "grad_norm": 8.28109359741211,
14
- "learning_rate": 4.5e-05,
15
- "loss": 0.8062,
16
  "step": 20
17
  },
18
  {
19
  "epoch": 0.9876543209876543,
20
- "eval_accuracy": 0.818815331010453,
21
- "eval_loss": 0.4432358145713806,
22
- "eval_runtime": 2.3752,
23
- "eval_samples_per_second": 120.833,
24
- "eval_steps_per_second": 3.789,
25
  "step": 20
26
  },
27
  {
28
  "epoch": 1.9753086419753085,
29
- "grad_norm": 9.780616760253906,
30
- "learning_rate": 4e-05,
31
- "loss": 0.4153,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 1.9753086419753085,
36
- "eval_accuracy": 0.8536585365853658,
37
- "eval_loss": 0.3407208323478699,
38
- "eval_runtime": 2.4373,
39
- "eval_samples_per_second": 117.752,
40
- "eval_steps_per_second": 3.693,
41
  "step": 40
42
  },
43
  {
44
  "epoch": 2.962962962962963,
45
- "grad_norm": 6.261844635009766,
46
- "learning_rate": 3.5e-05,
47
- "loss": 0.3213,
48
  "step": 60
49
  },
50
  {
51
  "epoch": 2.962962962962963,
52
- "eval_accuracy": 0.9372822299651568,
53
- "eval_loss": 0.1876000016927719,
54
- "eval_runtime": 2.4811,
55
- "eval_samples_per_second": 115.675,
56
- "eval_steps_per_second": 3.627,
57
  "step": 60
58
  },
59
  {
60
  "epoch": 4.0,
61
- "grad_norm": 10.462350845336914,
62
- "learning_rate": 2.975e-05,
63
- "loss": 0.2633,
64
  "step": 81
65
  },
66
  {
67
  "epoch": 4.0,
68
- "eval_accuracy": 0.9442508710801394,
69
- "eval_loss": 0.15536989271640778,
70
- "eval_runtime": 2.8656,
71
- "eval_samples_per_second": 100.154,
72
- "eval_steps_per_second": 3.141,
73
  "step": 81
74
  },
75
  {
76
- "epoch": 4.987654320987654,
77
- "grad_norm": 6.831620693206787,
78
- "learning_rate": 2.4750000000000002e-05,
79
- "loss": 0.2201,
80
- "step": 101
81
- },
82
- {
83
- "epoch": 4.987654320987654,
84
- "eval_accuracy": 0.9547038327526133,
85
- "eval_loss": 0.13280798494815826,
86
- "eval_runtime": 2.4786,
87
- "eval_samples_per_second": 115.792,
88
- "eval_steps_per_second": 3.631,
89
- "step": 101
90
- },
91
- {
92
- "epoch": 5.9753086419753085,
93
- "grad_norm": 8.320969581604004,
94
- "learning_rate": 1.9750000000000002e-05,
95
- "loss": 0.2087,
96
- "step": 121
97
- },
98
- {
99
- "epoch": 5.9753086419753085,
100
- "eval_accuracy": 0.9721254355400697,
101
- "eval_loss": 0.08554696291685104,
102
- "eval_runtime": 2.6901,
103
- "eval_samples_per_second": 106.686,
104
- "eval_steps_per_second": 3.346,
105
- "step": 121
106
- },
107
- {
108
- "epoch": 6.962962962962963,
109
- "grad_norm": 5.462257385253906,
110
- "learning_rate": 1.475e-05,
111
- "loss": 0.1797,
112
- "step": 141
113
- },
114
- {
115
- "epoch": 6.962962962962963,
116
- "eval_accuracy": 0.9442508710801394,
117
- "eval_loss": 0.12809309363365173,
118
- "eval_runtime": 2.4948,
119
- "eval_samples_per_second": 115.041,
120
- "eval_steps_per_second": 3.608,
121
- "step": 141
122
- },
123
- {
124
- "epoch": 8.0,
125
- "grad_norm": 6.069087982177734,
126
- "learning_rate": 9.5e-06,
127
- "loss": 0.1478,
128
- "step": 162
129
- },
130
- {
131
- "epoch": 8.0,
132
- "eval_accuracy": 0.9721254355400697,
133
- "eval_loss": 0.08397921919822693,
134
- "eval_runtime": 2.5152,
135
- "eval_samples_per_second": 114.108,
136
- "eval_steps_per_second": 3.578,
137
- "step": 162
138
- },
139
- {
140
- "epoch": 8.987654320987655,
141
- "grad_norm": 12.428985595703125,
142
- "learning_rate": 4.5e-06,
143
- "loss": 0.1545,
144
- "step": 182
145
- },
146
- {
147
- "epoch": 8.987654320987655,
148
- "eval_accuracy": 0.9686411149825784,
149
- "eval_loss": 0.08367497473955154,
150
- "eval_runtime": 2.4429,
151
- "eval_samples_per_second": 117.485,
152
- "eval_steps_per_second": 3.684,
153
- "step": 182
154
- },
155
- {
156
- "epoch": 9.876543209876543,
157
- "grad_norm": 2.292888641357422,
158
  "learning_rate": 0.0,
159
- "loss": 0.1315,
160
- "step": 200
161
- },
162
- {
163
- "epoch": 9.876543209876543,
164
- "eval_accuracy": 0.9721254355400697,
165
- "eval_loss": 0.07933783531188965,
166
- "eval_runtime": 2.7805,
167
- "eval_samples_per_second": 103.217,
168
- "eval_steps_per_second": 3.237,
169
- "step": 200
170
- },
171
- {
172
- "epoch": 9.876543209876543,
173
- "step": 200,
174
- "total_flos": 6.343354306682266e+17,
175
- "train_loss": 0.2855896496772766,
176
- "train_runtime": 464.8772,
177
- "train_samples_per_second": 55.563,
178
- "train_steps_per_second": 0.43
179
- },
180
- {
181
- "epoch": 9.876543209876543,
182
- "eval_accuracy": 0.9562524196670538,
183
- "eval_loss": 0.11971130222082138,
184
- "eval_runtime": 24.8934,
185
- "eval_samples_per_second": 103.763,
186
- "eval_steps_per_second": 3.254,
187
- "step": 200
188
- },
189
- {
190
- "epoch": 9.876543209876543,
191
- "eval_accuracy": 0.9562524196670538,
192
- "eval_loss": 0.12554492056369781,
193
- "eval_runtime": 27.9535,
194
- "eval_samples_per_second": 92.403,
195
- "eval_steps_per_second": 2.898,
196
- "step": 200
197
- },
198
- {
199
- "epoch": 9.876543209876543,
200
- "eval_accuracy": 0.9721254355400697,
201
- "eval_loss": 0.07933783531188965,
202
- "eval_runtime": 2.5048,
203
- "eval_samples_per_second": 114.582,
204
- "eval_steps_per_second": 3.593,
205
- "step": 200
206
- },
207
- {
208
- "epoch": 9.876543209876543,
209
- "eval_accuracy": 0.9519938056523423,
210
- "eval_loss": 0.1284445822238922,
211
- "eval_runtime": 22.1837,
212
- "eval_samples_per_second": 116.437,
213
- "eval_steps_per_second": 3.651,
214
- "step": 200
215
  }
216
  ],
217
- "logging_steps": 10,
218
- "max_steps": 200,
219
  "num_input_tokens_seen": 0,
220
- "num_train_epochs": 10,
221
  "save_steps": 500,
222
  "stateful_callbacks": {
223
  "TrainerControl": {
@@ -231,7 +115,7 @@
231
  "attributes": {}
232
  }
233
  },
234
- "total_flos": 6.343354306682266e+17,
235
  "train_batch_size": 32,
236
  "trial_name": null,
237
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9128919860627178,
3
+ "best_model_checkpoint": "brain-tumor-classification/checkpoint-81",
4
+ "epoch": 4.938271604938271,
5
  "eval_steps": 500,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.9876543209876543,
13
+ "grad_norm": 13.366371154785156,
14
+ "learning_rate": 4.4444444444444447e-05,
15
+ "loss": 1.0827,
16
  "step": 20
17
  },
18
  {
19
  "epoch": 0.9876543209876543,
20
+ "eval_accuracy": 0.7630662020905923,
21
+ "eval_loss": 0.5737153887748718,
22
+ "eval_runtime": 2.4539,
23
+ "eval_samples_per_second": 116.958,
24
+ "eval_steps_per_second": 3.668,
25
  "step": 20
26
  },
27
  {
28
  "epoch": 1.9753086419753085,
29
+ "grad_norm": 7.611490249633789,
30
+ "learning_rate": 3.3333333333333335e-05,
31
+ "loss": 0.5357,
32
  "step": 40
33
  },
34
  {
35
  "epoch": 1.9753086419753085,
36
+ "eval_accuracy": 0.8641114982578397,
37
+ "eval_loss": 0.3689139187335968,
38
+ "eval_runtime": 2.4398,
39
+ "eval_samples_per_second": 117.633,
40
+ "eval_steps_per_second": 3.689,
41
  "step": 40
42
  },
43
  {
44
  "epoch": 2.962962962962963,
45
+ "grad_norm": 7.663794040679932,
46
+ "learning_rate": 2.2222222222222223e-05,
47
+ "loss": 0.3875,
48
  "step": 60
49
  },
50
  {
51
  "epoch": 2.962962962962963,
52
+ "eval_accuracy": 0.8954703832752613,
53
+ "eval_loss": 0.29916301369667053,
54
+ "eval_runtime": 2.5481,
55
+ "eval_samples_per_second": 112.634,
56
+ "eval_steps_per_second": 3.532,
57
  "step": 60
58
  },
59
  {
60
  "epoch": 4.0,
61
+ "grad_norm": 14.111477851867676,
62
+ "learning_rate": 1.0555555555555555e-05,
63
+ "loss": 0.2921,
64
  "step": 81
65
  },
66
  {
67
  "epoch": 4.0,
68
+ "eval_accuracy": 0.9128919860627178,
69
+ "eval_loss": 0.27263280749320984,
70
+ "eval_runtime": 2.6327,
71
+ "eval_samples_per_second": 109.015,
72
+ "eval_steps_per_second": 3.419,
73
  "step": 81
74
  },
75
  {
76
+ "epoch": 4.938271604938271,
77
+ "grad_norm": 9.39560317993164,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  "learning_rate": 0.0,
79
+ "loss": 0.2716,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 4.938271604938271,
84
+ "eval_accuracy": 0.9128919860627178,
85
+ "eval_loss": 0.23185838758945465,
86
+ "eval_runtime": 3.5061,
87
+ "eval_samples_per_second": 81.858,
88
+ "eval_steps_per_second": 2.567,
89
+ "step": 100
90
+ },
91
+ {
92
+ "epoch": 4.938271604938271,
93
+ "step": 100,
94
+ "total_flos": 3.1727957353537536e+17,
95
+ "train_loss": 0.5141408157348633,
96
+ "train_runtime": 269.7071,
97
+ "train_samples_per_second": 47.885,
98
+ "train_steps_per_second": 0.371
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  }
100
  ],
101
+ "logging_steps": 500,
102
+ "max_steps": 100,
103
  "num_input_tokens_seen": 0,
104
+ "num_train_epochs": 5,
105
  "save_steps": 500,
106
  "stateful_callbacks": {
107
  "TrainerControl": {
 
115
  "attributes": {}
116
  }
117
  },
118
+ "total_flos": 3.1727957353537536e+17,
119
  "train_batch_size": 32,
120
  "trial_name": null,
121
  "trial_params": null