krishna-exe committed on
Commit
f543c2b
1 Parent(s): 466bc7b

Training in progress, epoch 0

Browse files
all_results.json CHANGED
@@ -1,8 +1,13 @@
1
  {
2
  "epoch": 4.938271604938271,
 
 
 
 
 
3
  "total_flos": 3.1727957353537536e+17,
4
- "train_loss": 0.39835033178329465,
5
- "train_runtime": 251.5945,
6
- "train_samples_per_second": 51.333,
7
- "train_steps_per_second": 0.795
8
  }
 
1
  {
2
  "epoch": 4.938271604938271,
3
+ "eval_accuracy": 0.9616724738675958,
4
+ "eval_loss": 0.11273417621850967,
5
+ "eval_runtime": 2.4706,
6
+ "eval_samples_per_second": 116.166,
7
+ "eval_steps_per_second": 7.286,
8
  "total_flos": 3.1727957353537536e+17,
9
+ "train_loss": 0.13397518873214723,
10
+ "train_runtime": 259.5354,
11
+ "train_samples_per_second": 49.762,
12
+ "train_steps_per_second": 0.771
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.938271604938271,
3
- "eval_accuracy": 0.9547038327526133,
4
- "eval_loss": 0.12142720818519592,
5
- "eval_runtime": 2.5284,
6
- "eval_samples_per_second": 113.511,
7
- "eval_steps_per_second": 7.119
8
  }
 
1
  {
2
  "epoch": 4.938271604938271,
3
+ "eval_accuracy": 0.9616724738675958,
4
+ "eval_loss": 0.11273417621850967,
5
+ "eval_runtime": 2.4706,
6
+ "eval_samples_per_second": 116.166,
7
+ "eval_steps_per_second": 7.286
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ee704706002a80cbfd4afdb5adbaeb3b0c98b850ec8ccfab6cda940d54f0ab6b
3
  size 110348984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0fc9da5c679d835c5b5188985e89e842b3b07dc57b274b60b5adc196921ba94
3
  size 110348984
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.938271604938271,
3
  "total_flos": 3.1727957353537536e+17,
4
- "train_loss": 0.39835033178329465,
5
- "train_runtime": 251.5945,
6
- "train_samples_per_second": 51.333,
7
- "train_steps_per_second": 0.795
8
  }
 
1
  {
2
  "epoch": 4.938271604938271,
3
  "total_flos": 3.1727957353537536e+17,
4
+ "train_loss": 0.13397518873214723,
5
+ "train_runtime": 259.5354,
6
+ "train_samples_per_second": 49.762,
7
+ "train_steps_per_second": 0.771
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.9442508710801394,
3
  "best_model_checkpoint": "brain-tumor-classification/checkpoint-200",
4
  "epoch": 4.938271604938271,
5
  "eval_steps": 500,
@@ -10,92 +10,92 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.9876543209876543,
13
- "grad_norm": 26.882118225097656,
14
  "learning_rate": 4.4444444444444447e-05,
15
- "loss": 0.87,
16
  "step": 40
17
  },
18
  {
19
  "epoch": 0.9876543209876543,
20
- "eval_accuracy": 0.8710801393728222,
21
- "eval_loss": 0.3874738812446594,
22
- "eval_runtime": 4.2257,
23
- "eval_samples_per_second": 67.917,
24
- "eval_steps_per_second": 4.26,
25
  "step": 40
26
  },
27
  {
28
  "epoch": 2.0,
29
- "grad_norm": 6.630888938903809,
30
  "learning_rate": 3.3055555555555553e-05,
31
- "loss": 0.426,
32
  "step": 81
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_accuracy": 0.9024390243902439,
37
- "eval_loss": 0.22352387011051178,
38
- "eval_runtime": 2.3991,
39
- "eval_samples_per_second": 119.628,
40
- "eval_steps_per_second": 7.503,
41
  "step": 81
42
  },
43
  {
44
  "epoch": 2.9876543209876543,
45
- "grad_norm": 9.56891918182373,
46
  "learning_rate": 2.1944444444444445e-05,
47
- "loss": 0.292,
48
  "step": 121
49
  },
50
  {
51
  "epoch": 2.9876543209876543,
52
- "eval_accuracy": 0.926829268292683,
53
- "eval_loss": 0.2056380957365036,
54
- "eval_runtime": 2.3874,
55
- "eval_samples_per_second": 120.214,
56
- "eval_steps_per_second": 7.54,
57
  "step": 121
58
  },
59
  {
60
  "epoch": 4.0,
61
- "grad_norm": 7.695461273193359,
62
  "learning_rate": 1.0555555555555555e-05,
63
- "loss": 0.217,
64
  "step": 162
65
  },
66
  {
67
  "epoch": 4.0,
68
- "eval_accuracy": 0.9337979094076655,
69
- "eval_loss": 0.1578059196472168,
70
- "eval_runtime": 2.6136,
71
- "eval_samples_per_second": 109.811,
72
- "eval_steps_per_second": 6.887,
73
  "step": 162
74
  },
75
  {
76
  "epoch": 4.938271604938271,
77
- "grad_norm": 7.951013088226318,
78
  "learning_rate": 0.0,
79
- "loss": 0.1797,
80
  "step": 200
81
  },
82
  {
83
  "epoch": 4.938271604938271,
84
- "eval_accuracy": 0.9442508710801394,
85
- "eval_loss": 0.12732850015163422,
86
- "eval_runtime": 2.9064,
87
- "eval_samples_per_second": 98.748,
88
- "eval_steps_per_second": 6.193,
89
  "step": 200
90
  },
91
  {
92
  "epoch": 4.938271604938271,
93
  "step": 200,
94
  "total_flos": 3.1727957353537536e+17,
95
- "train_loss": 0.39835033178329465,
96
- "train_runtime": 251.5945,
97
- "train_samples_per_second": 51.333,
98
- "train_steps_per_second": 0.795
99
  }
100
  ],
101
  "logging_steps": 10,
 
1
  {
2
+ "best_metric": 0.9616724738675958,
3
  "best_model_checkpoint": "brain-tumor-classification/checkpoint-200",
4
  "epoch": 4.938271604938271,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.9876543209876543,
13
+ "grad_norm": 10.068439483642578,
14
  "learning_rate": 4.4444444444444447e-05,
15
+ "loss": 0.1888,
16
  "step": 40
17
  },
18
  {
19
  "epoch": 0.9876543209876543,
20
+ "eval_accuracy": 0.9442508710801394,
21
+ "eval_loss": 0.15003159642219543,
22
+ "eval_runtime": 3.4689,
23
+ "eval_samples_per_second": 82.736,
24
+ "eval_steps_per_second": 5.189,
25
  "step": 40
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "grad_norm": 9.355758666992188,
30
  "learning_rate": 3.3055555555555553e-05,
31
+ "loss": 0.1514,
32
  "step": 81
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_accuracy": 0.9581881533101045,
37
+ "eval_loss": 0.12207956612110138,
38
+ "eval_runtime": 2.4115,
39
+ "eval_samples_per_second": 119.012,
40
+ "eval_steps_per_second": 7.464,
41
  "step": 81
42
  },
43
  {
44
  "epoch": 2.9876543209876543,
45
+ "grad_norm": 11.253016471862793,
46
  "learning_rate": 2.1944444444444445e-05,
47
+ "loss": 0.1189,
48
  "step": 121
49
  },
50
  {
51
  "epoch": 2.9876543209876543,
52
+ "eval_accuracy": 0.9442508710801394,
53
+ "eval_loss": 0.16469331085681915,
54
+ "eval_runtime": 2.3749,
55
+ "eval_samples_per_second": 120.849,
56
+ "eval_steps_per_second": 7.579,
57
  "step": 121
58
  },
59
  {
60
  "epoch": 4.0,
61
+ "grad_norm": 10.731207847595215,
62
  "learning_rate": 1.0555555555555555e-05,
63
+ "loss": 0.1042,
64
  "step": 162
65
  },
66
  {
67
  "epoch": 4.0,
68
+ "eval_accuracy": 0.9581881533101045,
69
+ "eval_loss": 0.12243915349245071,
70
+ "eval_runtime": 2.571,
71
+ "eval_samples_per_second": 111.632,
72
+ "eval_steps_per_second": 7.001,
73
  "step": 162
74
  },
75
  {
76
  "epoch": 4.938271604938271,
77
+ "grad_norm": 7.372400283813477,
78
  "learning_rate": 0.0,
79
+ "loss": 0.1055,
80
  "step": 200
81
  },
82
  {
83
  "epoch": 4.938271604938271,
84
+ "eval_accuracy": 0.9616724738675958,
85
+ "eval_loss": 0.11273417621850967,
86
+ "eval_runtime": 2.687,
87
+ "eval_samples_per_second": 106.809,
88
+ "eval_steps_per_second": 6.699,
89
  "step": 200
90
  },
91
  {
92
  "epoch": 4.938271604938271,
93
  "step": 200,
94
  "total_flos": 3.1727957353537536e+17,
95
+ "train_loss": 0.13397518873214723,
96
+ "train_runtime": 259.5354,
97
+ "train_samples_per_second": 49.762,
98
+ "train_steps_per_second": 0.771
99
  }
100
  ],
101
  "logging_steps": 10,