MatsRooth committed on
Commit
dabb5da
1 Parent(s): 2033682

json files after training

Browse files
all_results.json CHANGED
@@ -2,11 +2,11 @@
2
  "epoch": 5.0,
3
  "eval_accuracy": 0.9961612284069098,
4
  "eval_loss": 0.13847295939922333,
5
- "eval_runtime": 35.6829,
6
- "eval_samples_per_second": 14.601,
7
- "eval_steps_per_second": 0.476,
8
- "train_loss": 0.17887740957325904,
9
- "train_runtime": 628.2613,
10
- "train_samples_per_second": 29.494,
11
- "train_steps_per_second": 0.231
12
  }
 
2
  "epoch": 5.0,
3
  "eval_accuracy": 0.9961612284069098,
4
  "eval_loss": 0.13847295939922333,
5
+ "eval_runtime": 14.1605,
6
+ "eval_samples_per_second": 36.792,
7
+ "eval_steps_per_second": 1.201,
8
+ "train_loss": 0.17903739583903344,
9
+ "train_runtime": 421.3317,
10
+ "train_samples_per_second": 43.98,
11
+ "train_steps_per_second": 0.344
12
  }
eval_results.json CHANGED
@@ -2,7 +2,7 @@
2
  "epoch": 5.0,
3
  "eval_accuracy": 0.9961612284069098,
4
  "eval_loss": 0.13847295939922333,
5
- "eval_runtime": 35.6829,
6
- "eval_samples_per_second": 14.601,
7
- "eval_steps_per_second": 0.476
8
  }
 
2
  "epoch": 5.0,
3
  "eval_accuracy": 0.9961612284069098,
4
  "eval_loss": 0.13847295939922333,
5
+ "eval_runtime": 14.1605,
6
+ "eval_samples_per_second": 36.792,
7
+ "eval_steps_per_second": 1.201
8
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 5.0,
3
- "train_loss": 0.17887740957325904,
4
- "train_runtime": 628.2613,
5
- "train_samples_per_second": 29.494,
6
- "train_steps_per_second": 0.231
7
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "train_loss": 0.17903739583903344,
4
+ "train_runtime": 421.3317,
5
+ "train_samples_per_second": 43.98,
6
+ "train_steps_per_second": 0.344
7
  }
trainer_state.json CHANGED
@@ -23,9 +23,9 @@
23
  "epoch": 1.0,
24
  "eval_accuracy": 0.9961612284069098,
25
  "eval_loss": 0.13847295939922333,
26
- "eval_runtime": 36.4538,
27
- "eval_samples_per_second": 14.292,
28
- "eval_steps_per_second": 0.466,
29
  "step": 29
30
  },
31
  {
@@ -37,34 +37,34 @@
37
  {
38
  "epoch": 1.38,
39
  "learning_rate": 2.423076923076923e-05,
40
- "loss": 0.151,
41
  "step": 40
42
  },
43
  {
44
  "epoch": 1.72,
45
  "learning_rate": 2.1923076923076924e-05,
46
- "loss": 0.1297,
47
  "step": 50
48
  },
49
  {
50
  "epoch": 2.0,
51
  "eval_accuracy": 0.9961612284069098,
52
- "eval_loss": 0.051253072917461395,
53
- "eval_runtime": 36.428,
54
- "eval_samples_per_second": 14.302,
55
- "eval_steps_per_second": 0.467,
56
  "step": 58
57
  },
58
  {
59
  "epoch": 2.07,
60
  "learning_rate": 1.9615384615384617e-05,
61
- "loss": 0.1075,
62
  "step": 60
63
  },
64
  {
65
  "epoch": 2.41,
66
  "learning_rate": 1.7307692307692306e-05,
67
- "loss": 0.084,
68
  "step": 70
69
  },
70
  {
@@ -76,10 +76,10 @@
76
  {
77
  "epoch": 3.0,
78
  "eval_accuracy": 0.9884836852207294,
79
- "eval_loss": 0.038927894085645676,
80
- "eval_runtime": 36.427,
81
- "eval_samples_per_second": 14.303,
82
- "eval_steps_per_second": 0.467,
83
  "step": 87
84
  },
85
  {
@@ -91,59 +91,59 @@
91
  {
92
  "epoch": 3.45,
93
  "learning_rate": 1.0384615384615384e-05,
94
- "loss": 0.0684,
95
  "step": 100
96
  },
97
  {
98
  "epoch": 3.79,
99
  "learning_rate": 8.076923076923077e-06,
100
- "loss": 0.058,
101
  "step": 110
102
  },
103
  {
104
  "epoch": 4.0,
105
  "eval_accuracy": 0.9923224568138196,
106
- "eval_loss": 0.030210411176085472,
107
- "eval_runtime": 36.3815,
108
- "eval_samples_per_second": 14.32,
109
- "eval_steps_per_second": 0.467,
110
  "step": 116
111
  },
112
  {
113
  "epoch": 4.14,
114
  "learning_rate": 5.76923076923077e-06,
115
- "loss": 0.0863,
116
  "step": 120
117
  },
118
  {
119
  "epoch": 4.48,
120
  "learning_rate": 3.4615384615384617e-06,
121
- "loss": 0.0669,
122
  "step": 130
123
  },
124
  {
125
  "epoch": 4.83,
126
  "learning_rate": 1.153846153846154e-06,
127
- "loss": 0.0481,
128
  "step": 140
129
  },
130
  {
131
  "epoch": 5.0,
132
- "eval_accuracy": 0.9942418426103646,
133
- "eval_loss": 0.024523714557290077,
134
- "eval_runtime": 36.414,
135
- "eval_samples_per_second": 14.308,
136
- "eval_steps_per_second": 0.467,
137
  "step": 145
138
  },
139
  {
140
  "epoch": 5.0,
141
  "step": 145,
142
  "total_flos": 1.682270628192e+17,
143
- "train_loss": 0.17887740957325904,
144
- "train_runtime": 628.2613,
145
- "train_samples_per_second": 29.494,
146
- "train_steps_per_second": 0.231
147
  }
148
  ],
149
  "max_steps": 145,
 
23
  "epoch": 1.0,
24
  "eval_accuracy": 0.9961612284069098,
25
  "eval_loss": 0.13847295939922333,
26
+ "eval_runtime": 13.4936,
27
+ "eval_samples_per_second": 38.611,
28
+ "eval_steps_per_second": 1.26,
29
  "step": 29
30
  },
31
  {
 
37
  {
38
  "epoch": 1.38,
39
  "learning_rate": 2.423076923076923e-05,
40
+ "loss": 0.1511,
41
  "step": 40
42
  },
43
  {
44
  "epoch": 1.72,
45
  "learning_rate": 2.1923076923076924e-05,
46
+ "loss": 0.1289,
47
  "step": 50
48
  },
49
  {
50
  "epoch": 2.0,
51
  "eval_accuracy": 0.9961612284069098,
52
+ "eval_loss": 0.05099393427371979,
53
+ "eval_runtime": 13.2879,
54
+ "eval_samples_per_second": 39.209,
55
+ "eval_steps_per_second": 1.279,
56
  "step": 58
57
  },
58
  {
59
  "epoch": 2.07,
60
  "learning_rate": 1.9615384615384617e-05,
61
+ "loss": 0.1076,
62
  "step": 60
63
  },
64
  {
65
  "epoch": 2.41,
66
  "learning_rate": 1.7307692307692306e-05,
67
+ "loss": 0.088,
68
  "step": 70
69
  },
70
  {
 
76
  {
77
  "epoch": 3.0,
78
  "eval_accuracy": 0.9884836852207294,
79
+ "eval_loss": 0.043337538838386536,
80
+ "eval_runtime": 13.6871,
81
+ "eval_samples_per_second": 38.065,
82
+ "eval_steps_per_second": 1.242,
83
  "step": 87
84
  },
85
  {
 
91
  {
92
  "epoch": 3.45,
93
  "learning_rate": 1.0384615384615384e-05,
94
+ "loss": 0.069,
95
  "step": 100
96
  },
97
  {
98
  "epoch": 3.79,
99
  "learning_rate": 8.076923076923077e-06,
100
+ "loss": 0.0605,
101
  "step": 110
102
  },
103
  {
104
  "epoch": 4.0,
105
  "eval_accuracy": 0.9923224568138196,
106
+ "eval_loss": 0.033004965633153915,
107
+ "eval_runtime": 13.3003,
108
+ "eval_samples_per_second": 39.172,
109
+ "eval_steps_per_second": 1.278,
110
  "step": 116
111
  },
112
  {
113
  "epoch": 4.14,
114
  "learning_rate": 5.76923076923077e-06,
115
+ "loss": 0.0835,
116
  "step": 120
117
  },
118
  {
119
  "epoch": 4.48,
120
  "learning_rate": 3.4615384615384617e-06,
121
+ "loss": 0.0671,
122
  "step": 130
123
  },
124
  {
125
  "epoch": 4.83,
126
  "learning_rate": 1.153846153846154e-06,
127
+ "loss": 0.0479,
128
  "step": 140
129
  },
130
  {
131
  "epoch": 5.0,
132
+ "eval_accuracy": 0.9904030710172744,
133
+ "eval_loss": 0.027281073853373528,
134
+ "eval_runtime": 13.6485,
135
+ "eval_samples_per_second": 38.173,
136
+ "eval_steps_per_second": 1.246,
137
  "step": 145
138
  },
139
  {
140
  "epoch": 5.0,
141
  "step": 145,
142
  "total_flos": 1.682270628192e+17,
143
+ "train_loss": 0.17903739583903344,
144
+ "train_runtime": 421.3317,
145
+ "train_samples_per_second": 43.98,
146
+ "train_steps_per_second": 0.344
147
  }
148
  ],
149
  "max_steps": 145,