mijungkim commited on
Commit
a831e94
1 Parent(s): 7541f4e

End of training

Browse files
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
- "epoch": 2.13,
3
- "eval_accuracy": 0.909112825458052,
4
- "eval_f1": 0.8837125963089052,
5
- "eval_loss": 0.6461763978004456,
6
- "eval_precision": 0.879144385026738,
7
- "eval_recall": 0.888328530259366,
8
- "eval_runtime": 15.0345,
9
  "eval_samples": 189,
10
- "eval_samples_per_second": 12.571,
11
- "eval_steps_per_second": 0.798,
12
- "train_loss": 1.3162074279785156,
13
- "train_runtime": 179.095,
14
  "train_samples": 752,
15
- "train_samples_per_second": 8.934,
16
- "train_steps_per_second": 0.558
17
  }
 
1
  {
2
+ "epoch": 21.28,
3
+ "eval_accuracy": 0.9908389585342333,
4
+ "eval_f1": 0.9868823000898472,
5
+ "eval_loss": 0.05581057444214821,
6
+ "eval_precision": 0.9845822875582646,
7
+ "eval_recall": 0.989193083573487,
8
+ "eval_runtime": 15.2739,
9
  "eval_samples": 189,
10
+ "eval_samples_per_second": 12.374,
11
+ "eval_steps_per_second": 0.786,
12
+ "train_loss": 0.1608761215209961,
13
+ "train_runtime": 1880.5995,
14
  "train_samples": 752,
15
+ "train_samples_per_second": 8.508,
16
+ "train_steps_per_second": 0.532
17
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 2.13,
3
- "eval_accuracy": 0.909112825458052,
4
- "eval_f1": 0.8837125963089052,
5
- "eval_loss": 0.6461763978004456,
6
- "eval_precision": 0.879144385026738,
7
- "eval_recall": 0.888328530259366,
8
- "eval_runtime": 15.0345,
9
  "eval_samples": 189,
10
- "eval_samples_per_second": 12.571,
11
- "eval_steps_per_second": 0.798
12
  }
 
1
  {
2
+ "epoch": 21.28,
3
+ "eval_accuracy": 0.9908389585342333,
4
+ "eval_f1": 0.9868823000898472,
5
+ "eval_loss": 0.05581057444214821,
6
+ "eval_precision": 0.9845822875582646,
7
+ "eval_recall": 0.989193083573487,
8
+ "eval_runtime": 15.2739,
9
  "eval_samples": 189,
10
+ "eval_samples_per_second": 12.374,
11
+ "eval_steps_per_second": 0.786
12
  }
runs/Dec20_17-18-14_pod-pasha/events.out.tfevents.1671558810.pod-pasha.541684.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df831f67092f73ac658ca971c347d14c5331db4b1283f747dd58af680fa232e8
3
+ size 512
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 2.13,
3
- "train_loss": 1.3162074279785156,
4
- "train_runtime": 179.095,
5
  "train_samples": 752,
6
- "train_samples_per_second": 8.934,
7
- "train_steps_per_second": 0.558
8
  }
 
1
  {
2
+ "epoch": 21.28,
3
+ "train_loss": 0.1608761215209961,
4
+ "train_runtime": 1880.5995,
5
  "train_samples": 752,
6
+ "train_samples_per_second": 8.508,
7
+ "train_steps_per_second": 0.532
8
  }
trainer_state.json CHANGED
@@ -1,37 +1,157 @@
1
  {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 2.127659574468085,
5
- "global_step": 100,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 2.13,
12
- "eval_accuracy": 0.909112825458052,
13
- "eval_f1": 0.8837125963089052,
14
- "eval_loss": 0.6461763978004456,
15
- "eval_precision": 0.879144385026738,
16
- "eval_recall": 0.888328530259366,
17
- "eval_runtime": 15.0755,
18
- "eval_samples_per_second": 12.537,
19
- "eval_steps_per_second": 0.796,
20
  "step": 100
21
  },
22
  {
23
- "epoch": 2.13,
24
- "step": 100,
25
- "total_flos": 424705445068800.0,
26
- "train_loss": 1.3162074279785156,
27
- "train_runtime": 179.095,
28
- "train_samples_per_second": 8.934,
29
- "train_steps_per_second": 0.558
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  }
31
  ],
32
- "max_steps": 100,
33
- "num_train_epochs": 3,
34
- "total_flos": 424705445068800.0,
35
  "trial_name": null,
36
  "trial_params": null
37
  }
 
1
  {
2
+ "best_metric": 0.9868823000898472,
3
+ "best_model_checkpoint": "pasha/checkpoint-1000",
4
+ "epoch": 21.27659574468085,
5
+ "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 2.13,
12
+ "eval_accuracy": 0.9566055930568949,
13
+ "eval_f1": 0.9482633863965269,
14
+ "eval_loss": 0.2661653161048889,
15
+ "eval_precision": 0.9523982558139535,
16
+ "eval_recall": 0.944164265129683,
17
+ "eval_runtime": 15.4131,
18
+ "eval_samples_per_second": 12.262,
19
+ "eval_steps_per_second": 0.779,
20
  "step": 100
21
  },
22
  {
23
+ "epoch": 4.26,
24
+ "eval_accuracy": 0.9850530376084861,
25
+ "eval_f1": 0.9795185052102047,
26
+ "eval_loss": 0.1026068776845932,
27
+ "eval_precision": 0.9770609318996416,
28
+ "eval_recall": 0.9819884726224783,
29
+ "eval_runtime": 15.204,
30
+ "eval_samples_per_second": 12.431,
31
+ "eval_steps_per_second": 0.789,
32
+ "step": 200
33
+ },
34
+ {
35
+ "epoch": 6.38,
36
+ "eval_accuracy": 0.9884281581485053,
37
+ "eval_f1": 0.9849137931034483,
38
+ "eval_loss": 0.07217290997505188,
39
+ "eval_precision": 0.9820916905444126,
40
+ "eval_recall": 0.9877521613832853,
41
+ "eval_runtime": 15.2143,
42
+ "eval_samples_per_second": 12.423,
43
+ "eval_steps_per_second": 0.789,
44
+ "step": 300
45
+ },
46
+ {
47
+ "epoch": 8.51,
48
+ "eval_accuracy": 0.9891513982642237,
49
+ "eval_f1": 0.9857785778577858,
50
+ "eval_loss": 0.060767240822315216,
51
+ "eval_precision": 0.9852464915437208,
52
+ "eval_recall": 0.9863112391930836,
53
+ "eval_runtime": 15.3275,
54
+ "eval_samples_per_second": 12.331,
55
+ "eval_steps_per_second": 0.783,
56
+ "step": 400
57
+ },
58
+ {
59
+ "epoch": 10.64,
60
+ "learning_rate": 5e-06,
61
+ "loss": 0.2962,
62
+ "step": 500
63
+ },
64
+ {
65
+ "epoch": 10.64,
66
+ "eval_accuracy": 0.9889103182256509,
67
+ "eval_f1": 0.9854185418541853,
68
+ "eval_loss": 0.060581281781196594,
69
+ "eval_precision": 0.9848866498740554,
70
+ "eval_recall": 0.9859510086455331,
71
+ "eval_runtime": 14.8027,
72
+ "eval_samples_per_second": 12.768,
73
+ "eval_steps_per_second": 0.811,
74
+ "step": 500
75
+ },
76
+ {
77
+ "epoch": 12.77,
78
+ "eval_accuracy": 0.9920443587270974,
79
+ "eval_f1": 0.988501616960115,
80
+ "eval_loss": 0.0517994724214077,
81
+ "eval_precision": 0.986021505376344,
82
+ "eval_recall": 0.9909942363112392,
83
+ "eval_runtime": 14.7335,
84
+ "eval_samples_per_second": 12.828,
85
+ "eval_steps_per_second": 0.814,
86
+ "step": 600
87
+ },
88
+ {
89
+ "epoch": 14.89,
90
+ "eval_accuracy": 0.9922854387656702,
91
+ "eval_f1": 0.988679245283019,
92
+ "eval_loss": 0.052589546889066696,
93
+ "eval_precision": 0.9863750448189316,
94
+ "eval_recall": 0.9909942363112392,
95
+ "eval_runtime": 14.9339,
96
+ "eval_samples_per_second": 12.656,
97
+ "eval_steps_per_second": 0.804,
98
+ "step": 700
99
+ },
100
+ {
101
+ "epoch": 17.02,
102
+ "eval_accuracy": 0.991321118611379,
103
+ "eval_f1": 0.9872416891284815,
104
+ "eval_loss": 0.05428989231586456,
105
+ "eval_precision": 0.984940839010398,
106
+ "eval_recall": 0.9895533141210374,
107
+ "eval_runtime": 14.8022,
108
+ "eval_samples_per_second": 12.768,
109
+ "eval_steps_per_second": 0.811,
110
+ "step": 800
111
+ },
112
+ {
113
+ "epoch": 19.15,
114
+ "eval_accuracy": 0.9910800385728061,
115
+ "eval_f1": 0.9867002156721782,
116
+ "eval_loss": 0.05573796480894089,
117
+ "eval_precision": 0.9845767575322812,
118
+ "eval_recall": 0.9888328530259366,
119
+ "eval_runtime": 14.7741,
120
+ "eval_samples_per_second": 12.793,
121
+ "eval_steps_per_second": 0.812,
122
+ "step": 900
123
+ },
124
+ {
125
+ "epoch": 21.28,
126
+ "learning_rate": 0.0,
127
+ "loss": 0.0255,
128
+ "step": 1000
129
+ },
130
+ {
131
+ "epoch": 21.28,
132
+ "eval_accuracy": 0.9908389585342333,
133
+ "eval_f1": 0.9868823000898472,
134
+ "eval_loss": 0.05581057444214821,
135
+ "eval_precision": 0.9845822875582646,
136
+ "eval_recall": 0.989193083573487,
137
+ "eval_runtime": 15.103,
138
+ "eval_samples_per_second": 12.514,
139
+ "eval_steps_per_second": 0.795,
140
+ "step": 1000
141
+ },
142
+ {
143
+ "epoch": 21.28,
144
+ "step": 1000,
145
+ "total_flos": 4247054450688000.0,
146
+ "train_loss": 0.1608761215209961,
147
+ "train_runtime": 1880.5995,
148
+ "train_samples_per_second": 8.508,
149
+ "train_steps_per_second": 0.532
150
  }
151
  ],
152
+ "max_steps": 1000,
153
+ "num_train_epochs": 22,
154
+ "total_flos": 4247054450688000.0,
155
  "trial_name": null,
156
  "trial_params": null
157
  }