MHGanainy committed on
Commit
8936342
1 Parent(s): 08f4251

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +6 -6
  2. train_results.json +6 -6
  3. trainer_state.json +61 -78
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 8.0,
3
- "total_flos": 3.7940453059015475e+17,
4
- "train_loss": 0.10690519699813626,
5
- "train_runtime": 1502.7914,
6
  "train_samples": 9000,
7
- "train_samples_per_second": 119.777,
8
- "train_steps_per_second": 3.753
9
  }
 
1
  {
2
+ "epoch": 7.0,
3
+ "total_flos": 3.9803472298849075e+17,
4
+ "train_loss": 0.11263628595385025,
5
+ "train_runtime": 1312.5646,
6
  "train_samples": 9000,
7
+ "train_samples_per_second": 137.136,
8
+ "train_steps_per_second": 4.297
9
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 8.0,
3
- "total_flos": 3.7940453059015475e+17,
4
- "train_loss": 0.10690519699813626,
5
- "train_runtime": 1502.7914,
6
  "train_samples": 9000,
7
- "train_samples_per_second": 119.777,
8
- "train_steps_per_second": 3.753
9
  }
 
1
  {
2
+ "epoch": 7.0,
3
+ "total_flos": 3.9803472298849075e+17,
4
+ "train_loss": 0.11263628595385025,
5
+ "train_runtime": 1312.5646,
6
  "train_samples": 9000,
7
+ "train_samples_per_second": 137.136,
8
+ "train_steps_per_second": 4.297
9
  }
trainer_state.json CHANGED
@@ -1,129 +1,112 @@
1
  {
2
- "best_metric": 0.7964841108857336,
3
- "best_model_checkpoint": "logs/ecthr_b/MHGanainy/xmod-shared-roberta-base-legal-multi/seed_1/checkpoint-1410",
4
- "epoch": 8.0,
5
  "eval_steps": 500,
6
- "global_step": 2256,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.1879543662071228,
14
- "eval_macro-f1": 0.637208986436089,
15
- "eval_micro-f1": 0.7414155571128241,
16
- "eval_runtime": 9.9431,
17
- "eval_samples_per_second": 100.573,
18
- "eval_steps_per_second": 3.218,
19
  "step": 282
20
  },
21
  {
22
  "epoch": 1.773049645390071,
23
- "grad_norm": 1.1033704280853271,
24
  "learning_rate": 2.7345744680851065e-05,
25
- "loss": 0.1761,
26
  "step": 500
27
  },
28
  {
29
  "epoch": 2.0,
30
- "eval_loss": 0.19721177220344543,
31
- "eval_macro-f1": 0.6534255535190332,
32
- "eval_micro-f1": 0.76043673731535,
33
- "eval_runtime": 13.6625,
34
- "eval_samples_per_second": 73.193,
35
- "eval_steps_per_second": 2.342,
36
  "step": 564
37
  },
38
  {
39
  "epoch": 3.0,
40
- "eval_loss": 0.1727759689092636,
41
- "eval_macro-f1": 0.7189646935925815,
42
- "eval_micro-f1": 0.7865168539325843,
43
- "eval_runtime": 9.5271,
44
- "eval_samples_per_second": 104.964,
45
- "eval_steps_per_second": 3.359,
46
  "step": 846
47
  },
48
  {
49
  "epoch": 3.546099290780142,
50
- "grad_norm": 1.1534513235092163,
51
  "learning_rate": 2.4686170212765958e-05,
52
- "loss": 0.1155,
53
  "step": 1000
54
  },
55
  {
56
  "epoch": 4.0,
57
- "eval_loss": 0.1755395233631134,
58
- "eval_macro-f1": 0.7373513816426139,
59
- "eval_micro-f1": 0.7873910127431254,
60
- "eval_runtime": 10.0776,
61
- "eval_samples_per_second": 99.23,
62
- "eval_steps_per_second": 3.175,
63
  "step": 1128
64
  },
65
  {
66
  "epoch": 5.0,
67
- "eval_loss": 0.17423860728740692,
68
- "eval_macro-f1": 0.7353822537422677,
69
- "eval_micro-f1": 0.7964841108857336,
70
- "eval_runtime": 10.1727,
71
- "eval_samples_per_second": 98.302,
72
- "eval_steps_per_second": 3.146,
73
  "step": 1410
74
  },
75
  {
76
  "epoch": 5.319148936170213,
77
- "grad_norm": 1.2388067245483398,
78
- "learning_rate": 2.2026595744680854e-05,
79
- "loss": 0.091,
80
  "step": 1500
81
  },
82
  {
83
  "epoch": 6.0,
84
- "eval_loss": 0.19554495811462402,
85
- "eval_macro-f1": 0.7451892220071915,
86
- "eval_micro-f1": 0.7875816993464052,
87
- "eval_runtime": 13.6476,
88
- "eval_samples_per_second": 73.273,
89
- "eval_steps_per_second": 2.345,
90
  "step": 1692
91
  },
92
  {
93
  "epoch": 7.0,
94
- "eval_loss": 0.1909777820110321,
95
- "eval_macro-f1": 0.7475109109665823,
96
- "eval_micro-f1": 0.7894201424211598,
97
- "eval_runtime": 9.5034,
98
- "eval_samples_per_second": 105.225,
99
- "eval_steps_per_second": 3.367,
100
  "step": 1974
101
  },
102
  {
103
- "epoch": 7.092198581560283,
104
- "grad_norm": 1.6909549236297607,
105
- "learning_rate": 1.9367021276595743e-05,
106
- "loss": 0.071,
107
- "step": 2000
108
- },
109
- {
110
- "epoch": 8.0,
111
- "eval_loss": 0.2065315544605255,
112
- "eval_macro-f1": 0.7429317897586639,
113
- "eval_micro-f1": 0.785572468563865,
114
- "eval_runtime": 9.4369,
115
- "eval_samples_per_second": 105.967,
116
- "eval_steps_per_second": 3.391,
117
- "step": 2256
118
- },
119
- {
120
- "epoch": 8.0,
121
- "step": 2256,
122
- "total_flos": 3.7940453059015475e+17,
123
- "train_loss": 0.10690519699813626,
124
- "train_runtime": 1502.7914,
125
- "train_samples_per_second": 119.777,
126
- "train_steps_per_second": 3.753
127
  }
128
  ],
129
  "logging_steps": 500,
@@ -152,7 +135,7 @@
152
  "attributes": {}
153
  }
154
  },
155
- "total_flos": 3.7940453059015475e+17,
156
  "train_batch_size": 16,
157
  "trial_name": null,
158
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7924403183023873,
3
+ "best_model_checkpoint": "logs/ecthr_b/MHGanainy/xmod-roberta-base-legal-multi/seed_1/checkpoint-1128",
4
+ "epoch": 7.0,
5
  "eval_steps": 500,
6
+ "global_step": 1974,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 0.18594583868980408,
14
+ "eval_macro-f1": 0.6610565197291588,
15
+ "eval_micro-f1": 0.7678571428571429,
16
+ "eval_runtime": 9.434,
17
+ "eval_samples_per_second": 105.999,
18
+ "eval_steps_per_second": 3.392,
19
  "step": 282
20
  },
21
  {
22
  "epoch": 1.773049645390071,
23
+ "grad_norm": 1.6924870014190674,
24
  "learning_rate": 2.7345744680851065e-05,
25
+ "loss": 0.1734,
26
  "step": 500
27
  },
28
  {
29
  "epoch": 2.0,
30
+ "eval_loss": 0.204640731215477,
31
+ "eval_macro-f1": 0.6510354952925879,
32
+ "eval_micro-f1": 0.7552314521242867,
33
+ "eval_runtime": 10.5166,
34
+ "eval_samples_per_second": 95.088,
35
+ "eval_steps_per_second": 3.043,
36
  "step": 564
37
  },
38
  {
39
  "epoch": 3.0,
40
+ "eval_loss": 0.17442859709262848,
41
+ "eval_macro-f1": 0.7103393399151534,
42
+ "eval_micro-f1": 0.7856437273625289,
43
+ "eval_runtime": 9.5348,
44
+ "eval_samples_per_second": 104.879,
45
+ "eval_steps_per_second": 3.356,
46
  "step": 846
47
  },
48
  {
49
  "epoch": 3.546099290780142,
50
+ "grad_norm": 0.9688987135887146,
51
  "learning_rate": 2.4686170212765958e-05,
52
+ "loss": 0.1145,
53
  "step": 1000
54
  },
55
  {
56
  "epoch": 4.0,
57
+ "eval_loss": 0.18016603589057922,
58
+ "eval_macro-f1": 0.739214459676758,
59
+ "eval_micro-f1": 0.7924403183023873,
60
+ "eval_runtime": 10.4478,
61
+ "eval_samples_per_second": 95.714,
62
+ "eval_steps_per_second": 3.063,
63
  "step": 1128
64
  },
65
  {
66
  "epoch": 5.0,
67
+ "eval_loss": 0.19197727739810944,
68
+ "eval_macro-f1": 0.721481034377041,
69
+ "eval_micro-f1": 0.7874427730542839,
70
+ "eval_runtime": 10.2803,
71
+ "eval_samples_per_second": 97.273,
72
+ "eval_steps_per_second": 3.113,
73
  "step": 1410
74
  },
75
  {
76
  "epoch": 5.319148936170213,
77
+ "grad_norm": 0.9491854310035706,
78
+ "learning_rate": 2.203191489361702e-05,
79
+ "loss": 0.0906,
80
  "step": 1500
81
  },
82
  {
83
  "epoch": 6.0,
84
+ "eval_loss": 0.19124138355255127,
85
+ "eval_macro-f1": 0.7336639771212009,
86
+ "eval_micro-f1": 0.7867017774851877,
87
+ "eval_runtime": 13.0899,
88
+ "eval_samples_per_second": 76.395,
89
+ "eval_steps_per_second": 2.445,
90
  "step": 1692
91
  },
92
  {
93
  "epoch": 7.0,
94
+ "eval_loss": 0.1959591656923294,
95
+ "eval_macro-f1": 0.7411436475174427,
96
+ "eval_micro-f1": 0.7906208718626155,
97
+ "eval_runtime": 9.3742,
98
+ "eval_samples_per_second": 106.676,
99
+ "eval_steps_per_second": 3.414,
100
  "step": 1974
101
  },
102
  {
103
+ "epoch": 7.0,
104
+ "step": 1974,
105
+ "total_flos": 3.9803472298849075e+17,
106
+ "train_loss": 0.11263628595385025,
107
+ "train_runtime": 1312.5646,
108
+ "train_samples_per_second": 137.136,
109
+ "train_steps_per_second": 4.297
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  }
111
  ],
112
  "logging_steps": 500,
 
135
  "attributes": {}
136
  }
137
  },
138
+ "total_flos": 3.9803472298849075e+17,
139
  "train_batch_size": 16,
140
  "trial_name": null,
141
  "trial_params": null