learn3r committed on
Commit
507a9a9
1 Parent(s): 079c29b

End of training

Browse files
Files changed (5) hide show
  1. README.md +3 -1
  2. all_results.json +8 -8
  3. eval_results.json +4 -4
  4. train_results.json +4 -4
  5. trainer_state.json +71 -71
README.md CHANGED
@@ -3,6 +3,8 @@ license: apache-2.0
3
  base_model: facebook/bart-base
4
  tags:
5
  - generated_from_trainer
 
 
6
  model-index:
7
  - name: summ_screen_fd_blueprint_epoch_10
8
  results: []
@@ -13,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # summ_screen_fd_blueprint_epoch_10
15
 
16
- This model is a fine-tuned version of [facebook/bart-base](https://huggingface.co/facebook/bart-base) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
  - Loss: 1.9589
19
 
 
3
  base_model: facebook/bart-base
4
  tags:
5
  - generated_from_trainer
6
+ datasets:
7
+ - learn3r/summ_screen_fd_bp
8
  model-index:
9
  - name: summ_screen_fd_blueprint_epoch_10
10
  results: []
 
15
 
16
  # summ_screen_fd_blueprint_epoch_10
17
 
18
+ This model is a fine-tuned version of [facebook/bart-base](https://huggingface.co/facebook/bart-base) on the learn3r/summ_screen_fd_bp dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 1.9589
21
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 9.74,
3
- "eval_loss": 1.9205037355422974,
4
- "eval_runtime": 2.0094,
5
  "eval_samples": 338,
6
- "eval_samples_per_second": 168.21,
7
- "eval_steps_per_second": 21.4,
8
- "train_loss": 2.1082653863089424,
9
- "train_runtime": 679.206,
10
  "train_samples": 3673,
11
- "train_samples_per_second": 54.078,
12
- "train_steps_per_second": 0.206
13
  }
 
1
  {
2
  "epoch": 9.74,
3
+ "eval_loss": 1.9589176177978516,
4
+ "eval_runtime": 2.869,
5
  "eval_samples": 338,
6
+ "eval_samples_per_second": 117.812,
7
+ "eval_steps_per_second": 14.988,
8
+ "train_loss": 2.299447972433908,
9
+ "train_runtime": 1088.9846,
10
  "train_samples": 3673,
11
+ "train_samples_per_second": 33.729,
12
+ "train_steps_per_second": 0.129
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.74,
3
- "eval_loss": 1.9205037355422974,
4
- "eval_runtime": 2.0094,
5
  "eval_samples": 338,
6
- "eval_samples_per_second": 168.21,
7
- "eval_steps_per_second": 21.4
8
  }
 
1
  {
2
  "epoch": 9.74,
3
+ "eval_loss": 1.9589176177978516,
4
+ "eval_runtime": 2.869,
5
  "eval_samples": 338,
6
+ "eval_samples_per_second": 117.812,
7
+ "eval_steps_per_second": 14.988
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.74,
3
- "train_loss": 2.1082653863089424,
4
- "train_runtime": 679.206,
5
  "train_samples": 3673,
6
- "train_samples_per_second": 54.078,
7
- "train_steps_per_second": 0.206
8
  }
 
1
  {
2
  "epoch": 9.74,
3
+ "train_loss": 2.299447972433908,
4
+ "train_runtime": 1088.9846,
5
  "train_samples": 3673,
6
+ "train_samples_per_second": 33.729,
7
+ "train_steps_per_second": 0.129
8
  }
trainer_state.json CHANGED
@@ -10,175 +10,175 @@
10
  {
11
  "epoch": 0.7,
12
  "learning_rate": 9.285714285714286e-05,
13
- "loss": 3.3723,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.97,
18
- "eval_loss": 2.261305332183838,
19
- "eval_runtime": 1.7879,
20
- "eval_samples_per_second": 189.047,
21
- "eval_steps_per_second": 24.05,
22
  "step": 14
23
  },
24
  {
25
  "epoch": 1.39,
26
- "learning_rate": 8.642857142857143e-05,
27
- "loss": 2.4888,
28
  "step": 20
29
  },
30
  {
31
  "epoch": 1.95,
32
- "eval_loss": 2.087858200073242,
33
- "eval_runtime": 1.8469,
34
- "eval_samples_per_second": 183.011,
35
- "eval_steps_per_second": 23.283,
36
  "step": 28
37
  },
38
  {
39
  "epoch": 2.09,
40
- "learning_rate": 7.928571428571429e-05,
41
- "loss": 2.3065,
42
  "step": 30
43
  },
44
  {
45
  "epoch": 2.78,
46
- "learning_rate": 7.214285714285714e-05,
47
- "loss": 2.1694,
48
  "step": 40
49
  },
50
  {
51
  "epoch": 2.99,
52
- "eval_loss": 2.0183253288269043,
53
- "eval_runtime": 1.7709,
54
- "eval_samples_per_second": 190.86,
55
- "eval_steps_per_second": 24.281,
56
  "step": 43
57
  },
58
  {
59
  "epoch": 3.48,
60
- "learning_rate": 6.500000000000001e-05,
61
- "loss": 2.0846,
62
  "step": 50
63
  },
64
  {
65
  "epoch": 3.97,
66
- "eval_loss": 1.983121395111084,
67
- "eval_runtime": 1.724,
68
- "eval_samples_per_second": 196.056,
69
- "eval_steps_per_second": 24.942,
70
  "step": 57
71
  },
72
  {
73
  "epoch": 4.17,
74
- "learning_rate": 5.785714285714287e-05,
75
- "loss": 2.026,
76
  "step": 60
77
  },
78
  {
79
  "epoch": 4.87,
80
- "learning_rate": 5.0714285714285716e-05,
81
- "loss": 1.9812,
82
  "step": 70
83
  },
84
  {
85
  "epoch": 4.94,
86
- "eval_loss": 1.9576869010925293,
87
- "eval_runtime": 2.058,
88
- "eval_samples_per_second": 164.239,
89
- "eval_steps_per_second": 20.894,
90
  "step": 71
91
  },
92
  {
93
  "epoch": 5.57,
94
- "learning_rate": 4.3571428571428576e-05,
95
- "loss": 1.9352,
96
  "step": 80
97
  },
98
  {
99
  "epoch": 5.98,
100
- "eval_loss": 1.9370218515396118,
101
- "eval_runtime": 1.7855,
102
- "eval_samples_per_second": 189.305,
103
- "eval_steps_per_second": 24.083,
104
  "step": 86
105
  },
106
  {
107
  "epoch": 6.26,
108
- "learning_rate": 3.642857142857143e-05,
109
- "loss": 1.9164,
110
  "step": 90
111
  },
112
  {
113
  "epoch": 6.96,
114
- "learning_rate": 2.9285714285714288e-05,
115
- "loss": 1.8822,
116
  "step": 100
117
  },
118
  {
119
  "epoch": 6.96,
120
- "eval_loss": 1.9341849088668823,
121
- "eval_runtime": 1.8263,
122
- "eval_samples_per_second": 185.074,
123
- "eval_steps_per_second": 23.545,
124
  "step": 100
125
  },
126
  {
127
  "epoch": 7.65,
128
- "learning_rate": 2.214285714285714e-05,
129
- "loss": 1.853,
130
  "step": 110
131
  },
132
  {
133
  "epoch": 8.0,
134
- "eval_loss": 1.9238194227218628,
135
- "eval_runtime": 1.8358,
136
- "eval_samples_per_second": 184.114,
137
- "eval_steps_per_second": 23.423,
138
  "step": 115
139
  },
140
  {
141
  "epoch": 8.35,
142
- "learning_rate": 1.5e-05,
143
- "loss": 1.8446,
144
  "step": 120
145
  },
146
  {
147
  "epoch": 8.97,
148
- "eval_loss": 1.9229856729507446,
149
- "eval_runtime": 1.8246,
150
- "eval_samples_per_second": 185.25,
151
- "eval_steps_per_second": 23.567,
152
  "step": 129
153
  },
154
  {
155
  "epoch": 9.04,
156
- "learning_rate": 7.857142857142858e-06,
157
- "loss": 1.8343,
158
  "step": 130
159
  },
160
  {
161
  "epoch": 9.74,
162
- "learning_rate": 7.142857142857143e-07,
163
- "loss": 1.8211,
164
  "step": 140
165
  },
166
  {
167
  "epoch": 9.74,
168
- "eval_loss": 1.9205037355422974,
169
- "eval_runtime": 1.9363,
170
- "eval_samples_per_second": 174.556,
171
- "eval_steps_per_second": 22.207,
172
  "step": 140
173
  },
174
  {
175
  "epoch": 9.74,
176
  "step": 140,
177
  "total_flos": 2.181454126645248e+16,
178
- "train_loss": 2.1082653863089424,
179
- "train_runtime": 679.206,
180
- "train_samples_per_second": 54.078,
181
- "train_steps_per_second": 0.206
182
  }
183
  ],
184
  "max_steps": 140,
 
10
  {
11
  "epoch": 0.7,
12
  "learning_rate": 9.285714285714286e-05,
13
+ "loss": 3.519,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.97,
18
+ "eval_loss": 2.3055925369262695,
19
+ "eval_runtime": 3.071,
20
+ "eval_samples_per_second": 110.064,
21
+ "eval_steps_per_second": 14.002,
22
  "step": 14
23
  },
24
  {
25
  "epoch": 1.39,
26
+ "learning_rate": 8.571428571428571e-05,
27
+ "loss": 2.6644,
28
  "step": 20
29
  },
30
  {
31
  "epoch": 1.95,
32
+ "eval_loss": 2.1348636150360107,
33
+ "eval_runtime": 2.9701,
34
+ "eval_samples_per_second": 113.801,
35
+ "eval_steps_per_second": 14.478,
36
  "step": 28
37
  },
38
  {
39
  "epoch": 2.09,
40
+ "learning_rate": 7.857142857142858e-05,
41
+ "loss": 2.465,
42
  "step": 30
43
  },
44
  {
45
  "epoch": 2.78,
46
+ "learning_rate": 7.142857142857143e-05,
47
+ "loss": 2.3418,
48
  "step": 40
49
  },
50
  {
51
  "epoch": 2.99,
52
+ "eval_loss": 2.0616259574890137,
53
+ "eval_runtime": 2.731,
54
+ "eval_samples_per_second": 123.764,
55
+ "eval_steps_per_second": 15.745,
56
  "step": 43
57
  },
58
  {
59
  "epoch": 3.48,
60
+ "learning_rate": 6.428571428571429e-05,
61
+ "loss": 2.258,
62
  "step": 50
63
  },
64
  {
65
  "epoch": 3.97,
66
+ "eval_loss": 2.035996913909912,
67
+ "eval_runtime": 2.9612,
68
+ "eval_samples_per_second": 114.144,
69
+ "eval_steps_per_second": 14.521,
70
  "step": 57
71
  },
72
  {
73
  "epoch": 4.17,
74
+ "learning_rate": 5.714285714285714e-05,
75
+ "loss": 2.2166,
76
  "step": 60
77
  },
78
  {
79
  "epoch": 4.87,
80
+ "learning_rate": 5e-05,
81
+ "loss": 2.169,
82
  "step": 70
83
  },
84
  {
85
  "epoch": 4.94,
86
+ "eval_loss": 1.9996529817581177,
87
+ "eval_runtime": 2.9471,
88
+ "eval_samples_per_second": 114.69,
89
+ "eval_steps_per_second": 14.591,
90
  "step": 71
91
  },
92
  {
93
  "epoch": 5.57,
94
+ "learning_rate": 4.2857142857142856e-05,
95
+ "loss": 2.1336,
96
  "step": 80
97
  },
98
  {
99
  "epoch": 5.98,
100
+ "eval_loss": 1.986232042312622,
101
+ "eval_runtime": 2.9637,
102
+ "eval_samples_per_second": 114.046,
103
+ "eval_steps_per_second": 14.509,
104
  "step": 86
105
  },
106
  {
107
  "epoch": 6.26,
108
+ "learning_rate": 3.571428571428572e-05,
109
+ "loss": 2.1131,
110
  "step": 90
111
  },
112
  {
113
  "epoch": 6.96,
114
+ "learning_rate": 2.857142857142857e-05,
115
+ "loss": 2.0952,
116
  "step": 100
117
  },
118
  {
119
  "epoch": 6.96,
120
+ "eval_loss": 1.9753971099853516,
121
+ "eval_runtime": 2.8395,
122
+ "eval_samples_per_second": 119.034,
123
+ "eval_steps_per_second": 15.143,
124
  "step": 100
125
  },
126
  {
127
  "epoch": 7.65,
128
+ "learning_rate": 2.1428571428571428e-05,
129
+ "loss": 2.0666,
130
  "step": 110
131
  },
132
  {
133
  "epoch": 8.0,
134
+ "eval_loss": 1.9659732580184937,
135
+ "eval_runtime": 2.7307,
136
+ "eval_samples_per_second": 123.779,
137
+ "eval_steps_per_second": 15.747,
138
  "step": 115
139
  },
140
  {
141
  "epoch": 8.35,
142
+ "learning_rate": 1.4285714285714285e-05,
143
+ "loss": 2.0563,
144
  "step": 120
145
  },
146
  {
147
  "epoch": 8.97,
148
+ "eval_loss": 1.9610240459442139,
149
+ "eval_runtime": 2.8488,
150
+ "eval_samples_per_second": 118.648,
151
+ "eval_steps_per_second": 15.094,
152
  "step": 129
153
  },
154
  {
155
  "epoch": 9.04,
156
+ "learning_rate": 7.142857142857143e-06,
157
+ "loss": 2.051,
158
  "step": 130
159
  },
160
  {
161
  "epoch": 9.74,
162
+ "learning_rate": 0.0,
163
+ "loss": 2.0426,
164
  "step": 140
165
  },
166
  {
167
  "epoch": 9.74,
168
+ "eval_loss": 1.9589176177978516,
169
+ "eval_runtime": 2.9503,
170
+ "eval_samples_per_second": 114.564,
171
+ "eval_steps_per_second": 14.575,
172
  "step": 140
173
  },
174
  {
175
  "epoch": 9.74,
176
  "step": 140,
177
  "total_flos": 2.181454126645248e+16,
178
+ "train_loss": 2.299447972433908,
179
+ "train_runtime": 1088.9846,
180
+ "train_samples_per_second": 33.729,
181
+ "train_steps_per_second": 0.129
182
  }
183
  ],
184
  "max_steps": 140,