MHGanainy commited on
Commit
77fcff9
1 Parent(s): 8dd6dea

MHGanainy/gpt2-xl-lora-multi-512-k5-3-im-4

Browse files
README.md CHANGED
@@ -15,6 +15,8 @@ should probably proofread and complete it, then remove this comment. -->
15
  # gpt2-xl-lora-multi-512-k5-3-im-4
16
 
17
  This model is a fine-tuned version of [openai-community/gpt2-xl](https://huggingface.co/openai-community/gpt2-xl) on an unknown dataset.
 
 
18
 
19
  ## Model description
20
 
 
15
  # gpt2-xl-lora-multi-512-k5-3-im-4
16
 
17
  This model is a fine-tuned version of [openai-community/gpt2-xl](https://huggingface.co/openai-community/gpt2-xl) on an unknown dataset.
18
+ It achieves the following results on the evaluation set:
19
+ - Loss: 1.7357
20
 
21
  ## Model description
22
 
all_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_loss": 1.7357099056243896,
4
+ "eval_runtime": 84.3971,
5
+ "eval_samples_per_second": 137.908,
6
+ "eval_steps_per_second": 8.626,
7
+ "perplexity": 5.6729536363486694,
8
+ "total_flos": 6.449989014257664e+17,
9
+ "train_loss": 2.3536567902828183,
10
+ "train_runtime": 1585.2111,
11
+ "train_samples_per_second": 44.795,
12
+ "train_steps_per_second": 2.8
13
+ }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_loss": 1.7357099056243896,
4
+ "eval_runtime": 84.3971,
5
+ "eval_samples_per_second": 137.908,
6
+ "eval_steps_per_second": 8.626,
7
+ "perplexity": 5.6729536363486694
8
+ }
runs/Oct22_13-16-51_6a6876ac5cbf/events.out.tfevents.1729604688.6a6876ac5cbf.2186.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bd11c13efcd41fc5a1e14acd662141bf129a83272d9429b626d9919c5fa72fb
3
+ size 311
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 6.449989014257664e+17,
4
+ "train_loss": 2.3536567902828183,
5
+ "train_runtime": 1585.2111,
6
+ "train_samples_per_second": 44.795,
7
+ "train_steps_per_second": 2.8
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,350 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 4439,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.022527596305474205,
13
+ "grad_norm": 0.38304445147514343,
14
+ "learning_rate": 5.464788732394366e-07,
15
+ "loss": 4.2447,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.04505519261094841,
20
+ "grad_norm": 0.4701688885688782,
21
+ "learning_rate": 1.098591549295775e-06,
22
+ "loss": 4.2227,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.06758278891642262,
27
+ "grad_norm": 0.8463152050971985,
28
+ "learning_rate": 1.6619718309859157e-06,
29
+ "loss": 4.173,
30
+ "step": 300
31
+ },
32
+ {
33
+ "epoch": 0.09011038522189682,
34
+ "grad_norm": 0.945652425289154,
35
+ "learning_rate": 2.2253521126760566e-06,
36
+ "loss": 4.0275,
37
+ "step": 400
38
+ },
39
+ {
40
+ "epoch": 0.11263798152737103,
41
+ "grad_norm": 0.633182168006897,
42
+ "learning_rate": 2.7887323943661974e-06,
43
+ "loss": 3.827,
44
+ "step": 500
45
+ },
46
+ {
47
+ "epoch": 0.13516557783284525,
48
+ "grad_norm": 0.5879175662994385,
49
+ "learning_rate": 3.352112676056338e-06,
50
+ "loss": 3.693,
51
+ "step": 600
52
+ },
53
+ {
54
+ "epoch": 0.15769317413831943,
55
+ "grad_norm": 0.7559935450553894,
56
+ "learning_rate": 3.915492957746479e-06,
57
+ "loss": 3.5619,
58
+ "step": 700
59
+ },
60
+ {
61
+ "epoch": 0.18022077044379364,
62
+ "grad_norm": 1.0831689834594727,
63
+ "learning_rate": 4.4788732394366205e-06,
64
+ "loss": 3.3173,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 0.20274836674926786,
69
+ "grad_norm": 1.159292459487915,
70
+ "learning_rate": 5.042253521126761e-06,
71
+ "loss": 2.9277,
72
+ "step": 900
73
+ },
74
+ {
75
+ "epoch": 0.22527596305474207,
76
+ "grad_norm": 0.8235029578208923,
77
+ "learning_rate": 5.605633802816902e-06,
78
+ "loss": 2.4739,
79
+ "step": 1000
80
+ },
81
+ {
82
+ "epoch": 0.24780355936021625,
83
+ "grad_norm": 0.7550758719444275,
84
+ "learning_rate": 6.169014084507042e-06,
85
+ "loss": 2.3567,
86
+ "step": 1100
87
+ },
88
+ {
89
+ "epoch": 0.2703311556656905,
90
+ "grad_norm": 0.39695167541503906,
91
+ "learning_rate": 6.7323943661971836e-06,
92
+ "loss": 2.3004,
93
+ "step": 1200
94
+ },
95
+ {
96
+ "epoch": 0.2928587519711647,
97
+ "grad_norm": 0.39338722825050354,
98
+ "learning_rate": 7.295774647887325e-06,
99
+ "loss": 2.2446,
100
+ "step": 1300
101
+ },
102
+ {
103
+ "epoch": 0.31538634827663886,
104
+ "grad_norm": 0.485357403755188,
105
+ "learning_rate": 7.859154929577465e-06,
106
+ "loss": 2.2193,
107
+ "step": 1400
108
+ },
109
+ {
110
+ "epoch": 0.3379139445821131,
111
+ "grad_norm": 0.3751625120639801,
112
+ "learning_rate": 8.422535211267607e-06,
113
+ "loss": 2.1718,
114
+ "step": 1500
115
+ },
116
+ {
117
+ "epoch": 0.3604415408875873,
118
+ "grad_norm": 0.4635879099369049,
119
+ "learning_rate": 8.985915492957748e-06,
120
+ "loss": 2.164,
121
+ "step": 1600
122
+ },
123
+ {
124
+ "epoch": 0.3829691371930615,
125
+ "grad_norm": 0.34097710251808167,
126
+ "learning_rate": 9.549295774647888e-06,
127
+ "loss": 2.1225,
128
+ "step": 1700
129
+ },
130
+ {
131
+ "epoch": 0.4054967334985357,
132
+ "grad_norm": 0.35703760385513306,
133
+ "learning_rate": 1.0112676056338028e-05,
134
+ "loss": 2.0951,
135
+ "step": 1800
136
+ },
137
+ {
138
+ "epoch": 0.4280243298040099,
139
+ "grad_norm": 0.39830803871154785,
140
+ "learning_rate": 1.067605633802817e-05,
141
+ "loss": 2.0777,
142
+ "step": 1900
143
+ },
144
+ {
145
+ "epoch": 0.45055192610948414,
146
+ "grad_norm": 0.4066463112831116,
147
+ "learning_rate": 1.1239436619718311e-05,
148
+ "loss": 2.0564,
149
+ "step": 2000
150
+ },
151
+ {
152
+ "epoch": 0.4730795224149583,
153
+ "grad_norm": 0.3597283363342285,
154
+ "learning_rate": 1.1802816901408451e-05,
155
+ "loss": 2.0297,
156
+ "step": 2100
157
+ },
158
+ {
159
+ "epoch": 0.4956071187204325,
160
+ "grad_norm": 0.5371715426445007,
161
+ "learning_rate": 1.2366197183098591e-05,
162
+ "loss": 2.011,
163
+ "step": 2200
164
+ },
165
+ {
166
+ "epoch": 0.5181347150259067,
167
+ "grad_norm": 0.5351554751396179,
168
+ "learning_rate": 1.2929577464788734e-05,
169
+ "loss": 1.9888,
170
+ "step": 2300
171
+ },
172
+ {
173
+ "epoch": 0.540662311331381,
174
+ "grad_norm": 0.42759764194488525,
175
+ "learning_rate": 1.3492957746478874e-05,
176
+ "loss": 1.969,
177
+ "step": 2400
178
+ },
179
+ {
180
+ "epoch": 0.5631899076368552,
181
+ "grad_norm": 0.7073168754577637,
182
+ "learning_rate": 1.4056338028169014e-05,
183
+ "loss": 1.953,
184
+ "step": 2500
185
+ },
186
+ {
187
+ "epoch": 0.5857175039423294,
188
+ "grad_norm": 0.3740929365158081,
189
+ "learning_rate": 1.4619718309859156e-05,
190
+ "loss": 1.9429,
191
+ "step": 2600
192
+ },
193
+ {
194
+ "epoch": 0.6082451002478035,
195
+ "grad_norm": 0.38934916257858276,
196
+ "learning_rate": 1.5183098591549298e-05,
197
+ "loss": 1.9322,
198
+ "step": 2700
199
+ },
200
+ {
201
+ "epoch": 0.6307726965532777,
202
+ "grad_norm": 0.41777440905570984,
203
+ "learning_rate": 1.5746478873239437e-05,
204
+ "loss": 1.9141,
205
+ "step": 2800
206
+ },
207
+ {
208
+ "epoch": 0.653300292858752,
209
+ "grad_norm": 0.4831548035144806,
210
+ "learning_rate": 1.630985915492958e-05,
211
+ "loss": 1.9028,
212
+ "step": 2900
213
+ },
214
+ {
215
+ "epoch": 0.6758278891642262,
216
+ "grad_norm": 0.5036677122116089,
217
+ "learning_rate": 1.687323943661972e-05,
218
+ "loss": 1.8989,
219
+ "step": 3000
220
+ },
221
+ {
222
+ "epoch": 0.6983554854697004,
223
+ "grad_norm": 0.5449413061141968,
224
+ "learning_rate": 1.743661971830986e-05,
225
+ "loss": 1.9056,
226
+ "step": 3100
227
+ },
228
+ {
229
+ "epoch": 0.7208830817751746,
230
+ "grad_norm": 0.49423643946647644,
231
+ "learning_rate": 1.8e-05,
232
+ "loss": 1.8782,
233
+ "step": 3200
234
+ },
235
+ {
236
+ "epoch": 0.7434106780806488,
237
+ "grad_norm": 0.41053274273872375,
238
+ "learning_rate": 1.8563380281690142e-05,
239
+ "loss": 1.8802,
240
+ "step": 3300
241
+ },
242
+ {
243
+ "epoch": 0.765938274386123,
244
+ "grad_norm": 0.5655019283294678,
245
+ "learning_rate": 1.9126760563380284e-05,
246
+ "loss": 1.8609,
247
+ "step": 3400
248
+ },
249
+ {
250
+ "epoch": 0.7884658706915972,
251
+ "grad_norm": 0.40682417154312134,
252
+ "learning_rate": 1.9690140845070425e-05,
253
+ "loss": 1.8518,
254
+ "step": 3500
255
+ },
256
+ {
257
+ "epoch": 0.8109934669970714,
258
+ "grad_norm": 0.4014970064163208,
259
+ "learning_rate": 1.9873824264530204e-05,
260
+ "loss": 1.8537,
261
+ "step": 3600
262
+ },
263
+ {
264
+ "epoch": 0.8335210633025456,
265
+ "grad_norm": 0.7755386829376221,
266
+ "learning_rate": 1.8715663248272807e-05,
267
+ "loss": 1.8414,
268
+ "step": 3700
269
+ },
270
+ {
271
+ "epoch": 0.8560486596080198,
272
+ "grad_norm": 0.42806145548820496,
273
+ "learning_rate": 1.6480361721016053e-05,
274
+ "loss": 1.8389,
275
+ "step": 3800
276
+ },
277
+ {
278
+ "epoch": 0.878576255913494,
279
+ "grad_norm": 0.4193844795227051,
280
+ "learning_rate": 1.3444173337900201e-05,
281
+ "loss": 1.8373,
282
+ "step": 3900
283
+ },
284
+ {
285
+ "epoch": 0.9011038522189683,
286
+ "grad_norm": 0.5671639442443848,
287
+ "learning_rate": 9.982330759173782e-06,
288
+ "loss": 1.8399,
289
+ "step": 4000
290
+ },
291
+ {
292
+ "epoch": 0.9236314485244425,
293
+ "grad_norm": 0.3709975481033325,
294
+ "learning_rate": 6.5226718645285755e-06,
295
+ "loss": 1.8389,
296
+ "step": 4100
297
+ },
298
+ {
299
+ "epoch": 0.9461590448299166,
300
+ "grad_norm": 0.32863345742225647,
301
+ "learning_rate": 3.4927646592337405e-06,
302
+ "loss": 1.8381,
303
+ "step": 4200
304
+ },
305
+ {
306
+ "epoch": 0.9686866411353908,
307
+ "grad_norm": 0.49114689230918884,
308
+ "learning_rate": 1.2670655331731553e-06,
309
+ "loss": 1.8427,
310
+ "step": 4300
311
+ },
312
+ {
313
+ "epoch": 0.991214237440865,
314
+ "grad_norm": 0.41148802638053894,
315
+ "learning_rate": 1.2064141191336588e-07,
316
+ "loss": 1.8362,
317
+ "step": 4400
318
+ },
319
+ {
320
+ "epoch": 1.0,
321
+ "step": 4439,
322
+ "total_flos": 6.449989014257664e+17,
323
+ "train_loss": 2.3536567902828183,
324
+ "train_runtime": 1585.2111,
325
+ "train_samples_per_second": 44.795,
326
+ "train_steps_per_second": 2.8
327
+ }
328
+ ],
329
+ "logging_steps": 100,
330
+ "max_steps": 4439,
331
+ "num_input_tokens_seen": 0,
332
+ "num_train_epochs": 1,
333
+ "save_steps": 500,
334
+ "stateful_callbacks": {
335
+ "TrainerControl": {
336
+ "args": {
337
+ "should_epoch_stop": false,
338
+ "should_evaluate": false,
339
+ "should_log": false,
340
+ "should_save": true,
341
+ "should_training_stop": true
342
+ },
343
+ "attributes": {}
344
+ }
345
+ },
346
+ "total_flos": 6.449989014257664e+17,
347
+ "train_batch_size": 2,
348
+ "trial_name": null,
349
+ "trial_params": null
350
+ }