Dmitry Chaplinsky
committed on
Commit
•
1f399b4
1
Parent(s):
12f0c46
Another try
Browse files
- .gitattributes +2 -0
- README.md +33 -0
- best-lm.pt +3 -0
- flair_dictionary.pkl +3 -0
- loss.txt +308 -0
- pipeline.py +23 -0
- requirements.txt +1 -0
.gitattributes
CHANGED
@@ -30,3 +30,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
30 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
31 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
32 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
30 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
31 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
32 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
33 |
+
best-lm.pt filter=lfs diff=lfs merge=lfs -text
|
34 |
+
flair_dictionary.pkl filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -1,3 +1,36 @@
|
|
1 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
license: mit
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
language:
|
3 |
+
- uk
|
4 |
+
tags:
|
5 |
+
- text2text-generation
|
6 |
+
- flair
|
7 |
+
library_name: generic
|
8 |
license: mit
|
9 |
+
metrics:
|
10 |
+
- perplexity
|
11 |
+
datasets:
|
12 |
+
- ubertext2.0
|
13 |
+
widget:
|
14 |
+
- text: "Росія зазнає поразки"
|
15 |
+
- text: "Достеменно відомо, що Україна перемагає"
|
16 |
---
|
17 |
+
|
18 |
+
# Ukrainian flair embeddings (forward)
|
19 |
+
|
20 |
+
Trained for 10 epochs on the texts from ubertext2.0 (WIP).
|
21 |
+
The character dictionary used for training is in the `flair_dictionary.pkl` file.
|
22 |
+
|
23 |
+
For more information on flair embeddings see [the article](https://github.com/flairNLP/flair/blob/master/resources/docs/embeddings/FLAIR_EMBEDDINGS.md) or the paper below:
|
24 |
+
|
25 |
+
|
26 |
+
```bibtex
|
27 |
+
@inproceedings{akbik2018coling,
|
28 |
+
title={Contextual String Embeddings for Sequence Labeling},
|
29 |
+
author={Akbik, Alan and Blythe, Duncan and Vollgraf, Roland},
|
30 |
+
booktitle = {{COLING} 2018, 27th International Conference on Computational Linguistics},
|
31 |
+
pages = {1638--1649},
|
32 |
+
year = {2018}
|
33 |
+
}
|
34 |
+
```
|
35 |
+
|
36 |
+
Copyright: Dmytro Chaplynskyi, [lang-uk](https://lang.org.ua) project, 2022
|
best-lm.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d952419535bc66f60333174cad83dac8e95b83da1546a1ea5303823d758c1802
|
3 |
+
size 22791455
|
flair_dictionary.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2125c32d2db5fb79676a8a6f087b19e9c3b788cb19b87073423e31e176d1fe24
|
3 |
+
size 11900
|
loss.txt
ADDED
@@ -0,0 +1,308 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
| end of split 1 / 28 | epoch 1 | time: 3240.36s | valid loss 1.6147 | valid ppl 5.0265 | learning rate 20.0000
|
2 |
+
| end of split 2 / 28 | epoch 1 | time: 3308.92s | valid loss 1.3939 | valid ppl 4.0306 | learning rate 20.0000
|
3 |
+
| end of split 3 / 28 | epoch 1 | time: 3314.96s | valid loss 1.3076 | valid ppl 3.6972 | learning rate 20.0000
|
4 |
+
| end of split 4 / 28 | epoch 1 | time: 3310.54s | valid loss 1.2635 | valid ppl 3.5377 | learning rate 20.0000
|
5 |
+
| end of split 5 / 28 | epoch 1 | time: 3314.64s | valid loss 1.2355 | valid ppl 3.4401 | learning rate 20.0000
|
6 |
+
| end of split 6 / 28 | epoch 1 | time: 3315.86s | valid loss 1.2150 | valid ppl 3.3702 | learning rate 20.0000
|
7 |
+
| end of split 7 / 28 | epoch 1 | time: 3310.63s | valid loss 1.1991 | valid ppl 3.3170 | learning rate 20.0000
|
8 |
+
| end of split 8 / 28 | epoch 1 | time: 3308.35s | valid loss 1.1851 | valid ppl 3.2712 | learning rate 20.0000
|
9 |
+
| end of split 9 / 28 | epoch 1 | time: 3300.72s | valid loss 1.1778 | valid ppl 3.2472 | learning rate 20.0000
|
10 |
+
| end of split 10 / 28 | epoch 1 | time: 3285.69s | valid loss 1.1703 | valid ppl 3.2231 | learning rate 20.0000
|
11 |
+
| end of split 11 / 28 | epoch 1 | time: 3296.28s | valid loss 1.1585 | valid ppl 3.1851 | learning rate 20.0000
|
12 |
+
| end of split 12 / 28 | epoch 1 | time: 3295.62s | valid loss 1.1557 | valid ppl 3.1762 | learning rate 20.0000
|
13 |
+
| end of split 13 / 28 | epoch 1 | time: 3299.01s | valid loss 1.1500 | valid ppl 3.1581 | learning rate 20.0000
|
14 |
+
| end of split 14 / 28 | epoch 1 | time: 3286.78s | valid loss 1.1402 | valid ppl 3.1274 | learning rate 20.0000
|
15 |
+
| end of split 15 / 28 | epoch 1 | time: 3297.94s | valid loss 1.1399 | valid ppl 3.1264 | learning rate 20.0000
|
16 |
+
| end of split 16 / 28 | epoch 1 | time: 3232.64s | valid loss 1.1346 | valid ppl 3.1099 | learning rate 20.0000
|
17 |
+
| end of split 17 / 28 | epoch 1 | time: 3083.81s | valid loss 1.1279 | valid ppl 3.0892 | learning rate 20.0000
|
18 |
+
| end of split 18 / 28 | epoch 1 | time: 3084.41s | valid loss 1.1277 | valid ppl 3.0885 | learning rate 20.0000
|
19 |
+
| end of split 19 / 28 | epoch 1 | time: 3083.52s | valid loss 1.1237 | valid ppl 3.0762 | learning rate 20.0000
|
20 |
+
| end of split 20 / 28 | epoch 1 | time: 3083.77s | valid loss 1.1200 | valid ppl 3.0649 | learning rate 20.0000
|
21 |
+
| end of split 21 / 28 | epoch 1 | time: 3080.82s | valid loss 1.1170 | valid ppl 3.0556 | learning rate 20.0000
|
22 |
+
| end of split 22 / 28 | epoch 1 | time: 3081.82s | valid loss 1.1157 | valid ppl 3.0516 | learning rate 20.0000
|
23 |
+
| end of split 23 / 28 | epoch 1 | time: 3083.61s | valid loss 1.1135 | valid ppl 3.0450 | learning rate 20.0000
|
24 |
+
| end of split 24 / 28 | epoch 1 | time: 3083.95s | valid loss 1.1100 | valid ppl 3.0343 | learning rate 20.0000
|
25 |
+
| end of split 25 / 28 | epoch 1 | time: 3079.21s | valid loss 1.1072 | valid ppl 3.0260 | learning rate 20.0000
|
26 |
+
| end of split 26 / 28 | epoch 1 | time: 3083.88s | valid loss 1.1086 | valid ppl 3.0303 | learning rate 20.0000
|
27 |
+
| end of split 27 / 28 | epoch 1 | time: 3203.88s | valid loss 1.1031 | valid ppl 3.0134 | learning rate 20.0000
|
28 |
+
| end of split 28 / 28 | epoch 1 | time: 965.58s | valid loss 1.1026 | valid ppl 3.0121 | learning rate 20.0000
|
29 |
+
| end of split 1 / 28 | epoch 2 | time: 3314.12s | valid loss 1.1022 | valid ppl 3.0108 | learning rate 20.0000
|
30 |
+
| end of split 2 / 28 | epoch 2 | time: 3475.79s | valid loss 1.0990 | valid ppl 3.0012 | learning rate 20.0000
|
31 |
+
| end of split 3 / 28 | epoch 2 | time: 3500.92s | valid loss 1.0974 | valid ppl 2.9965 | learning rate 20.0000
|
32 |
+
| end of split 4 / 28 | epoch 2 | time: 3501.92s | valid loss 1.0997 | valid ppl 3.0032 | learning rate 20.0000
|
33 |
+
| end of split 5 / 28 | epoch 2 | time: 3507.32s | valid loss 1.0945 | valid ppl 2.9878 | learning rate 20.0000
|
34 |
+
| end of split 6 / 28 | epoch 2 | time: 3502.18s | valid loss 1.0936 | valid ppl 2.9851 | learning rate 20.0000
|
35 |
+
| end of split 7 / 28 | epoch 2 | time: 999.87s | valid loss 1.0941 | valid ppl 2.9866 | learning rate 20.0000
|
36 |
+
| end of split 8 / 28 | epoch 2 | time: 3343.84s | valid loss 1.0923 | valid ppl 2.9810 | learning rate 20.0000
|
37 |
+
| end of split 9 / 28 | epoch 2 | time: 3340.49s | valid loss 1.0905 | valid ppl 2.9758 | learning rate 20.0000
|
38 |
+
| end of split 10 / 28 | epoch 2 | time: 3338.10s | valid loss 1.0919 | valid ppl 2.9798 | learning rate 20.0000
|
39 |
+
| end of split 11 / 28 | epoch 2 | time: 3331.60s | valid loss 1.0896 | valid ppl 2.9730 | learning rate 20.0000
|
40 |
+
| end of split 12 / 28 | epoch 2 | time: 3337.05s | valid loss 1.0863 | valid ppl 2.9632 | learning rate 20.0000
|
41 |
+
| end of split 13 / 28 | epoch 2 | time: 3336.59s | valid loss 1.0850 | valid ppl 2.9594 | learning rate 20.0000
|
42 |
+
| end of split 14 / 28 | epoch 2 | time: 3333.13s | valid loss 1.0850 | valid ppl 2.9593 | learning rate 20.0000
|
43 |
+
| end of split 15 / 28 | epoch 2 | time: 3331.93s | valid loss 1.0846 | valid ppl 2.9582 | learning rate 20.0000
|
44 |
+
| end of split 16 / 28 | epoch 2 | time: 3301.36s | valid loss 1.0835 | valid ppl 2.9549 | learning rate 20.0000
|
45 |
+
| end of split 17 / 28 | epoch 2 | time: 3308.70s | valid loss 1.0819 | valid ppl 2.9503 | learning rate 20.0000
|
46 |
+
| end of split 18 / 28 | epoch 2 | time: 3316.27s | valid loss 1.0817 | valid ppl 2.9497 | learning rate 20.0000
|
47 |
+
| end of split 19 / 28 | epoch 2 | time: 3310.75s | valid loss 1.0806 | valid ppl 2.9465 | learning rate 20.0000
|
48 |
+
| end of split 20 / 28 | epoch 2 | time: 3311.32s | valid loss 1.0781 | valid ppl 2.9391 | learning rate 20.0000
|
49 |
+
| end of split 21 / 28 | epoch 2 | time: 3309.05s | valid loss 1.0776 | valid ppl 2.9375 | learning rate 20.0000
|
50 |
+
| end of split 22 / 28 | epoch 2 | time: 3310.70s | valid loss 1.0780 | valid ppl 2.9389 | learning rate 20.0000
|
51 |
+
| end of split 23 / 28 | epoch 2 | time: 3311.48s | valid loss 1.0797 | valid ppl 2.9439 | learning rate 20.0000
|
52 |
+
| end of split 24 / 28 | epoch 2 | time: 3309.16s | valid loss 1.0760 | valid ppl 2.9330 | learning rate 20.0000
|
53 |
+
| end of split 25 / 28 | epoch 2 | time: 3300.41s | valid loss 1.0757 | valid ppl 2.9319 | learning rate 20.0000
|
54 |
+
| end of split 26 / 28 | epoch 2 | time: 3305.46s | valid loss 1.0736 | valid ppl 2.9260 | learning rate 20.0000
|
55 |
+
| end of split 27 / 28 | epoch 2 | time: 3307.40s | valid loss 1.0725 | valid ppl 2.9227 | learning rate 20.0000
|
56 |
+
| end of split 28 / 28 | epoch 2 | time: 3308.75s | valid loss 1.0735 | valid ppl 2.9256 | learning rate 20.0000
|
57 |
+
| end of split 1 / 28 | epoch 3 | time: 3335.00s | valid loss 1.0734 | valid ppl 2.9253 | learning rate 20.0000
|
58 |
+
| end of split 2 / 28 | epoch 3 | time: 3357.23s | valid loss 1.0715 | valid ppl 2.9198 | learning rate 20.0000
|
59 |
+
| end of split 3 / 28 | epoch 3 | time: 3354.52s | valid loss 1.0707 | valid ppl 2.9174 | learning rate 20.0000
|
60 |
+
| end of split 4 / 28 | epoch 3 | time: 3352.96s | valid loss 1.0696 | valid ppl 2.9143 | learning rate 20.0000
|
61 |
+
| end of split 5 / 28 | epoch 3 | time: 3350.73s | valid loss 1.0690 | valid ppl 2.9126 | learning rate 20.0000
|
62 |
+
| end of split 6 / 28 | epoch 3 | time: 3351.52s | valid loss 1.0686 | valid ppl 2.9113 | learning rate 20.0000
|
63 |
+
| end of split 7 / 28 | epoch 3 | time: 3334.50s | valid loss 1.0666 | valid ppl 2.9056 | learning rate 20.0000
|
64 |
+
| end of split 8 / 28 | epoch 3 | time: 3335.75s | valid loss 1.0687 | valid ppl 2.9115 | learning rate 20.0000
|
65 |
+
| end of split 9 / 28 | epoch 3 | time: 979.52s | valid loss 1.0667 | valid ppl 2.9058 | learning rate 20.0000
|
66 |
+
| end of split 10 / 28 | epoch 3 | time: 3340.27s | valid loss 1.0666 | valid ppl 2.9054 | learning rate 20.0000
|
67 |
+
| end of split 11 / 28 | epoch 3 | time: 3343.01s | valid loss 1.0676 | valid ppl 2.9084 | learning rate 20.0000
|
68 |
+
| end of split 12 / 28 | epoch 3 | time: 3344.63s | valid loss 1.0656 | valid ppl 2.9024 | learning rate 20.0000
|
69 |
+
| end of split 13 / 28 | epoch 3 | time: 3330.31s | valid loss 1.0663 | valid ppl 2.9047 | learning rate 20.0000
|
70 |
+
| end of split 14 / 28 | epoch 3 | time: 3340.17s | valid loss 1.0662 | valid ppl 2.9043 | learning rate 20.0000
|
71 |
+
| end of split 15 / 28 | epoch 3 | time: 3331.70s | valid loss 1.0651 | valid ppl 2.9010 | learning rate 20.0000
|
72 |
+
| end of split 16 / 28 | epoch 3 | time: 3345.00s | valid loss 1.0646 | valid ppl 2.8996 | learning rate 20.0000
|
73 |
+
| end of split 17 / 28 | epoch 3 | time: 3344.04s | valid loss 1.0627 | valid ppl 2.8943 | learning rate 20.0000
|
74 |
+
| end of split 18 / 28 | epoch 3 | time: 3342.21s | valid loss 1.0623 | valid ppl 2.8931 | learning rate 20.0000
|
75 |
+
| end of split 19 / 28 | epoch 3 | time: 3340.44s | valid loss 1.0627 | valid ppl 2.8941 | learning rate 20.0000
|
76 |
+
| end of split 20 / 28 | epoch 3 | time: 3308.47s | valid loss 1.0604 | valid ppl 2.8875 | learning rate 20.0000
|
77 |
+
| end of split 21 / 28 | epoch 3 | time: 3315.07s | valid loss 1.0617 | valid ppl 2.8912 | learning rate 20.0000
|
78 |
+
| end of split 22 / 28 | epoch 3 | time: 3323.04s | valid loss 1.0607 | valid ppl 2.8884 | learning rate 20.0000
|
79 |
+
| end of split 23 / 28 | epoch 3 | time: 3322.40s | valid loss 1.0600 | valid ppl 2.8863 | learning rate 20.0000
|
80 |
+
| end of split 24 / 28 | epoch 3 | time: 3328.09s | valid loss 1.0621 | valid ppl 2.8925 | learning rate 20.0000
|
81 |
+
| end of split 25 / 28 | epoch 3 | time: 3337.84s | valid loss 1.0617 | valid ppl 2.8912 | learning rate 20.0000
|
82 |
+
| end of split 26 / 28 | epoch 3 | time: 3328.62s | valid loss 1.0595 | valid ppl 2.8849 | learning rate 20.0000
|
83 |
+
| end of split 27 / 28 | epoch 3 | time: 3329.98s | valid loss 1.0603 | valid ppl 2.8871 | learning rate 20.0000
|
84 |
+
| end of split 28 / 28 | epoch 3 | time: 3326.62s | valid loss 1.0592 | valid ppl 2.8841 | learning rate 20.0000
|
85 |
+
| end of split 1 / 28 | epoch 4 | time: 3362.65s | valid loss 1.0588 | valid ppl 2.8829 | learning rate 20.0000
|
86 |
+
| end of split 2 / 28 | epoch 4 | time: 3372.84s | valid loss 1.0574 | valid ppl 2.8788 | learning rate 20.0000
|
87 |
+
| end of split 3 / 28 | epoch 4 | time: 3369.82s | valid loss 1.0593 | valid ppl 2.8843 | learning rate 20.0000
|
88 |
+
| end of split 4 / 28 | epoch 4 | time: 3369.24s | valid loss 1.0561 | valid ppl 2.8750 | learning rate 20.0000
|
89 |
+
| end of split 5 / 28 | epoch 4 | time: 3362.94s | valid loss 1.0567 | valid ppl 2.8768 | learning rate 20.0000
|
90 |
+
| end of split 6 / 28 | epoch 4 | time: 3364.27s | valid loss 1.0591 | valid ppl 2.8837 | learning rate 20.0000
|
91 |
+
| end of split 7 / 28 | epoch 4 | time: 3356.17s | valid loss 1.0548 | valid ppl 2.8714 | learning rate 20.0000
|
92 |
+
| end of split 8 / 28 | epoch 4 | time: 3345.16s | valid loss 1.0556 | valid ppl 2.8737 | learning rate 20.0000
|
93 |
+
| end of split 9 / 28 | epoch 4 | time: 3341.86s | valid loss 1.0568 | valid ppl 2.8771 | learning rate 20.0000
|
94 |
+
| end of split 10 / 28 | epoch 4 | time: 980.93s | valid loss 1.0546 | valid ppl 2.8708 | learning rate 20.0000
|
95 |
+
| end of split 11 / 28 | epoch 4 | time: 3346.04s | valid loss 1.0547 | valid ppl 2.8712 | learning rate 20.0000
|
96 |
+
| end of split 12 / 28 | epoch 4 | time: 3335.92s | valid loss 1.0545 | valid ppl 2.8705 | learning rate 20.0000
|
97 |
+
| end of split 13 / 28 | epoch 4 | time: 3336.81s | valid loss 1.0535 | valid ppl 2.8676 | learning rate 20.0000
|
98 |
+
| end of split 14 / 28 | epoch 4 | time: 3336.67s | valid loss 1.0539 | valid ppl 2.8689 | learning rate 20.0000
|
99 |
+
| end of split 15 / 28 | epoch 4 | time: 3337.57s | valid loss 1.0542 | valid ppl 2.8697 | learning rate 20.0000
|
100 |
+
| end of split 16 / 28 | epoch 4 | time: 3335.23s | valid loss 1.0544 | valid ppl 2.8702 | learning rate 20.0000
|
101 |
+
| end of split 17 / 28 | epoch 4 | time: 3337.46s | valid loss 1.0548 | valid ppl 2.8714 | learning rate 20.0000
|
102 |
+
| end of split 18 / 28 | epoch 4 | time: 3336.78s | valid loss 1.0522 | valid ppl 2.8641 | learning rate 20.0000
|
103 |
+
| end of split 19 / 28 | epoch 4 | time: 3335.97s | valid loss 1.0516 | valid ppl 2.8623 | learning rate 20.0000
|
104 |
+
| end of split 20 / 28 | epoch 4 | time: 3342.62s | valid loss 1.0522 | valid ppl 2.8639 | learning rate 20.0000
|
105 |
+
| end of split 21 / 28 | epoch 4 | time: 3346.48s | valid loss 1.0513 | valid ppl 2.8614 | learning rate 20.0000
|
106 |
+
| end of split 22 / 28 | epoch 4 | time: 3355.85s | valid loss 1.0510 | valid ppl 2.8605 | learning rate 20.0000
|
107 |
+
| end of split 23 / 28 | epoch 4 | time: 3359.76s | valid loss 1.0521 | valid ppl 2.8636 | learning rate 20.0000
|
108 |
+
| end of split 24 / 28 | epoch 4 | time: 3329.20s | valid loss 1.0524 | valid ppl 2.8644 | learning rate 20.0000
|
109 |
+
| end of split 25 / 28 | epoch 4 | time: 3355.82s | valid loss 1.0504 | valid ppl 2.8588 | learning rate 20.0000
|
110 |
+
| end of split 26 / 28 | epoch 4 | time: 3367.07s | valid loss 1.0508 | valid ppl 2.8600 | learning rate 20.0000
|
111 |
+
| end of split 27 / 28 | epoch 4 | time: 3366.55s | valid loss 1.0500 | valid ppl 2.8577 | learning rate 20.0000
|
112 |
+
| end of split 28 / 28 | epoch 4 | time: 3369.33s | valid loss 1.0501 | valid ppl 2.8580 | learning rate 20.0000
|
113 |
+
| end of split 1 / 28 | epoch 5 | time: 3342.95s | valid loss 1.0492 | valid ppl 2.8555 | learning rate 20.0000
|
114 |
+
| end of split 2 / 28 | epoch 5 | time: 3366.55s | valid loss 1.0498 | valid ppl 2.8571 | learning rate 20.0000
|
115 |
+
| end of split 3 / 28 | epoch 5 | time: 3356.80s | valid loss 1.0495 | valid ppl 2.8562 | learning rate 20.0000
|
116 |
+
| end of split 4 / 28 | epoch 5 | time: 3350.85s | valid loss 1.0484 | valid ppl 2.8531 | learning rate 20.0000
|
117 |
+
| end of split 5 / 28 | epoch 5 | time: 3351.73s | valid loss 1.0488 | valid ppl 2.8543 | learning rate 20.0000
|
118 |
+
| end of split 6 / 28 | epoch 5 | time: 3351.26s | valid loss 1.0479 | valid ppl 2.8516 | learning rate 20.0000
|
119 |
+
| end of split 7 / 28 | epoch 5 | time: 3351.24s | valid loss 1.0478 | valid ppl 2.8513 | learning rate 20.0000
|
120 |
+
| end of split 8 / 28 | epoch 5 | time: 3349.83s | valid loss 1.0484 | valid ppl 2.8531 | learning rate 20.0000
|
121 |
+
| end of split 9 / 28 | epoch 5 | time: 3348.30s | valid loss 1.0484 | valid ppl 2.8530 | learning rate 20.0000
|
122 |
+
| end of split 10 / 28 | epoch 5 | time: 3333.65s | valid loss 1.0473 | valid ppl 2.8500 | learning rate 20.0000
|
123 |
+
| end of split 11 / 28 | epoch 5 | time: 3345.83s | valid loss 1.0469 | valid ppl 2.8487 | learning rate 20.0000
|
124 |
+
| end of split 12 / 28 | epoch 5 | time: 3344.22s | valid loss 1.0480 | valid ppl 2.8518 | learning rate 20.0000
|
125 |
+
| end of split 13 / 28 | epoch 5 | time: 11361.46s | valid loss 1.0469 | valid ppl 2.8487 | learning rate 20.0000
|
126 |
+
| end of split 14 / 28 | epoch 5 | time: 3345.80s | valid loss 1.0478 | valid ppl 2.8514 | learning rate 20.0000
|
127 |
+
| end of split 15 / 28 | epoch 5 | time: 3347.61s | valid loss 1.0450 | valid ppl 2.8433 | learning rate 20.0000
|
128 |
+
| end of split 16 / 28 | epoch 5 | time: 3338.68s | valid loss 1.0458 | valid ppl 2.8456 | learning rate 20.0000
|
129 |
+
| end of split 17 / 28 | epoch 5 | time: 3356.79s | valid loss 1.0462 | valid ppl 2.8468 | learning rate 20.0000
|
130 |
+
| end of split 18 / 28 | epoch 5 | time: 3354.23s | valid loss 1.0468 | valid ppl 2.8486 | learning rate 20.0000
|
131 |
+
| end of split 19 / 28 | epoch 5 | time: 3361.30s | valid loss 1.0468 | valid ppl 2.8485 | learning rate 20.0000
|
132 |
+
| end of split 20 / 28 | epoch 5 | time: 3362.74s | valid loss 1.0451 | valid ppl 2.8436 | learning rate 20.0000
|
133 |
+
| end of split 21 / 28 | epoch 5 | time: 3369.02s | valid loss 1.0454 | valid ppl 2.8446 | learning rate 20.0000
|
134 |
+
| end of split 22 / 28 | epoch 5 | time: 988.45s | valid loss 1.0442 | valid ppl 2.8412 | learning rate 20.0000
|
135 |
+
| end of split 23 / 28 | epoch 5 | time: 3371.99s | valid loss 1.0436 | valid ppl 2.8394 | learning rate 20.0000
|
136 |
+
| end of split 24 / 28 | epoch 5 | time: 3372.04s | valid loss 1.0443 | valid ppl 2.8413 | learning rate 20.0000
|
137 |
+
| end of split 25 / 28 | epoch 5 | time: 3342.55s | valid loss 1.0439 | valid ppl 2.8402 | learning rate 20.0000
|
138 |
+
| end of split 26 / 28 | epoch 5 | time: 3360.09s | valid loss 1.0445 | valid ppl 2.8420 | learning rate 20.0000
|
139 |
+
| end of split 27 / 28 | epoch 5 | time: 3360.59s | valid loss 1.0434 | valid ppl 2.8390 | learning rate 20.0000
|
140 |
+
| end of split 28 / 28 | epoch 5 | time: 3355.31s | valid loss 1.0463 | valid ppl 2.8472 | learning rate 20.0000
|
141 |
+
| end of split 1 / 28 | epoch 6 | time: 3342.94s | valid loss 1.0447 | valid ppl 2.8424 | learning rate 20.0000
|
142 |
+
| end of split 2 / 28 | epoch 6 | time: 3349.59s | valid loss 1.0426 | valid ppl 2.8366 | learning rate 20.0000
|
143 |
+
| end of split 3 / 28 | epoch 6 | time: 3350.27s | valid loss 1.0440 | valid ppl 2.8405 | learning rate 20.0000
|
144 |
+
| end of split 4 / 28 | epoch 6 | time: 3352.12s | valid loss 1.0435 | valid ppl 2.8393 | learning rate 20.0000
|
145 |
+
| end of split 5 / 28 | epoch 6 | time: 3352.49s | valid loss 1.0418 | valid ppl 2.8342 | learning rate 20.0000
|
146 |
+
| end of split 6 / 28 | epoch 6 | time: 3353.69s | valid loss 1.0441 | valid ppl 2.8409 | learning rate 20.0000
|
147 |
+
| end of split 7 / 28 | epoch 6 | time: 3351.74s | valid loss 1.0437 | valid ppl 2.8396 | learning rate 20.0000
|
148 |
+
| end of split 8 / 28 | epoch 6 | time: 3354.03s | valid loss 1.0417 | valid ppl 2.8339 | learning rate 20.0000
|
149 |
+
| end of split 9 / 28 | epoch 6 | time: 3355.56s | valid loss 1.0409 | valid ppl 2.8319 | learning rate 20.0000
|
150 |
+
| end of split 10 / 28 | epoch 6 | time: 3353.42s | valid loss 1.0410 | valid ppl 2.8320 | learning rate 20.0000
|
151 |
+
| end of split 11 / 28 | epoch 6 | time: 3346.88s | valid loss 1.0406 | valid ppl 2.8308 | learning rate 20.0000
|
152 |
+
| end of split 12 / 28 | epoch 6 | time: 3351.99s | valid loss 1.0438 | valid ppl 2.8400 | learning rate 20.0000
|
153 |
+
| end of split 13 / 28 | epoch 6 | time: 3363.46s | valid loss 1.0416 | valid ppl 2.8338 | learning rate 20.0000
|
154 |
+
| end of split 14 / 28 | epoch 6 | time: 991.85s | valid loss 1.0420 | valid ppl 2.8350 | learning rate 20.0000
|
155 |
+
| end of split 15 / 28 | epoch 6 | time: 3390.33s | valid loss 1.0414 | valid ppl 2.8330 | learning rate 20.0000
|
156 |
+
| end of split 16 / 28 | epoch 6 | time: 3389.41s | valid loss 1.0402 | valid ppl 2.8297 | learning rate 20.0000
|
157 |
+
| end of split 17 / 28 | epoch 6 | time: 3389.89s | valid loss 1.0404 | valid ppl 2.8303 | learning rate 20.0000
|
158 |
+
| end of split 18 / 28 | epoch 6 | time: 3380.84s | valid loss 1.0426 | valid ppl 2.8367 | learning rate 20.0000
|
159 |
+
| end of split 19 / 28 | epoch 6 | time: 3391.71s | valid loss 1.0410 | valid ppl 2.8321 | learning rate 20.0000
|
160 |
+
| end of split 20 / 28 | epoch 6 | time: 3380.11s | valid loss 1.0404 | valid ppl 2.8304 | learning rate 20.0000
|
161 |
+
| end of split 21 / 28 | epoch 6 | time: 3389.38s | valid loss 1.0398 | valid ppl 2.8287 | learning rate 20.0000
|
162 |
+
| end of split 22 / 28 | epoch 6 | time: 3384.16s | valid loss 1.0410 | valid ppl 2.8321 | learning rate 20.0000
|
163 |
+
| end of split 23 / 28 | epoch 6 | time: 3386.73s | valid loss 1.0423 | valid ppl 2.8357 | learning rate 20.0000
|
164 |
+
| end of split 24 / 28 | epoch 6 | time: 3384.70s | valid loss 1.0402 | valid ppl 2.8299 | learning rate 20.0000
|
165 |
+
| end of split 25 / 28 | epoch 6 | time: 3380.35s | valid loss 1.0393 | valid ppl 2.8272 | learning rate 20.0000
|
166 |
+
| end of split 26 / 28 | epoch 6 | time: 3379.51s | valid loss 1.0420 | valid ppl 2.8350 | learning rate 20.0000
|
167 |
+
| end of split 27 / 28 | epoch 6 | time: 3374.38s | valid loss 1.0412 | valid ppl 2.8326 | learning rate 20.0000
|
168 |
+
| end of split 28 / 28 | epoch 6 | time: 3368.86s | valid loss 1.0386 | valid ppl 2.8252 | learning rate 20.0000
|
169 |
+
| end of split 29 / 28 | epoch 6 | time: 3370.84s | valid loss 1.0389 | valid ppl 2.8262 | learning rate 20.0000
|
170 |
+
| end of split 30 / 28 | epoch 6 | time: 3387.68s | valid loss 1.0395 | valid ppl 2.8277 | learning rate 20.0000
|
171 |
+
| end of split 31 / 28 | epoch 6 | time: 3375.92s | valid loss 1.0390 | valid ppl 2.8265 | learning rate 20.0000
|
172 |
+
| end of split 32 / 28 | epoch 6 | time: 3383.55s | valid loss 1.0388 | valid ppl 2.8258 | learning rate 20.0000
|
173 |
+
| end of split 33 / 28 | epoch 6 | time: 3381.55s | valid loss 1.0378 | valid ppl 2.8230 | learning rate 20.0000
|
174 |
+
| end of split 34 / 28 | epoch 6 | time: 991.13s | valid loss 1.0382 | valid ppl 2.8241 | learning rate 20.0000
|
175 |
+
| end of split 35 / 28 | epoch 6 | time: 3384.35s | valid loss 1.0382 | valid ppl 2.8240 | learning rate 20.0000
|
176 |
+
| end of split 36 / 28 | epoch 6 | time: 3380.81s | valid loss 1.0377 | valid ppl 2.8228 | learning rate 20.0000
|
177 |
+
| end of split 37 / 28 | epoch 6 | time: 3383.28s | valid loss 1.0381 | valid ppl 2.8239 | learning rate 20.0000
|
178 |
+
| end of split 38 / 28 | epoch 6 | time: 3382.18s | valid loss 1.0380 | valid ppl 2.8236 | learning rate 20.0000
|
179 |
+
| end of split 39 / 28 | epoch 6 | time: 3389.48s | valid loss 1.0371 | valid ppl 2.8210 | learning rate 20.0000
|
180 |
+
| end of split 40 / 28 | epoch 6 | time: 3388.70s | valid loss 1.0386 | valid ppl 2.8252 | learning rate 20.0000
|
181 |
+
| end of split 41 / 28 | epoch 6 | time: 3390.47s | valid loss 1.0372 | valid ppl 2.8214 | learning rate 20.0000
|
182 |
+
| end of split 42 / 28 | epoch 6 | time: 3393.85s | valid loss 1.0376 | valid ppl 2.8225 | learning rate 20.0000
|
183 |
+
| end of split 43 / 28 | epoch 6 | time: 3406.04s | valid loss 1.0363 | valid ppl 2.8189 | learning rate 20.0000
|
184 |
+
| end of split 44 / 28 | epoch 6 | time: 3466.16s | valid loss 1.0365 | valid ppl 2.8194 | learning rate 20.0000
|
185 |
+
| end of split 45 / 28 | epoch 6 | time: 3444.11s | valid loss 1.0368 | valid ppl 2.8203 | learning rate 20.0000
|
186 |
+
| end of split 46 / 28 | epoch 6 | time: 3436.15s | valid loss 1.0368 | valid ppl 2.8202 | learning rate 20.0000
|
187 |
+
| end of split 47 / 28 | epoch 6 | time: 3434.69s | valid loss 1.0367 | valid ppl 2.8198 | learning rate 20.0000
|
188 |
+
| end of split 48 / 28 | epoch 6 | time: 3429.94s | valid loss 1.0401 | valid ppl 2.8295 | learning rate 20.0000
|
189 |
+
| end of split 49 / 28 | epoch 6 | time: 3426.04s | valid loss 1.0363 | valid ppl 2.8187 | learning rate 20.0000
|
190 |
+
| end of split 50 / 28 | epoch 6 | time: 3421.10s | valid loss 1.0364 | valid ppl 2.8190 | learning rate 20.0000
|
191 |
+
| end of split 51 / 28 | epoch 6 | time: 3412.38s | valid loss 1.0376 | valid ppl 2.8224 | learning rate 20.0000
|
192 |
+
| end of split 52 / 28 | epoch 6 | time: 3396.50s | valid loss 1.0363 | valid ppl 2.8187 | learning rate 20.0000
|
193 |
+
| end of split 53 / 28 | epoch 6 | time: 12135.07s | valid loss 1.0356 | valid ppl 2.8167 | learning rate 20.0000
|
194 |
+
| end of split 54 / 28 | epoch 6 | time: 3364.82s | valid loss 1.0363 | valid ppl 2.8187 | learning rate 20.0000
|
195 |
+
| end of split 55 / 28 | epoch 6 | time: 3390.69s | valid loss 1.0379 | valid ppl 2.8233 | learning rate 20.0000
|
196 |
+
| end of split 56 / 28 | epoch 6 | time: 3405.54s | valid loss 1.0355 | valid ppl 2.8164 | learning rate 20.0000
|
197 |
+
| end of split 29 / 28 | epoch 7 | time: 3355.56s | valid loss 1.0346 | valid ppl 2.8140 | learning rate 20.0000
|
198 |
+
| end of split 30 / 28 | epoch 7 | time: 3398.95s | valid loss 1.0354 | valid ppl 2.8164 | learning rate 20.0000
|
199 |
+
| end of split 31 / 28 | epoch 7 | time: 3403.92s | valid loss 1.0353 | valid ppl 2.8160 | learning rate 20.0000
|
200 |
+
| end of split 32 / 28 | epoch 7 | time: 3401.23s | valid loss 1.0362 | valid ppl 2.8186 | learning rate 20.0000
|
201 |
+
| end of split 33 / 28 | epoch 7 | time: 3399.23s | valid loss 1.0356 | valid ppl 2.8167 | learning rate 20.0000
|
202 |
+
| end of split 34 / 28 | epoch 7 | time: 3398.69s | valid loss 1.0361 | valid ppl 2.8182 | learning rate 20.0000
|
203 |
+
| end of split 35 / 28 | epoch 7 | time: 3404.16s | valid loss 1.0369 | valid ppl 2.8206 | learning rate 20.0000
|
204 |
+
| end of split 36 / 28 | epoch 7 | time: 3399.67s | valid loss 1.0342 | valid ppl 2.8128 | learning rate 20.0000
|
205 |
+
| end of split 37 / 28 | epoch 7 | time: 3398.06s | valid loss 1.0352 | valid ppl 2.8156 | learning rate 20.0000
|
206 |
+
| end of split 38 / 28 | epoch 7 | time: 3401.95s | valid loss 1.0352 | valid ppl 2.8157 | learning rate 20.0000
|
207 |
+
| end of split 39 / 28 | epoch 7 | time: 3422.71s | valid loss 1.0361 | valid ppl 2.8182 | learning rate 20.0000
|
208 |
+
| end of split 40 / 28 | epoch 7 | time: 1003.24s | valid loss 1.0355 | valid ppl 2.8166 | learning rate 20.0000
|
209 |
+
| end of split 41 / 28 | epoch 7 | time: 3424.69s | valid loss 1.0350 | valid ppl 2.8150 | learning rate 20.0000
|
210 |
+
| end of split 42 / 28 | epoch 7 | time: 3426.53s | valid loss 1.0353 | valid ppl 2.8159 | learning rate 20.0000
|
211 |
+
| end of split 43 / 28 | epoch 7 | time: 3426.58s | valid loss 1.0340 | valid ppl 2.8124 | learning rate 20.0000
|
212 |
+
| end of split 44 / 28 | epoch 7 | time: 3423.52s | valid loss 1.0354 | valid ppl 2.8161 | learning rate 20.0000
|
213 |
+
| end of split 45 / 28 | epoch 7 | time: 3416.13s | valid loss 1.0329 | valid ppl 2.8093 | learning rate 20.0000
|
214 |
+
| end of split 46 / 28 | epoch 7 | time: 3412.69s | valid loss 1.0351 | valid ppl 2.8155 | learning rate 20.0000
|
215 |
+
| end of split 47 / 28 | epoch 7 | time: 3407.49s | valid loss 1.0340 | valid ppl 2.8123 | learning rate 20.0000
|
216 |
+
| end of split 48 / 28 | epoch 7 | time: 3404.42s | valid loss 1.0327 | valid ppl 2.8086 | learning rate 20.0000
|
217 |
+
| end of split 49 / 28 | epoch 7 | time: 3400.72s | valid loss 1.0335 | valid ppl 2.8110 | learning rate 20.0000
|
218 |
+
| end of split 50 / 28 | epoch 7 | time: 3396.61s | valid loss 1.0341 | valid ppl 2.8126 | learning rate 20.0000
|
219 |
+
| end of split 51 / 28 | epoch 7 | time: 3393.63s | valid loss 1.0351 | valid ppl 2.8155 | learning rate 20.0000
|
220 |
+
| end of split 52 / 28 | epoch 7 | time: 3387.82s | valid loss 1.0321 | valid ppl 2.8070 | learning rate 20.0000
|
221 |
+
| end of split 53 / 28 | epoch 7 | time: 3373.80s | valid loss 1.0349 | valid ppl 2.8147 | learning rate 20.0000
|
222 |
+
| end of split 54 / 28 | epoch 7 | time: 3383.16s | valid loss 1.0321 | valid ppl 2.8069 | learning rate 20.0000
|
223 |
+
| end of split 55 / 28 | epoch 7 | time: 3385.56s | valid loss 1.0322 | valid ppl 2.8072 | learning rate 20.0000
|
224 |
+
| end of split 56 / 28 | epoch 7 | time: 3382.85s | valid loss 1.0320 | valid ppl 2.8066 | learning rate 20.0000
|
225 |
+
| end of split 29 / 28 | epoch 8 | time: 3371.57s | valid loss 1.0326 | valid ppl 2.8084 | learning rate 20.0000
|
226 |
+
| end of split 30 / 28 | epoch 8 | time: 3382.16s | valid loss 1.0344 | valid ppl 2.8133 | learning rate 20.0000
|
227 |
+
| end of split 31 / 28 | epoch 8 | time: 3373.30s | valid loss 1.0327 | valid ppl 2.8087 | learning rate 20.0000
|
228 |
+
| end of split 32 / 28 | epoch 8 | time: 3345.95s | valid loss 1.0332 | valid ppl 2.8102 | learning rate 20.0000
|
229 |
+
| end of split 33 / 28 | epoch 8 | time: 3368.96s | valid loss 1.0326 | valid ppl 2.8084 | learning rate 20.0000
|
230 |
+
| end of split 34 / 28 | epoch 8 | time: 3388.68s | valid loss 1.0318 | valid ppl 2.8062 | learning rate 20.0000
|
231 |
+
| end of split 35 / 28 | epoch 8 | time: 3373.57s | valid loss 1.0336 | valid ppl 2.8113 | learning rate 20.0000
|
232 |
+
| end of split 36 / 28 | epoch 8 | time: 3375.36s | valid loss 1.0342 | valid ppl 2.8127 | learning rate 20.0000
|
233 |
+
| end of split 37 / 28 | epoch 8 | time: 3374.28s | valid loss 1.0311 | valid ppl 2.8042 | learning rate 20.0000
|
234 |
+
| end of split 38 / 28 | epoch 8 | time: 3386.87s | valid loss 1.0321 | valid ppl 2.8070 | learning rate 20.0000
|
235 |
+
| end of split 39 / 28 | epoch 8 | time: 3385.72s | valid loss 1.0312 | valid ppl 2.8044 | learning rate 20.0000
|
236 |
+
| end of split 40 / 28 | epoch 8 | time: 991.55s | valid loss 1.0349 | valid ppl 2.8149 | learning rate 20.0000
|
237 |
+
| end of split 41 / 28 | epoch 8 | time: 3384.06s | valid loss 1.0315 | valid ppl 2.8052 | learning rate 20.0000
|
238 |
+
| end of split 42 / 28 | epoch 8 | time: 3380.84s | valid loss 1.0332 | valid ppl 2.8102 | learning rate 20.0000
|
239 |
+
| end of split 43 / 28 | epoch 8 | time: 3372.24s | valid loss 1.0324 | valid ppl 2.8079 | learning rate 20.0000
|
240 |
+
| end of split 44 / 28 | epoch 8 | time: 3367.32s | valid loss 1.0343 | valid ppl 2.8133 | learning rate 20.0000
|
241 |
+
| end of split 45 / 28 | epoch 8 | time: 3362.88s | valid loss 1.0305 | valid ppl 2.8026 | learning rate 20.0000
|
242 |
+
| end of split 46 / 28 | epoch 8 | time: 3352.06s | valid loss 1.0317 | valid ppl 2.8058 | learning rate 20.0000
|
243 |
+
| end of split 47 / 28 | epoch 8 | time: 5236.04s | valid loss 1.0310 | valid ppl 2.8038 | learning rate 20.0000
|
244 |
+
| end of split 48 / 28 | epoch 8 | time: 3337.66s | valid loss 1.0318 | valid ppl 2.8061 | learning rate 20.0000
|
245 |
+
| end of split 49 / 28 | epoch 8 | time: 3352.64s | valid loss 1.0319 | valid ppl 2.8064 | learning rate 20.0000
|
246 |
+
| end of split 50 / 28 | epoch 8 | time: 3353.74s | valid loss 1.0301 | valid ppl 2.8014 | learning rate 20.0000
|
247 |
+
| end of split 51 / 28 | epoch 8 | time: 3355.81s | valid loss 1.0329 | valid ppl 2.8092 | learning rate 20.0000
|
248 |
+
| end of split 52 / 28 | epoch 8 | time: 3345.28s | valid loss 1.0624 | valid ppl 2.8934 | learning rate 20.0000
|
249 |
+
| end of split 53 / 28 | epoch 8 | time: 3348.72s | valid loss 1.0307 | valid ppl 2.8031 | learning rate 20.0000
|
250 |
+
| end of split 54 / 28 | epoch 8 | time: 3349.58s | valid loss 1.0310 | valid ppl 2.8040 | learning rate 20.0000
|
251 |
+
| end of split 55 / 28 | epoch 8 | time: 3348.67s | valid loss 1.0302 | valid ppl 2.8017 | learning rate 20.0000
|
252 |
+
| end of split 56 / 28 | epoch 8 | time: 3346.76s | valid loss 1.0311 | valid ppl 2.8042 | learning rate 20.0000
|
253 |
+
| end of split 29 / 28 | epoch 9 | time: 3333.15s | valid loss 1.0323 | valid ppl 2.8076 | learning rate 20.0000
|
254 |
+
| end of split 30 / 28 | epoch 9 | time: 3355.20s | valid loss 1.0298 | valid ppl 2.8005 | learning rate 20.0000
|
255 |
+
| end of split 31 / 28 | epoch 9 | time: 3358.57s | valid loss 1.0301 | valid ppl 2.8012 | learning rate 20.0000
|
256 |
+
| end of split 32 / 28 | epoch 9 | time: 985.22s | valid loss 1.0299 | valid ppl 2.8007 | learning rate 20.0000
|
257 |
+
| end of split 33 / 28 | epoch 9 | time: 3364.70s | valid loss 1.0308 | valid ppl 2.8033 | learning rate 20.0000
|
258 |
+
| end of split 34 / 28 | epoch 9 | time: 3358.86s | valid loss 1.0299 | valid ppl 2.8008 | learning rate 20.0000
|
259 |
+
| end of split 35 / 28 | epoch 9 | time: 3373.80s | valid loss 1.0299 | valid ppl 2.8008 | learning rate 20.0000
|
260 |
+
| end of split 36 / 28 | epoch 9 | time: 3349.58s | valid loss 1.0294 | valid ppl 2.7993 | learning rate 20.0000
|
261 |
+
| end of split 37 / 28 | epoch 9 | time: 3363.90s | valid loss 1.0297 | valid ppl 2.8002 | learning rate 20.0000
|
262 |
+
| end of split 38 / 28 | epoch 9 | time: 3374.51s | valid loss 1.0307 | valid ppl 2.8031 | learning rate 20.0000
|
263 |
+
| end of split 39 / 28 | epoch 9 | time: 3368.04s | valid loss 1.0285 | valid ppl 2.7968 | learning rate 20.0000
|
264 |
+
| end of split 40 / 28 | epoch 9 | time: 3367.10s | valid loss 1.0289 | valid ppl 2.7979 | learning rate 20.0000
|
265 |
+
| end of split 41 / 28 | epoch 9 | time: 3362.55s | valid loss 1.0295 | valid ppl 2.7997 | learning rate 20.0000
|
266 |
+
| end of split 42 / 28 | epoch 9 | time: 3354.89s | valid loss 1.0287 | valid ppl 2.7975 | learning rate 20.0000
|
267 |
+
| end of split 43 / 28 | epoch 9 | time: 3351.48s | valid loss 1.0285 | valid ppl 2.7968 | learning rate 20.0000
|
268 |
+
| end of split 44 / 28 | epoch 9 | time: 3347.75s | valid loss 1.0299 | valid ppl 2.8009 | learning rate 20.0000
|
269 |
+
| end of split 45 / 28 | epoch 9 | time: 3353.75s | valid loss 1.0280 | valid ppl 2.7956 | learning rate 20.0000
|
270 |
+
| end of split 46 / 28 | epoch 9 | time: 3340.75s | valid loss 1.0294 | valid ppl 2.7994 | learning rate 20.0000
|
271 |
+
| end of split 47 / 28 | epoch 9 | time: 3350.77s | valid loss 1.0285 | valid ppl 2.7968 | learning rate 20.0000
|
272 |
+
| end of split 48 / 28 | epoch 9 | time: 3351.99s | valid loss 1.0278 | valid ppl 2.7948 | learning rate 20.0000
|
273 |
+
| end of split 49 / 28 | epoch 9 | time: 3341.78s | valid loss 1.0283 | valid ppl 2.7964 | learning rate 20.0000
|
274 |
+
| end of split 50 / 28 | epoch 9 | time: 3338.92s | valid loss 1.0302 | valid ppl 2.8016 | learning rate 20.0000
|
275 |
+
| end of split 51 / 28 | epoch 9 | time: 3338.22s | valid loss 1.0293 | valid ppl 2.7991 | learning rate 20.0000
|
276 |
+
| end of split 52 / 28 | epoch 9 | time: 3348.00s | valid loss 1.0286 | valid ppl 2.7970 | learning rate 20.0000
|
277 |
+
| end of split 53 / 28 | epoch 9 | time: 3340.37s | valid loss 1.0293 | valid ppl 2.7992 | learning rate 20.0000
|
278 |
+
| end of split 54 / 28 | epoch 9 | time: 3327.53s | valid loss 1.0279 | valid ppl 2.7951 | learning rate 20.0000
|
279 |
+
| end of split 55 / 28 | epoch 9 | time: 3335.99s | valid loss 1.0273 | valid ppl 2.7937 | learning rate 20.0000
|
280 |
+
| end of split 56 / 28 | epoch 9 | time: 13980.53s | valid loss 1.0284 | valid ppl 2.7965 | learning rate 20.0000
|
281 |
+
| end of split 29 / 28 | epoch 10 | time: 3355.65s | valid loss 1.0281 | valid ppl 2.7959 | learning rate 20.0000
|
282 |
+
| end of split 30 / 28 | epoch 10 | time: 3366.79s | valid loss 1.0287 | valid ppl 2.7973 | learning rate 20.0000
|
283 |
+
| end of split 31 / 28 | epoch 10 | time: 3368.82s | valid loss 1.0287 | valid ppl 2.7973 | learning rate 20.0000
|
284 |
+
| end of split 32 / 28 | epoch 10 | time: 990.06s | valid loss 1.0327 | valid ppl 2.8085 | learning rate 20.0000
|
285 |
+
| end of split 33 / 28 | epoch 10 | time: 3381.50s | valid loss 1.0277 | valid ppl 2.7948 | learning rate 20.0000
|
286 |
+
| end of split 34 / 28 | epoch 10 | time: 3384.53s | valid loss 1.0288 | valid ppl 2.7977 | learning rate 20.0000
|
287 |
+
| end of split 35 / 28 | epoch 10 | time: 3387.23s | valid loss 1.0335 | valid ppl 2.8108 | learning rate 20.0000
|
288 |
+
| end of split 36 / 28 | epoch 10 | time: 3367.46s | valid loss 1.0284 | valid ppl 2.7967 | learning rate 20.0000
|
289 |
+
| end of split 37 / 28 | epoch 10 | time: 3381.33s | valid loss 1.0273 | valid ppl 2.7936 | learning rate 20.0000
|
290 |
+
| end of split 38 / 28 | epoch 10 | time: 3373.72s | valid loss 1.0273 | valid ppl 2.7936 | learning rate 20.0000
|
291 |
+
| end of split 39 / 28 | epoch 10 | time: 3367.39s | valid loss 1.0228 | valid ppl 2.7810 | learning rate 5.0000
|
292 |
+
| end of split 40 / 28 | epoch 10 | time: 3365.36s | valid loss 1.0225 | valid ppl 2.7803 | learning rate 5.0000
|
293 |
+
| end of split 41 / 28 | epoch 10 | time: 3366.86s | valid loss 1.0223 | valid ppl 2.7796 | learning rate 5.0000
|
294 |
+
| end of split 42 / 28 | epoch 10 | time: 3368.15s | valid loss 1.0223 | valid ppl 2.7796 | learning rate 5.0000
|
295 |
+
| end of split 43 / 28 | epoch 10 | time: 3362.30s | valid loss 1.0220 | valid ppl 2.7789 | learning rate 5.0000
|
296 |
+
| end of split 44 / 28 | epoch 10 | time: 3364.78s | valid loss 1.0220 | valid ppl 2.7786 | learning rate 5.0000
|
297 |
+
| end of split 45 / 28 | epoch 10 | time: 3362.51s | valid loss 1.0219 | valid ppl 2.7784 | learning rate 5.0000
|
298 |
+
| end of split 46 / 28 | epoch 10 | time: 3366.20s | valid loss 1.0217 | valid ppl 2.7779 | learning rate 5.0000
|
299 |
+
| end of split 47 / 28 | epoch 10 | time: 3354.09s | valid loss 1.0217 | valid ppl 2.7779 | learning rate 5.0000
|
300 |
+
| end of split 48 / 28 | epoch 10 | time: 3361.91s | valid loss 1.0217 | valid ppl 2.7778 | learning rate 5.0000
|
301 |
+
| end of split 49 / 28 | epoch 10 | time: 3359.11s | valid loss 1.0215 | valid ppl 2.7775 | learning rate 5.0000
|
302 |
+
| end of split 50 / 28 | epoch 10 | time: 3354.83s | valid loss 1.0218 | valid ppl 2.7782 | learning rate 5.0000
|
303 |
+
| end of split 51 / 28 | epoch 10 | time: 3364.29s | valid loss 1.0215 | valid ppl 2.7773 | learning rate 5.0000
|
304 |
+
| end of split 52 / 28 | epoch 10 | time: 3387.06s | valid loss 1.0215 | valid ppl 2.7772 | learning rate 5.0000
|
305 |
+
| end of split 53 / 28 | epoch 10 | time: 3386.93s | valid loss 1.0214 | valid ppl 2.7771 | learning rate 5.0000
|
306 |
+
| end of split 54 / 28 | epoch 10 | time: 3388.66s | valid loss 1.0212 | valid ppl 2.7766 | learning rate 5.0000
|
307 |
+
| end of split 55 / 28 | epoch 10 | time: 3386.75s | valid loss 1.0212 | valid ppl 2.7764 | learning rate 5.0000
|
308 |
+
| end of split 56 / 28 | epoch 10 | time: 3386.25s | valid loss 1.0213 | valid ppl 2.7767 | learning rate 5.0000
|
pipeline.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os.path
|
2 |
+
from typing import List, Dict
|
3 |
+
from flair.models.language_model import LanguageModel
|
4 |
+
|
5 |
+
|
6 |
+
class PreTrainedPipeline:
    """Text-generation pipeline backed by a flair character language model.

    The model is loaded once in ``__init__``; every call then continues the
    supplied text with characters sampled from that model.
    """

    def __init__(self, path: str = ""):
        """Load the language model checkpoint.

        Args:
            path: Directory that contains ``best-lm.pt``. With the default
                empty string the checkpoint is looked up relative to the
                current working directory, exactly as before.
        """
        # Resolve the checkpoint against `path` instead of relying on the
        # process working directory. NOTE(review): presumably the hosting
        # runtime passes the model repository directory here -- confirm.
        self.model = LanguageModel.load_language_model(
            os.path.join(path, "best-lm.pt")
        )

    def __call__(self, inputs: str) -> List[Dict]:
        """Continue ``inputs`` with text generated by the language model.

        Args:
            inputs: The prompt; surrounding whitespace is stripped before it
                is handed to the model.

        Returns:
            A single-element list holding a dict whose ``"generated_text"``
            key maps to the generated string.
        """
        prompt = inputs.strip()
        # generate_text takes the prefix as a plain string (not a list) and
        # returns a (text, log-probability) pair; only the text is exposed.
        generated_text = self.model.generate_text(prompt)[0]
        return [{"generated_text": generated_text}]
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
flair
|