Daniil Larionov commited on
Commit
3d62abc
1 Parent(s): 851c70c

End of training

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
all_results.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_PREDICATE_f1": 0.9465648854961832,
4
+ "eval_PREDICATE_number": 129,
5
+ "eval_PREDICATE_precision": 0.9323308270676691,
6
+ "eval_PREDICATE_recall": 0.9612403100775194,
7
+ "eval_loss": 0.2416924387216568,
8
+ "eval_overall_accuracy": 0.9583558680367766,
9
+ "eval_overall_f1": 0.8395061728395061,
10
+ "eval_overall_precision": 0.8429752066115702,
11
+ "eval_overall_recall": 0.8360655737704918,
12
+ "eval_runtime": 0.5304,
13
+ "eval_samples": 96,
14
+ "eval_samples_per_second": 180.983,
15
+ "eval_steps_per_second": 22.623,
16
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_f1": 0.0,
17
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_number": 1,
18
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_precision": 0.0,
19
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_recall": 0.0,
20
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_f1": 0.6917293233082706,
21
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_number": 73,
22
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_precision": 0.7666666666666667,
23
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_recall": 0.6301369863013698,
24
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_f1": 0.7555555555555555,
25
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_number": 41,
26
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_precision": 0.6938775510204082,
27
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_recall": 0.8292682926829268,
28
+ "train_loss": 0.10899043403289936,
29
+ "train_runtime": 173.8981,
30
+ "train_samples": 859,
31
+ "train_samples_per_second": 49.397,
32
+ "train_steps_per_second": 3.105
33
+ }
config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./ruBert-base/",
3
+ "architectures": [
4
+ "BertForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "directionality": "bidi",
9
+ "finetuning_task": "ner",
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "B-PREDICATE",
15
+ "1": "B-\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422",
16
+ "2": "B-\u041a\u0410\u0423\u0417\u0410\u0422\u0418\u0412",
17
+ "3": "B-\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420",
18
+ "4": "B-\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420",
19
+ "5": "O"
20
+ },
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 3072,
23
+ "label2id": {
24
+ "B-PREDICATE": 0,
25
+ "B-\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422": 1,
26
+ "B-\u041a\u0410\u0423\u0417\u0410\u0422\u0418\u0412": 2,
27
+ "B-\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420": 3,
28
+ "B-\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420": 4,
29
+ "O": 5
30
+ },
31
+ "layer_norm_eps": 1e-12,
32
+ "max_position_embeddings": 512,
33
+ "model_type": "bert",
34
+ "num_attention_heads": 12,
35
+ "num_hidden_layers": 12,
36
+ "pad_token_id": 0,
37
+ "pooler_fc_size": 768,
38
+ "pooler_num_attention_heads": 12,
39
+ "pooler_num_fc_layers": 3,
40
+ "pooler_size_per_head": 128,
41
+ "pooler_type": "first_token_transform",
42
+ "position_embedding_type": "absolute",
43
+ "torch_dtype": "float32",
44
+ "transformers_version": "4.13.0.dev0",
45
+ "type_vocab_size": 2,
46
+ "use_cache": true,
47
+ "vocab_size": 120138
48
+ }
eval_results.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_PREDICATE_f1": 0.9465648854961832,
4
+ "eval_PREDICATE_number": 129,
5
+ "eval_PREDICATE_precision": 0.9323308270676691,
6
+ "eval_PREDICATE_recall": 0.9612403100775194,
7
+ "eval_loss": 0.2416924387216568,
8
+ "eval_overall_accuracy": 0.9583558680367766,
9
+ "eval_overall_f1": 0.8395061728395061,
10
+ "eval_overall_precision": 0.8429752066115702,
11
+ "eval_overall_recall": 0.8360655737704918,
12
+ "eval_runtime": 0.5304,
13
+ "eval_samples": 96,
14
+ "eval_samples_per_second": 180.983,
15
+ "eval_steps_per_second": 22.623,
16
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_f1": 0.0,
17
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_number": 1,
18
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_precision": 0.0,
19
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_recall": 0.0,
20
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_f1": 0.6917293233082706,
21
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_number": 73,
22
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_precision": 0.7666666666666667,
23
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_recall": 0.6301369863013698,
24
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_f1": 0.7555555555555555,
25
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_number": 41,
26
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_precision": 0.6938775510204082,
27
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_recall": 0.8292682926829268
28
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab14cf1798a2cb5934d60be7d84ff133f9ea70fd7878d397a0bd97db208d990d
3
+ size 710968817
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": null, "special_tokens_map_file": null, "name_or_path": "./ruBert-base/", "do_basic_tokenize": true, "never_split": null, "tokenizer_class": "BertTokenizer"}
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "train_loss": 0.10899043403289936,
4
+ "train_runtime": 173.8981,
5
+ "train_samples": 859,
6
+ "train_samples_per_second": 49.397,
7
+ "train_steps_per_second": 3.105
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,959 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "global_step": 540,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.02,
12
+ "learning_rate": 1.5151515151515152e-06,
13
+ "loss": 1.7524,
14
+ "step": 1
15
+ },
16
+ {
17
+ "epoch": 0.09,
18
+ "learning_rate": 7.5757575757575764e-06,
19
+ "loss": 1.7511,
20
+ "step": 5
21
+ },
22
+ {
23
+ "epoch": 0.19,
24
+ "learning_rate": 1.5151515151515153e-05,
25
+ "loss": 1.5404,
26
+ "step": 10
27
+ },
28
+ {
29
+ "epoch": 0.28,
30
+ "learning_rate": 2.272727272727273e-05,
31
+ "loss": 1.118,
32
+ "step": 15
33
+ },
34
+ {
35
+ "epoch": 0.37,
36
+ "learning_rate": 3.0303030303030306e-05,
37
+ "loss": 0.6504,
38
+ "step": 20
39
+ },
40
+ {
41
+ "epoch": 0.46,
42
+ "learning_rate": 3.787878787878788e-05,
43
+ "loss": 0.8655,
44
+ "step": 25
45
+ },
46
+ {
47
+ "epoch": 0.56,
48
+ "learning_rate": 4.545454545454546e-05,
49
+ "loss": 0.4171,
50
+ "step": 30
51
+ },
52
+ {
53
+ "epoch": 0.65,
54
+ "learning_rate": 4.999808023410233e-05,
55
+ "loss": 0.3723,
56
+ "step": 35
57
+ },
58
+ {
59
+ "epoch": 0.74,
60
+ "learning_rate": 4.997648625366471e-05,
61
+ "loss": 0.3472,
62
+ "step": 40
63
+ },
64
+ {
65
+ "epoch": 0.83,
66
+ "learning_rate": 4.993091938082206e-05,
67
+ "loss": 0.1754,
68
+ "step": 45
69
+ },
70
+ {
71
+ "epoch": 0.93,
72
+ "learning_rate": 4.986142335149978e-05,
73
+ "loss": 0.2462,
74
+ "step": 50
75
+ },
76
+ {
77
+ "epoch": 1.0,
78
+ "eval_PREDICATE_f1": 0.9416058394160584,
79
+ "eval_PREDICATE_number": 129,
80
+ "eval_PREDICATE_precision": 0.8896551724137931,
81
+ "eval_PREDICATE_recall": 1.0,
82
+ "eval_loss": 0.15542413294315338,
83
+ "eval_overall_accuracy": 0.9486208761492698,
84
+ "eval_overall_f1": 0.8,
85
+ "eval_overall_precision": 0.8135593220338984,
86
+ "eval_overall_recall": 0.7868852459016393,
87
+ "eval_runtime": 0.5007,
88
+ "eval_samples_per_second": 191.714,
89
+ "eval_steps_per_second": 23.964,
90
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_f1": 0.0,
91
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_number": 1,
92
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_precision": 0.0,
93
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_recall": 0.0,
94
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_f1": 0.6115702479338844,
95
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_number": 73,
96
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_precision": 0.7708333333333334,
97
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_recall": 0.5068493150684932,
98
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_f1": 0.6190476190476191,
99
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_number": 41,
100
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_precision": 0.6046511627906976,
101
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_recall": 0.6341463414634146,
102
+ "step": 54
103
+ },
104
+ {
105
+ "epoch": 1.02,
106
+ "learning_rate": 4.976806486926926e-05,
107
+ "loss": 0.216,
108
+ "step": 55
109
+ },
110
+ {
111
+ "epoch": 1.11,
112
+ "learning_rate": 4.9650933541324506e-05,
113
+ "loss": 0.1322,
114
+ "step": 60
115
+ },
116
+ {
117
+ "epoch": 1.2,
118
+ "learning_rate": 4.951014179247555e-05,
119
+ "loss": 0.1814,
120
+ "step": 65
121
+ },
122
+ {
123
+ "epoch": 1.3,
124
+ "learning_rate": 4.934582475724101e-05,
125
+ "loss": 0.1226,
126
+ "step": 70
127
+ },
128
+ {
129
+ "epoch": 1.39,
130
+ "learning_rate": 4.9158140150143484e-05,
131
+ "loss": 0.1595,
132
+ "step": 75
133
+ },
134
+ {
135
+ "epoch": 1.48,
136
+ "learning_rate": 4.8947268114332274e-05,
137
+ "loss": 0.172,
138
+ "step": 80
139
+ },
140
+ {
141
+ "epoch": 1.57,
142
+ "learning_rate": 4.8713411048678635e-05,
143
+ "loss": 0.1202,
144
+ "step": 85
145
+ },
146
+ {
147
+ "epoch": 1.67,
148
+ "learning_rate": 4.8456793413509634e-05,
149
+ "loss": 0.1816,
150
+ "step": 90
151
+ },
152
+ {
153
+ "epoch": 1.76,
154
+ "learning_rate": 4.817766151516693e-05,
155
+ "loss": 0.164,
156
+ "step": 95
157
+ },
158
+ {
159
+ "epoch": 1.85,
160
+ "learning_rate": 4.787628326959747e-05,
161
+ "loss": 0.1345,
162
+ "step": 100
163
+ },
164
+ {
165
+ "epoch": 1.94,
166
+ "learning_rate": 4.755294794520276e-05,
167
+ "loss": 0.1863,
168
+ "step": 105
169
+ },
170
+ {
171
+ "epoch": 2.0,
172
+ "eval_PREDICATE_f1": 0.9446494464944649,
173
+ "eval_PREDICATE_number": 129,
174
+ "eval_PREDICATE_precision": 0.9014084507042254,
175
+ "eval_PREDICATE_recall": 0.9922480620155039,
176
+ "eval_loss": 0.1267632097005844,
177
+ "eval_overall_accuracy": 0.9556517036235803,
178
+ "eval_overall_f1": 0.8220338983050847,
179
+ "eval_overall_precision": 0.8508771929824561,
180
+ "eval_overall_recall": 0.7950819672131147,
181
+ "eval_runtime": 0.5235,
182
+ "eval_samples_per_second": 183.39,
183
+ "eval_steps_per_second": 22.924,
184
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_f1": 0.0,
185
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_number": 1,
186
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_precision": 0.0,
187
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_recall": 0.0,
188
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_f1": 0.6440677966101694,
189
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_number": 73,
190
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_precision": 0.8444444444444444,
191
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_recall": 0.5205479452054794,
192
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_f1": 0.6829268292682927,
193
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_number": 41,
194
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_precision": 0.6829268292682927,
195
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_recall": 0.6829268292682927,
196
+ "step": 108
197
+ },
198
+ {
199
+ "epoch": 2.04,
200
+ "learning_rate": 4.72079658851938e-05,
201
+ "loss": 0.0882,
202
+ "step": 110
203
+ },
204
+ {
205
+ "epoch": 2.13,
206
+ "learning_rate": 4.684166820971779e-05,
207
+ "loss": 0.0917,
208
+ "step": 115
209
+ },
210
+ {
211
+ "epoch": 2.22,
212
+ "learning_rate": 4.6454406498043105e-05,
213
+ "loss": 0.1179,
214
+ "step": 120
215
+ },
216
+ {
217
+ "epoch": 2.31,
218
+ "learning_rate": 4.604655245110684e-05,
219
+ "loss": 0.0599,
220
+ "step": 125
221
+ },
222
+ {
223
+ "epoch": 2.41,
224
+ "learning_rate": 4.561849753474951e-05,
225
+ "loss": 0.0923,
226
+ "step": 130
227
+ },
228
+ {
229
+ "epoch": 2.5,
230
+ "learning_rate": 4.517065260397887e-05,
231
+ "loss": 0.0903,
232
+ "step": 135
233
+ },
234
+ {
235
+ "epoch": 2.59,
236
+ "learning_rate": 4.470344750862368e-05,
237
+ "loss": 0.0941,
238
+ "step": 140
239
+ },
240
+ {
241
+ "epoch": 2.69,
242
+ "learning_rate": 4.421733068075595e-05,
243
+ "loss": 0.0881,
244
+ "step": 145
245
+ },
246
+ {
247
+ "epoch": 2.78,
248
+ "learning_rate": 4.371276870427753e-05,
249
+ "loss": 0.0653,
250
+ "step": 150
251
+ },
252
+ {
253
+ "epoch": 2.87,
254
+ "learning_rate": 4.319024586708428e-05,
255
+ "loss": 0.1036,
256
+ "step": 155
257
+ },
258
+ {
259
+ "epoch": 2.96,
260
+ "learning_rate": 4.265026369623761e-05,
261
+ "loss": 0.0668,
262
+ "step": 160
263
+ },
264
+ {
265
+ "epoch": 3.0,
266
+ "eval_PREDICATE_f1": 0.9584905660377359,
267
+ "eval_PREDICATE_number": 129,
268
+ "eval_PREDICATE_precision": 0.9338235294117647,
269
+ "eval_PREDICATE_recall": 0.9844961240310077,
270
+ "eval_loss": 0.1287979632616043,
271
+ "eval_overall_accuracy": 0.9599783666846944,
272
+ "eval_overall_f1": 0.8458333333333333,
273
+ "eval_overall_precision": 0.8601694915254238,
274
+ "eval_overall_recall": 0.8319672131147541,
275
+ "eval_runtime": 0.4971,
276
+ "eval_samples_per_second": 193.12,
277
+ "eval_steps_per_second": 24.14,
278
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_f1": 0.0,
279
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_number": 1,
280
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_precision": 0.0,
281
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_recall": 0.0,
282
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_f1": 0.6929133858267718,
283
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_number": 73,
284
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_precision": 0.8148148148148148,
285
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_recall": 0.6027397260273972,
286
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_f1": 0.7356321839080461,
287
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_number": 41,
288
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_precision": 0.6956521739130435,
289
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_recall": 0.7804878048780488,
290
+ "step": 162
291
+ },
292
+ {
293
+ "epoch": 3.06,
294
+ "learning_rate": 4.209334047658956e-05,
295
+ "loss": 0.0458,
296
+ "step": 165
297
+ },
298
+ {
299
+ "epoch": 3.15,
300
+ "learning_rate": 4.152001075332341e-05,
301
+ "loss": 0.044,
302
+ "step": 170
303
+ },
304
+ {
305
+ "epoch": 3.24,
306
+ "learning_rate": 4.09308248188874e-05,
307
+ "loss": 0.0489,
308
+ "step": 175
309
+ },
310
+ {
311
+ "epoch": 3.33,
312
+ "learning_rate": 4.0326348184813826e-05,
313
+ "loss": 0.056,
314
+ "step": 180
315
+ },
316
+ {
317
+ "epoch": 3.43,
318
+ "learning_rate": 3.970716103893065e-05,
319
+ "loss": 0.05,
320
+ "step": 185
321
+ },
322
+ {
323
+ "epoch": 3.52,
324
+ "learning_rate": 3.907385768848656e-05,
325
+ "loss": 0.0403,
326
+ "step": 190
327
+ },
328
+ {
329
+ "epoch": 3.61,
330
+ "learning_rate": 3.842704598972384e-05,
331
+ "loss": 0.0573,
332
+ "step": 195
333
+ },
334
+ {
335
+ "epoch": 3.7,
336
+ "learning_rate": 3.776734676444678e-05,
337
+ "loss": 0.0624,
338
+ "step": 200
339
+ },
340
+ {
341
+ "epoch": 3.8,
342
+ "learning_rate": 3.709539320414544e-05,
343
+ "loss": 0.0465,
344
+ "step": 205
345
+ },
346
+ {
347
+ "epoch": 3.89,
348
+ "learning_rate": 3.641183026224675e-05,
349
+ "loss": 0.0677,
350
+ "step": 210
351
+ },
352
+ {
353
+ "epoch": 3.98,
354
+ "learning_rate": 3.5717314035076355e-05,
355
+ "loss": 0.039,
356
+ "step": 215
357
+ },
358
+ {
359
+ "epoch": 4.0,
360
+ "eval_PREDICATE_f1": 0.9407407407407407,
361
+ "eval_PREDICATE_number": 129,
362
+ "eval_PREDICATE_precision": 0.900709219858156,
363
+ "eval_PREDICATE_recall": 0.9844961240310077,
364
+ "eval_loss": 0.16953788697719574,
365
+ "eval_overall_accuracy": 0.9556517036235803,
366
+ "eval_overall_f1": 0.8309572301425663,
367
+ "eval_overall_precision": 0.8259109311740891,
368
+ "eval_overall_recall": 0.8360655737704918,
369
+ "eval_runtime": 0.5165,
370
+ "eval_samples_per_second": 185.878,
371
+ "eval_steps_per_second": 23.235,
372
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_f1": 0.0,
373
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_number": 1,
374
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_precision": 0.0,
375
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_recall": 0.0,
376
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_f1": 0.6499999999999999,
377
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_number": 73,
378
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_precision": 0.8297872340425532,
379
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_recall": 0.5342465753424658,
380
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_f1": 0.76,
381
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_number": 41,
382
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_precision": 0.6440677966101694,
383
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_recall": 0.926829268292683,
384
+ "step": 216
385
+ },
386
+ {
387
+ "epoch": 4.07,
388
+ "learning_rate": 3.501251113212521e-05,
389
+ "loss": 0.0332,
390
+ "step": 220
391
+ },
392
+ {
393
+ "epoch": 4.17,
394
+ "learning_rate": 3.429809803622551e-05,
395
+ "loss": 0.0549,
396
+ "step": 225
397
+ },
398
+ {
399
+ "epoch": 4.26,
400
+ "learning_rate": 3.357476045424997e-05,
401
+ "loss": 0.0214,
402
+ "step": 230
403
+ },
404
+ {
405
+ "epoch": 4.35,
406
+ "learning_rate": 3.2843192658957774e-05,
407
+ "loss": 0.0352,
408
+ "step": 235
409
+ },
410
+ {
411
+ "epoch": 4.44,
412
+ "learning_rate": 3.210409682261866e-05,
413
+ "loss": 0.0521,
414
+ "step": 240
415
+ },
416
+ {
417
+ "epoch": 4.54,
418
+ "learning_rate": 3.135818234305511e-05,
419
+ "loss": 0.0267,
420
+ "step": 245
421
+ },
422
+ {
423
+ "epoch": 4.63,
424
+ "learning_rate": 3.060616516274921e-05,
425
+ "loss": 0.0454,
426
+ "step": 250
427
+ },
428
+ {
429
+ "epoch": 4.72,
430
+ "learning_rate": 2.984876708166782e-05,
431
+ "loss": 0.0234,
432
+ "step": 255
433
+ },
434
+ {
435
+ "epoch": 4.81,
436
+ "learning_rate": 2.908671506446566e-05,
437
+ "loss": 0.034,
438
+ "step": 260
439
+ },
440
+ {
441
+ "epoch": 4.91,
442
+ "learning_rate": 2.8320740542731212e-05,
443
+ "loss": 0.0198,
444
+ "step": 265
445
+ },
446
+ {
447
+ "epoch": 5.0,
448
+ "learning_rate": 2.7551578712945208e-05,
449
+ "loss": 0.0187,
450
+ "step": 270
451
+ },
452
+ {
453
+ "epoch": 5.0,
454
+ "eval_PREDICATE_f1": 0.9465648854961832,
455
+ "eval_PREDICATE_number": 129,
456
+ "eval_PREDICATE_precision": 0.9323308270676691,
457
+ "eval_PREDICATE_recall": 0.9612403100775194,
458
+ "eval_loss": 0.1955076903104782,
459
+ "eval_overall_accuracy": 0.955110870740941,
460
+ "eval_overall_f1": 0.8195329087048832,
461
+ "eval_overall_precision": 0.8502202643171806,
462
+ "eval_overall_recall": 0.7909836065573771,
463
+ "eval_runtime": 0.5123,
464
+ "eval_samples_per_second": 187.387,
465
+ "eval_steps_per_second": 23.423,
466
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_f1": 0.0,
467
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_number": 1,
468
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_precision": 0.0,
469
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_recall": 0.0,
470
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_f1": 0.6511627906976744,
471
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_number": 73,
472
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_precision": 0.75,
473
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_recall": 0.5753424657534246,
474
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_f1": 0.6835443037974684,
475
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_number": 41,
476
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_precision": 0.7105263157894737,
477
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_recall": 0.6585365853658537,
478
+ "step": 270
479
+ },
480
+ {
481
+ "epoch": 5.09,
482
+ "learning_rate": 2.6779967830825454e-05,
483
+ "loss": 0.0178,
484
+ "step": 275
485
+ },
486
+ {
487
+ "epoch": 5.19,
488
+ "learning_rate": 2.600664850273538e-05,
489
+ "loss": 0.0231,
490
+ "step": 280
491
+ },
492
+ {
493
+ "epoch": 5.28,
494
+ "learning_rate": 2.5232362974836394e-05,
495
+ "loss": 0.0149,
496
+ "step": 285
497
+ },
498
+ {
499
+ "epoch": 5.37,
500
+ "learning_rate": 2.4457854420666278e-05,
501
+ "loss": 0.0237,
502
+ "step": 290
503
+ },
504
+ {
505
+ "epoch": 5.46,
506
+ "learning_rate": 2.3683866227827603e-05,
507
+ "loss": 0.0134,
508
+ "step": 295
509
+ },
510
+ {
511
+ "epoch": 5.56,
512
+ "learning_rate": 2.2911141284470466e-05,
513
+ "loss": 0.0154,
514
+ "step": 300
515
+ },
516
+ {
517
+ "epoch": 5.65,
518
+ "learning_rate": 2.214042126625486e-05,
519
+ "loss": 0.0361,
520
+ "step": 305
521
+ },
522
+ {
523
+ "epoch": 5.74,
524
+ "learning_rate": 2.137244592447658e-05,
525
+ "loss": 0.0088,
526
+ "step": 310
527
+ },
528
+ {
529
+ "epoch": 5.83,
530
+ "learning_rate": 2.0607952376040353e-05,
531
+ "loss": 0.042,
532
+ "step": 315
533
+ },
534
+ {
535
+ "epoch": 5.93,
536
+ "learning_rate": 1.9847674395961406e-05,
537
+ "loss": 0.0216,
538
+ "step": 320
539
+ },
540
+ {
541
+ "epoch": 6.0,
542
+ "eval_PREDICATE_f1": 0.9501915708812261,
543
+ "eval_PREDICATE_number": 129,
544
+ "eval_PREDICATE_precision": 0.9393939393939394,
545
+ "eval_PREDICATE_recall": 0.9612403100775194,
546
+ "eval_loss": 0.2082541584968567,
547
+ "eval_overall_accuracy": 0.9561925365062196,
548
+ "eval_overall_f1": 0.8252631578947369,
549
+ "eval_overall_precision": 0.8484848484848485,
550
+ "eval_overall_recall": 0.8032786885245902,
551
+ "eval_runtime": 0.4958,
552
+ "eval_samples_per_second": 193.643,
553
+ "eval_steps_per_second": 24.205,
554
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_f1": 0.0,
555
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_number": 1,
556
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_precision": 0.0,
557
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_recall": 0.0,
558
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_f1": 0.6717557251908397,
559
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_number": 73,
560
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_precision": 0.7586206896551724,
561
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_recall": 0.6027397260273972,
562
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_f1": 0.6829268292682927,
563
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_number": 41,
564
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_precision": 0.6829268292682927,
565
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_recall": 0.6829268292682927,
566
+ "step": 324
567
+ },
568
+ {
569
+ "epoch": 6.02,
570
+ "learning_rate": 1.909234171307466e-05,
571
+ "loss": 0.0091,
572
+ "step": 325
573
+ },
574
+ {
575
+ "epoch": 6.11,
576
+ "learning_rate": 1.8342679309627543e-05,
577
+ "loss": 0.0164,
578
+ "step": 330
579
+ },
580
+ {
581
+ "epoch": 6.2,
582
+ "learning_rate": 1.7599406725428617e-05,
583
+ "loss": 0.01,
584
+ "step": 335
585
+ },
586
+ {
587
+ "epoch": 6.3,
588
+ "learning_rate": 1.6863237367220058e-05,
589
+ "loss": 0.0103,
590
+ "step": 340
591
+ },
592
+ {
593
+ "epoch": 6.39,
594
+ "learning_rate": 1.613487782393661e-05,
595
+ "loss": 0.0151,
596
+ "step": 345
597
+ },
598
+ {
599
+ "epoch": 6.48,
600
+ "learning_rate": 1.5415027188508573e-05,
601
+ "loss": 0.0039,
602
+ "step": 350
603
+ },
604
+ {
605
+ "epoch": 6.57,
606
+ "learning_rate": 1.4704376386859448e-05,
607
+ "loss": 0.0211,
608
+ "step": 355
609
+ },
610
+ {
611
+ "epoch": 6.67,
612
+ "learning_rate": 1.4003607514742529e-05,
613
+ "loss": 0.0257,
614
+ "step": 360
615
+ },
616
+ {
617
+ "epoch": 6.76,
618
+ "learning_rate": 1.3313393183052747e-05,
619
+ "loss": 0.0072,
620
+ "step": 365
621
+ },
622
+ {
623
+ "epoch": 6.85,
624
+ "learning_rate": 1.2634395872242433e-05,
625
+ "loss": 0.0171,
626
+ "step": 370
627
+ },
628
+ {
629
+ "epoch": 6.94,
630
+ "learning_rate": 1.1967267296460208e-05,
631
+ "loss": 0.0176,
632
+ "step": 375
633
+ },
634
+ {
635
+ "epoch": 7.0,
636
+ "eval_PREDICATE_f1": 0.9465648854961832,
637
+ "eval_PREDICATE_number": 129,
638
+ "eval_PREDICATE_precision": 0.9323308270676691,
639
+ "eval_PREDICATE_recall": 0.9612403100775194,
640
+ "eval_loss": 0.220278799533844,
641
+ "eval_overall_accuracy": 0.9578150351541374,
642
+ "eval_overall_f1": 0.8356997971602433,
643
+ "eval_overall_precision": 0.8273092369477911,
644
+ "eval_overall_recall": 0.8442622950819673,
645
+ "eval_runtime": 0.4918,
646
+ "eval_samples_per_second": 195.192,
647
+ "eval_steps_per_second": 24.399,
648
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_f1": 0.0,
649
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_number": 1,
650
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_precision": 0.0,
651
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_recall": 0.0,
652
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_f1": 0.6906474820143884,
653
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_number": 73,
654
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_precision": 0.7272727272727273,
655
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_recall": 0.6575342465753424,
656
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_f1": 0.7472527472527474,
657
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_number": 41,
658
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_precision": 0.68,
659
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_recall": 0.8292682926829268,
660
+ "step": 378
661
+ },
662
+ {
663
+ "epoch": 7.04,
664
+ "learning_rate": 1.131264777802387e-05,
665
+ "loss": 0.008,
666
+ "step": 380
667
+ },
668
+ {
669
+ "epoch": 7.13,
670
+ "learning_rate": 1.0671165632827097e-05,
671
+ "loss": 0.0118,
672
+ "step": 385
673
+ },
674
+ {
675
+ "epoch": 7.22,
676
+ "learning_rate": 1.0043436567270312e-05,
677
+ "loss": 0.0111,
678
+ "step": 390
679
+ },
680
+ {
681
+ "epoch": 7.31,
682
+ "learning_rate": 9.43006308729432e-06,
683
+ "loss": 0.0085,
684
+ "step": 395
685
+ },
686
+ {
687
+ "epoch": 7.41,
688
+ "learning_rate": 8.831633920083968e-06,
689
+ "loss": 0.0163,
690
+ "step": 400
691
+ },
692
+ {
693
+ "epoch": 7.5,
694
+ "learning_rate": 8.248723448996942e-06,
695
+ "loss": 0.0045,
696
+ "step": 405
697
+ },
698
+ {
699
+ "epoch": 7.59,
700
+ "learning_rate": 7.681891162260015e-06,
701
+ "loss": 0.0077,
702
+ "step": 410
703
+ },
704
+ {
705
+ "epoch": 7.69,
706
+ "learning_rate": 7.1316811159619305e-06,
707
+ "loss": 0.0078,
708
+ "step": 415
709
+ },
710
+ {
711
+ "epoch": 7.78,
712
+ "learning_rate": 6.598621411858319e-06,
713
+ "loss": 0.0147,
714
+ "step": 420
715
+ },
716
+ {
717
+ "epoch": 7.87,
718
+ "learning_rate": 6.083223690489901e-06,
719
+ "loss": 0.0145,
720
+ "step": 425
721
+ },
722
+ {
723
+ "epoch": 7.96,
724
+ "learning_rate": 5.585982640100415e-06,
725
+ "loss": 0.0037,
726
+ "step": 430
727
+ },
728
+ {
729
+ "epoch": 8.0,
730
+ "eval_PREDICATE_f1": 0.9465648854961832,
731
+ "eval_PREDICATE_number": 129,
732
+ "eval_PREDICATE_precision": 0.9323308270676691,
733
+ "eval_PREDICATE_recall": 0.9612403100775194,
734
+ "eval_loss": 0.2312764674425125,
735
+ "eval_overall_accuracy": 0.9567333693888589,
736
+ "eval_overall_f1": 0.8329896907216495,
737
+ "eval_overall_precision": 0.8381742738589212,
738
+ "eval_overall_recall": 0.8278688524590164,
739
+ "eval_runtime": 0.5097,
740
+ "eval_samples_per_second": 188.335,
741
+ "eval_steps_per_second": 23.542,
742
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_f1": 0.0,
743
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_number": 1,
744
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_precision": 0.0,
745
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_recall": 0.0,
746
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_f1": 0.6865671641791045,
747
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_number": 73,
748
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_precision": 0.7540983606557377,
749
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_recall": 0.6301369863013698,
750
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_f1": 0.7272727272727273,
751
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_number": 41,
752
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_precision": 0.6808510638297872,
753
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_recall": 0.7804878048780488,
754
+ "step": 432
755
+ },
756
+ {
757
+ "epoch": 8.06,
758
+ "learning_rate": 5.107375521825791e-06,
759
+ "loss": 0.0066,
760
+ "step": 435
761
+ },
762
+ {
763
+ "epoch": 8.15,
764
+ "learning_rate": 4.647861711610024e-06,
765
+ "loss": 0.0083,
766
+ "step": 440
767
+ },
768
+ {
769
+ "epoch": 8.24,
770
+ "learning_rate": 4.207882259287707e-06,
771
+ "loss": 0.0091,
772
+ "step": 445
773
+ },
774
+ {
775
+ "epoch": 8.33,
776
+ "learning_rate": 3.7878594652562575e-06,
777
+ "loss": 0.009,
778
+ "step": 450
779
+ },
780
+ {
781
+ "epoch": 8.43,
782
+ "learning_rate": 3.388196475144198e-06,
783
+ "loss": 0.0047,
784
+ "step": 455
785
+ },
786
+ {
787
+ "epoch": 8.52,
788
+ "learning_rate": 3.0092768928645375e-06,
789
+ "loss": 0.0048,
790
+ "step": 460
791
+ },
792
+ {
793
+ "epoch": 8.61,
794
+ "learning_rate": 2.651464412424667e-06,
795
+ "loss": 0.0114,
796
+ "step": 465
797
+ },
798
+ {
799
+ "epoch": 8.7,
800
+ "learning_rate": 2.315102468846142e-06,
801
+ "loss": 0.0058,
802
+ "step": 470
803
+ },
804
+ {
805
+ "epoch": 8.8,
806
+ "learning_rate": 2.0005139085293945e-06,
807
+ "loss": 0.0049,
808
+ "step": 475
809
+ },
810
+ {
811
+ "epoch": 8.89,
812
+ "learning_rate": 1.7080006793798176e-06,
813
+ "loss": 0.0125,
814
+ "step": 480
815
+ },
816
+ {
817
+ "epoch": 8.98,
818
+ "learning_rate": 1.4378435409925867e-06,
819
+ "loss": 0.0089,
820
+ "step": 485
821
+ },
822
+ {
823
+ "epoch": 9.0,
824
+ "eval_PREDICATE_f1": 0.9465648854961832,
825
+ "eval_PREDICATE_number": 129,
826
+ "eval_PREDICATE_precision": 0.9323308270676691,
827
+ "eval_PREDICATE_recall": 0.9612403100775194,
828
+ "eval_loss": 0.24090717732906342,
829
+ "eval_overall_accuracy": 0.9588967009194159,
830
+ "eval_overall_f1": 0.8418891170431212,
831
+ "eval_overall_precision": 0.8436213991769548,
832
+ "eval_overall_recall": 0.8401639344262295,
833
+ "eval_runtime": 0.502,
834
+ "eval_samples_per_second": 191.245,
835
+ "eval_steps_per_second": 23.906,
836
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_f1": 0.0,
837
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_number": 1,
838
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_precision": 0.0,
839
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_recall": 0.0,
840
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_f1": 0.7014925373134328,
841
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_number": 73,
842
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_precision": 0.7704918032786885,
843
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_recall": 0.6438356164383562,
844
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_f1": 0.7555555555555555,
845
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_number": 41,
846
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_precision": 0.6938775510204082,
847
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_recall": 0.8292682926829268,
848
+ "step": 486
849
+ },
850
+ {
851
+ "epoch": 9.07,
852
+ "learning_rate": 1.1903017951744144e-06,
853
+ "loss": 0.0055,
854
+ "step": 490
855
+ },
856
+ {
857
+ "epoch": 9.17,
858
+ "learning_rate": 9.656130370609056e-07,
859
+ "loss": 0.0089,
860
+ "step": 495
861
+ },
862
+ {
863
+ "epoch": 9.26,
864
+ "learning_rate": 7.639929270683438e-07,
865
+ "loss": 0.0064,
866
+ "step": 500
867
+ },
868
+ {
869
+ "epoch": 9.35,
870
+ "learning_rate": 5.856349838988612e-07,
871
+ "loss": 0.0057,
872
+ "step": 505
873
+ },
874
+ {
875
+ "epoch": 9.44,
876
+ "learning_rate": 4.307103987976041e-07,
877
+ "loss": 0.002,
878
+ "step": 510
879
+ },
880
+ {
881
+ "epoch": 9.54,
882
+ "learning_rate": 2.9936787124022206e-07,
883
+ "loss": 0.006,
884
+ "step": 515
885
+ },
886
+ {
887
+ "epoch": 9.63,
888
+ "learning_rate": 1.917334662083714e-07,
889
+ "loss": 0.0094,
890
+ "step": 520
891
+ },
892
+ {
893
+ "epoch": 9.72,
894
+ "learning_rate": 1.0791049319021085e-07,
895
+ "loss": 0.0044,
896
+ "step": 525
897
+ },
898
+ {
899
+ "epoch": 9.81,
900
+ "learning_rate": 4.797940702205572e-08,
901
+ "loss": 0.0092,
902
+ "step": 530
903
+ },
904
+ {
905
+ "epoch": 9.91,
906
+ "learning_rate": 1.1997730666338247e-08,
907
+ "loss": 0.009,
908
+ "step": 535
909
+ },
910
+ {
911
+ "epoch": 10.0,
912
+ "learning_rate": 0.0,
913
+ "loss": 0.0043,
914
+ "step": 540
915
+ },
916
+ {
917
+ "epoch": 10.0,
918
+ "eval_PREDICATE_f1": 0.9465648854961832,
919
+ "eval_PREDICATE_number": 129,
920
+ "eval_PREDICATE_precision": 0.9323308270676691,
921
+ "eval_PREDICATE_recall": 0.9612403100775194,
922
+ "eval_loss": 0.2416924387216568,
923
+ "eval_overall_accuracy": 0.9583558680367766,
924
+ "eval_overall_f1": 0.8395061728395061,
925
+ "eval_overall_precision": 0.8429752066115702,
926
+ "eval_overall_recall": 0.8360655737704918,
927
+ "eval_runtime": 0.5002,
928
+ "eval_samples_per_second": 191.922,
929
+ "eval_steps_per_second": 23.99,
930
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_f1": 0.0,
931
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_number": 1,
932
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_precision": 0.0,
933
+ "eval_\u0418\u041d\u0421\u0422\u0420\u0423\u041c\u0415\u041d\u0422_recall": 0.0,
934
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_f1": 0.6917293233082706,
935
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_number": 73,
936
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_precision": 0.7666666666666667,
937
+ "eval_\u041a\u0410\u0423\u0417\u0410\u0422\u041e\u0420_recall": 0.6301369863013698,
938
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_f1": 0.7555555555555555,
939
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_number": 41,
940
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_precision": 0.6938775510204082,
941
+ "eval_\u042d\u041a\u0421\u041f\u0415\u0420\u0418\u0415\u041d\u0426\u0415\u0420_recall": 0.8292682926829268,
942
+ "step": 540
943
+ },
944
+ {
945
+ "epoch": 10.0,
946
+ "step": 540,
947
+ "total_flos": 127804763383596.0,
948
+ "train_loss": 0.10899043403289936,
949
+ "train_runtime": 173.8981,
950
+ "train_samples_per_second": 49.397,
951
+ "train_steps_per_second": 3.105
952
+ }
953
+ ],
954
+ "max_steps": 540,
955
+ "num_train_epochs": 10,
956
+ "total_flos": 127804763383596.0,
957
+ "trial_name": null,
958
+ "trial_params": null
959
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44d086c9d09760d732d84096da922939157e9e704001df2e4a68ec4a53468b21
3
+ size 2927
vocab.txt ADDED
The diff for this file is too large to render. See raw diff