Training in progress, step 500
Browse files- added_tokens.json +3 -0
- bpe.codes +0 -0
- config.json +11 -9
- pytorch_model.bin +2 -2
- runs/May12_16-01-38_5165d9a7ee5b/1683907656.4304566/events.out.tfevents.1683907656.5165d9a7ee5b.168.7 +3 -0
- runs/May12_16-01-38_5165d9a7ee5b/events.out.tfevents.1683907656.5165d9a7ee5b.168.6 +3 -0
- special_tokens_map.json +1 -7
- tokenizer_config.json +2 -5
- training_args.bin +1 -1
- vocab.txt +0 -0
added_tokens.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"<mask>": 64000
|
3 |
+
}
|
bpe.codes
ADDED
The diff for this file is too large to render.
See raw diff
|
|
config.json
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
"EncoderDecoderModel"
|
5 |
],
|
6 |
"decoder": {
|
7 |
-
"_name_or_path": "
|
8 |
"add_cross_attention": true,
|
9 |
"architectures": [
|
10 |
"RobertaForMaskedLM"
|
@@ -26,6 +26,7 @@
|
|
26 |
"finetuning_task": null,
|
27 |
"forced_bos_token_id": null,
|
28 |
"forced_eos_token_id": null,
|
|
|
29 |
"hidden_act": "gelu",
|
30 |
"hidden_dropout_prob": 0.1,
|
31 |
"hidden_size": 768,
|
@@ -44,7 +45,7 @@
|
|
44 |
"layer_norm_eps": 1e-05,
|
45 |
"length_penalty": 1.0,
|
46 |
"max_length": 20,
|
47 |
-
"max_position_embeddings":
|
48 |
"min_length": 0,
|
49 |
"model_type": "roberta",
|
50 |
"no_repeat_ngram_size": 0,
|
@@ -72,7 +73,7 @@
|
|
72 |
"tf_legacy_loss": false,
|
73 |
"tie_encoder_decoder": false,
|
74 |
"tie_word_embeddings": true,
|
75 |
-
"tokenizer_class":
|
76 |
"top_k": 50,
|
77 |
"top_p": 1.0,
|
78 |
"torch_dtype": null,
|
@@ -82,12 +83,12 @@
|
|
82 |
"typical_p": 1.0,
|
83 |
"use_bfloat16": false,
|
84 |
"use_cache": true,
|
85 |
-
"vocab_size":
|
86 |
},
|
87 |
"decoder_start_token_id": 0,
|
88 |
"early_stopping": true,
|
89 |
"encoder": {
|
90 |
-
"_name_or_path": "
|
91 |
"add_cross_attention": false,
|
92 |
"architectures": [
|
93 |
"RobertaForMaskedLM"
|
@@ -109,6 +110,7 @@
|
|
109 |
"finetuning_task": null,
|
110 |
"forced_bos_token_id": null,
|
111 |
"forced_eos_token_id": null,
|
|
|
112 |
"hidden_act": "gelu",
|
113 |
"hidden_dropout_prob": 0.1,
|
114 |
"hidden_size": 768,
|
@@ -127,7 +129,7 @@
|
|
127 |
"layer_norm_eps": 1e-05,
|
128 |
"length_penalty": 1.0,
|
129 |
"max_length": 20,
|
130 |
-
"max_position_embeddings":
|
131 |
"min_length": 0,
|
132 |
"model_type": "roberta",
|
133 |
"no_repeat_ngram_size": 0,
|
@@ -155,7 +157,7 @@
|
|
155 |
"tf_legacy_loss": false,
|
156 |
"tie_encoder_decoder": false,
|
157 |
"tie_word_embeddings": true,
|
158 |
-
"tokenizer_class":
|
159 |
"top_k": 50,
|
160 |
"top_p": 1.0,
|
161 |
"torch_dtype": null,
|
@@ -165,7 +167,7 @@
|
|
165 |
"typical_p": 1.0,
|
166 |
"use_bfloat16": false,
|
167 |
"use_cache": true,
|
168 |
-
"vocab_size":
|
169 |
},
|
170 |
"eos_token_id": 2,
|
171 |
"is_encoder_decoder": true,
|
@@ -178,5 +180,5 @@
|
|
178 |
"tie_encoder_decoder": true,
|
179 |
"torch_dtype": "float32",
|
180 |
"transformers_version": null,
|
181 |
-
"vocab_size":
|
182 |
}
|
|
|
4 |
"EncoderDecoderModel"
|
5 |
],
|
6 |
"decoder": {
|
7 |
+
"_name_or_path": "vinai/phobert-base",
|
8 |
"add_cross_attention": true,
|
9 |
"architectures": [
|
10 |
"RobertaForMaskedLM"
|
|
|
26 |
"finetuning_task": null,
|
27 |
"forced_bos_token_id": null,
|
28 |
"forced_eos_token_id": null,
|
29 |
+
"gradient_checkpointing": false,
|
30 |
"hidden_act": "gelu",
|
31 |
"hidden_dropout_prob": 0.1,
|
32 |
"hidden_size": 768,
|
|
|
45 |
"layer_norm_eps": 1e-05,
|
46 |
"length_penalty": 1.0,
|
47 |
"max_length": 20,
|
48 |
+
"max_position_embeddings": 258,
|
49 |
"min_length": 0,
|
50 |
"model_type": "roberta",
|
51 |
"no_repeat_ngram_size": 0,
|
|
|
73 |
"tf_legacy_loss": false,
|
74 |
"tie_encoder_decoder": false,
|
75 |
"tie_word_embeddings": true,
|
76 |
+
"tokenizer_class": "PhobertTokenizer",
|
77 |
"top_k": 50,
|
78 |
"top_p": 1.0,
|
79 |
"torch_dtype": null,
|
|
|
83 |
"typical_p": 1.0,
|
84 |
"use_bfloat16": false,
|
85 |
"use_cache": true,
|
86 |
+
"vocab_size": 64001
|
87 |
},
|
88 |
"decoder_start_token_id": 0,
|
89 |
"early_stopping": true,
|
90 |
"encoder": {
|
91 |
+
"_name_or_path": "vinai/phobert-base",
|
92 |
"add_cross_attention": false,
|
93 |
"architectures": [
|
94 |
"RobertaForMaskedLM"
|
|
|
110 |
"finetuning_task": null,
|
111 |
"forced_bos_token_id": null,
|
112 |
"forced_eos_token_id": null,
|
113 |
+
"gradient_checkpointing": false,
|
114 |
"hidden_act": "gelu",
|
115 |
"hidden_dropout_prob": 0.1,
|
116 |
"hidden_size": 768,
|
|
|
129 |
"layer_norm_eps": 1e-05,
|
130 |
"length_penalty": 1.0,
|
131 |
"max_length": 20,
|
132 |
+
"max_position_embeddings": 258,
|
133 |
"min_length": 0,
|
134 |
"model_type": "roberta",
|
135 |
"no_repeat_ngram_size": 0,
|
|
|
157 |
"tf_legacy_loss": false,
|
158 |
"tie_encoder_decoder": false,
|
159 |
"tie_word_embeddings": true,
|
160 |
+
"tokenizer_class": "PhobertTokenizer",
|
161 |
"top_k": 50,
|
162 |
"top_p": 1.0,
|
163 |
"torch_dtype": null,
|
|
|
167 |
"typical_p": 1.0,
|
168 |
"use_bfloat16": false,
|
169 |
"use_cache": true,
|
170 |
+
"vocab_size": 64001
|
171 |
},
|
172 |
"eos_token_id": 2,
|
173 |
"is_encoder_decoder": true,
|
|
|
180 |
"tie_encoder_decoder": true,
|
181 |
"torch_dtype": "float32",
|
182 |
"transformers_version": null,
|
183 |
+
"vocab_size": 64001
|
184 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1a203030b90c83185e77226eeaed08ad2c9c2447371c48a319e9b019226ead81
|
3 |
+
size 656232805
|
runs/May12_16-01-38_5165d9a7ee5b/1683907656.4304566/events.out.tfevents.1683907656.5165d9a7ee5b.168.7
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aca4cea1cafa9886991b2d68b5fe7e156b8b1fc222d99b8fb92558d24fcf9b18
|
3 |
+
size 6199
|
runs/May12_16-01-38_5165d9a7ee5b/events.out.tfevents.1683907656.5165d9a7ee5b.168.6
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ca4db850db7d5ef9d0bbe47c6592a3958fb7f503e8acad68c1fe66c9c69c69da
|
3 |
+
size 8771
|
special_tokens_map.json
CHANGED
@@ -2,13 +2,7 @@
|
|
2 |
"bos_token": "<s>",
|
3 |
"cls_token": "<s>",
|
4 |
"eos_token": "</s>",
|
5 |
-
"mask_token":
|
6 |
-
"content": "<mask>",
|
7 |
-
"lstrip": true,
|
8 |
-
"normalized": false,
|
9 |
-
"rstrip": false,
|
10 |
-
"single_word": false
|
11 |
-
},
|
12 |
"pad_token": "<pad>",
|
13 |
"sep_token": "</s>",
|
14 |
"unk_token": "<unk>"
|
|
|
2 |
"bos_token": "<s>",
|
3 |
"cls_token": "<s>",
|
4 |
"eos_token": "</s>",
|
5 |
+
"mask_token": "<mask>",
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
"pad_token": "<pad>",
|
7 |
"sep_token": "</s>",
|
8 |
"unk_token": "<unk>"
|
tokenizer_config.json
CHANGED
@@ -1,15 +1,12 @@
|
|
1 |
{
|
2 |
-
"add_prefix_space": false,
|
3 |
"bos_token": "<s>",
|
4 |
"clean_up_tokenization_spaces": true,
|
5 |
"cls_token": "<s>",
|
6 |
"eos_token": "</s>",
|
7 |
-
"errors": "replace",
|
8 |
"mask_token": "<mask>",
|
9 |
-
"model_max_length":
|
10 |
"pad_token": "<pad>",
|
11 |
"sep_token": "</s>",
|
12 |
-
"tokenizer_class": "
|
13 |
-
"trim_offsets": true,
|
14 |
"unk_token": "<unk>"
|
15 |
}
|
|
|
1 |
{
|
|
|
2 |
"bos_token": "<s>",
|
3 |
"clean_up_tokenization_spaces": true,
|
4 |
"cls_token": "<s>",
|
5 |
"eos_token": "</s>",
|
|
|
6 |
"mask_token": "<mask>",
|
7 |
+
"model_max_length": 256,
|
8 |
"pad_token": "<pad>",
|
9 |
"sep_token": "</s>",
|
10 |
+
"tokenizer_class": "PhobertTokenizer",
|
|
|
11 |
"unk_token": "<unk>"
|
12 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3771
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ca12c74006d536fad46377ac4ad99cf43ea95fbe535bb6d77fdb10d1e6999b4
|
3 |
size 3771
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|