Upload model.npz.yml with huggingface_hub
Browse files- model.npz.yml +246 -0
model.npz.yml
ADDED
@@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
authors: false
|
2 |
+
cite: false
|
3 |
+
build-info: ""
|
4 |
+
workspace: -8000
|
5 |
+
log: train.log
|
6 |
+
log-level: info
|
7 |
+
log-time-zone: PST8PDT
|
8 |
+
quiet: false
|
9 |
+
quiet-translation: true
|
10 |
+
seed: 141414
|
11 |
+
check-nan: false
|
12 |
+
interpolate-env-vars: true
|
13 |
+
relative-paths: false
|
14 |
+
dump-config: ""
|
15 |
+
sigterm: save-and-exit
|
16 |
+
model: model_files/model.npz
|
17 |
+
pretrained-model: ""
|
18 |
+
ignore-model-config: false
|
19 |
+
type: transformer
|
20 |
+
dim-vocabs:
|
21 |
+
- 64000
|
22 |
+
- 64000
|
23 |
+
dim-emb: 1024
|
24 |
+
factors-dim-emb: 0
|
25 |
+
factors-combine: sum
|
26 |
+
lemma-dependency: ""
|
27 |
+
lemma-dim-emb: 0
|
28 |
+
dim-rnn: 1024
|
29 |
+
enc-type: bidirectional
|
30 |
+
enc-cell: gru
|
31 |
+
enc-cell-depth: 1
|
32 |
+
enc-depth: 6
|
33 |
+
dec-cell: gru
|
34 |
+
dec-cell-base-depth: 2
|
35 |
+
dec-cell-high-depth: 1
|
36 |
+
dec-depth: 6
|
37 |
+
skip: false
|
38 |
+
layer-normalization: false
|
39 |
+
right-left: false
|
40 |
+
input-types:
|
41 |
+
[]
|
42 |
+
tied-embeddings: true
|
43 |
+
tied-embeddings-src: false
|
44 |
+
tied-embeddings-all: true
|
45 |
+
output-omit-bias: false
|
46 |
+
transformer-heads: 8
|
47 |
+
transformer-no-projection: false
|
48 |
+
transformer-rnn-projection: false
|
49 |
+
transformer-pool: false
|
50 |
+
transformer-dim-ffn: 8192
|
51 |
+
transformer-decoder-dim-ffn: 8192
|
52 |
+
transformer-ffn-depth: 2
|
53 |
+
transformer-decoder-ffn-depth: 0
|
54 |
+
transformer-ffn-activation: relu
|
55 |
+
transformer-dim-aan: 2048
|
56 |
+
transformer-aan-depth: 2
|
57 |
+
transformer-aan-activation: swish
|
58 |
+
transformer-aan-nogate: false
|
59 |
+
transformer-decoder-autoreg: self-attention
|
60 |
+
transformer-tied-layers: []
|
61 |
+
transformer-guided-alignment-layer: last
|
62 |
+
transformer-preprocess: ""
|
63 |
+
transformer-postprocess-emb: d
|
64 |
+
transformer-postprocess: dan
|
65 |
+
transformer-postprocess-top: ""
|
66 |
+
transformer-train-position-embeddings: false
|
67 |
+
transformer-depth-scaling: true
|
68 |
+
transformer-no-bias: false
|
69 |
+
transformer-no-affine: false
|
70 |
+
bert-mask-symbol: "[MASK]"
|
71 |
+
bert-sep-symbol: "[SEP]"
|
72 |
+
bert-class-symbol: "[CLS]"
|
73 |
+
bert-masking-fraction: 0.15
|
74 |
+
bert-train-type-embeddings: true
|
75 |
+
bert-type-vocab-size: 2
|
76 |
+
comet-final-sigmoid: false
|
77 |
+
comet-mix: false
|
78 |
+
comet-mix-norm: false
|
79 |
+
comet-dropout: 0.1
|
80 |
+
comet-mixup: 0
|
81 |
+
comet-mixup-reg: false
|
82 |
+
comet-pooler-ffn:
|
83 |
+
- 2048
|
84 |
+
- 1024
|
85 |
+
comet-prepend-zero: false
|
86 |
+
dropout-rnn: 0
|
87 |
+
dropout-src: 0
|
88 |
+
dropout-trg: 0
|
89 |
+
transformer-dropout: 0.1
|
90 |
+
transformer-dropout-attention: 0
|
91 |
+
transformer-dropout-ffn: 0.1
|
92 |
+
cost-type: ce-sum
|
93 |
+
multi-loss-type: sum
|
94 |
+
unlikelihood-loss: false
|
95 |
+
overwrite: false
|
96 |
+
overwrite-checkpoint: true
|
97 |
+
no-reload: false
|
98 |
+
train-sets:
|
99 |
+
- stdin
|
100 |
+
vocabs:
|
101 |
+
- vocab
|
102 |
+
- vocab
|
103 |
+
sentencepiece-alphas:
|
104 |
+
[]
|
105 |
+
sentencepiece-options: ""
|
106 |
+
sentencepiece-max-lines: 2000000
|
107 |
+
no-spm-encode: false
|
108 |
+
after-epochs: 0
|
109 |
+
after-batches: 0
|
110 |
+
after: 40e
|
111 |
+
disp-freq: 100Mt
|
112 |
+
disp-first: 10
|
113 |
+
disp-label-counts: true
|
114 |
+
save-freq: 1Gt
|
115 |
+
logical-epoch:
|
116 |
+
- 1Gt
|
117 |
+
max-length: 256
|
118 |
+
max-length-crop: false
|
119 |
+
tsv: true
|
120 |
+
tsv-fields: 2
|
121 |
+
shuffle: batches
|
122 |
+
no-restore-corpus: true
|
123 |
+
tempdir: /tmp
|
124 |
+
sqlite: ""
|
125 |
+
sqlite-drop: false
|
126 |
+
devices:
|
127 |
+
- 0
|
128 |
+
- 1
|
129 |
+
no-nccl: false
|
130 |
+
sharding: local
|
131 |
+
sync-freq: 200u
|
132 |
+
cpu-threads: 0
|
133 |
+
mini-batch: 1000
|
134 |
+
mini-batch-words: 500000
|
135 |
+
mini-batch-fit: true
|
136 |
+
mini-batch-fit-step: 5
|
137 |
+
gradient-checkpointing: false
|
138 |
+
maxi-batch: 1000
|
139 |
+
maxi-batch-sort: trg
|
140 |
+
shuffle-in-ram: true
|
141 |
+
data-threads: 8
|
142 |
+
all-caps-every: 0
|
143 |
+
english-title-case-every: 0
|
144 |
+
mini-batch-words-ref: 0
|
145 |
+
mini-batch-warmup: 4000
|
146 |
+
mini-batch-track-lr: false
|
147 |
+
mini-batch-round-up: true
|
148 |
+
optimizer: adam
|
149 |
+
optimizer-params:
|
150 |
+
- 0.9
|
151 |
+
- 0.999
|
152 |
+
- 1e-08
|
153 |
+
- 0.01
|
154 |
+
optimizer-delay: 1
|
155 |
+
sync-sgd: true
|
156 |
+
learn-rate: 0.0005
|
157 |
+
lr-report: true
|
158 |
+
lr-decay: 0
|
159 |
+
lr-decay-strategy: epoch+stalled
|
160 |
+
lr-decay-start:
|
161 |
+
- 10
|
162 |
+
- 1
|
163 |
+
lr-decay-freq: 50000
|
164 |
+
lr-decay-reset-optimizer: false
|
165 |
+
lr-decay-repeat-warmup: false
|
166 |
+
lr-decay-inv-sqrt:
|
167 |
+
- 4000
|
168 |
+
lr-warmup: 4000
|
169 |
+
lr-warmup-start-rate: 0
|
170 |
+
lr-warmup-cycle: false
|
171 |
+
lr-warmup-at-reload: false
|
172 |
+
label-smoothing: 0.1
|
173 |
+
factor-weight: 1
|
174 |
+
clip-norm: 0
|
175 |
+
exponential-smoothing: 1e-3
|
176 |
+
exponential-smoothing-replace-freq: 0
|
177 |
+
guided-alignment: none
|
178 |
+
guided-alignment-cost: ce
|
179 |
+
guided-alignment-weight: 0
|
180 |
+
data-weighting: ""
|
181 |
+
data-weighting-type: sentence
|
182 |
+
embedding-vectors:
|
183 |
+
[]
|
184 |
+
embedding-normalization: false
|
185 |
+
embedding-fix-src: false
|
186 |
+
embedding-fix-trg: false
|
187 |
+
precision:
|
188 |
+
- float32
|
189 |
+
- float32
|
190 |
+
cost-scaling:
|
191 |
+
- 256.f
|
192 |
+
- 10000
|
193 |
+
- 1.f
|
194 |
+
- 256.f
|
195 |
+
throw-on-divergence:
|
196 |
+
[]
|
197 |
+
custom-fallbacks:
|
198 |
+
[]
|
199 |
+
gradient-norm-average-window: 100
|
200 |
+
dynamic-gradient-scaling:
|
201 |
+
- 2
|
202 |
+
- log
|
203 |
+
check-gradient-nan: false
|
204 |
+
normalize-gradient: false
|
205 |
+
train-embedder-rank:
|
206 |
+
[]
|
207 |
+
quantize-bits: 0
|
208 |
+
quantize-optimization-steps: 0
|
209 |
+
quantize-log-based: false
|
210 |
+
quantize-biases: false
|
211 |
+
ulr: false
|
212 |
+
ulr-query-vectors: ""
|
213 |
+
ulr-keys-vectors: ""
|
214 |
+
ulr-trainable-transformation: false
|
215 |
+
ulr-dim-emb: 0
|
216 |
+
ulr-dropout: 0
|
217 |
+
ulr-softmax-temperature: 1
|
218 |
+
valid-sets:
|
219 |
+
- dev.en-de
|
220 |
+
valid-freq: 1Gt
|
221 |
+
valid-metrics:
|
222 |
+
- perplexity
|
223 |
+
- ce-mean-words
|
224 |
+
- bleu
|
225 |
+
- chrf
|
226 |
+
valid-reset-stalled: false
|
227 |
+
valid-reset-all: false
|
228 |
+
early-stopping: 40
|
229 |
+
early-stopping-epsilon:
|
230 |
+
- 0
|
231 |
+
early-stopping-on: first
|
232 |
+
beam-size: 4
|
233 |
+
normalize: 1.0
|
234 |
+
max-length-factor: 3
|
235 |
+
word-penalty: 0.0
|
236 |
+
allow-unk: false
|
237 |
+
n-best: false
|
238 |
+
word-scores: false
|
239 |
+
valid-mini-batch: 32
|
240 |
+
valid-max-length: 1000
|
241 |
+
valid-script-path: ""
|
242 |
+
valid-script-args:
|
243 |
+
[]
|
244 |
+
valid-translation-output: valid.trg.output
|
245 |
+
keep-best: true
|
246 |
+
valid-log: valid.log
|