rewicks committed on
Commit
6395550
1 Parent(s): 8a23fcf

Upload model.npz.yml with huggingface_hub

Browse files
Files changed (1) hide show
  1. model.npz.yml +246 -0
model.npz.yml ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ authors: false
2
+ cite: false
3
+ build-info: ""
4
+ workspace: -8000
5
+ log: train.log
6
+ log-level: info
7
+ log-time-zone: PST8PDT
8
+ quiet: false
9
+ quiet-translation: true
10
+ seed: 141414
11
+ check-nan: false
12
+ interpolate-env-vars: true
13
+ relative-paths: false
14
+ dump-config: ""
15
+ sigterm: save-and-exit
16
+ model: model_files/model.npz
17
+ pretrained-model: ""
18
+ ignore-model-config: false
19
+ type: transformer
20
+ dim-vocabs:
21
+ - 64000
22
+ - 64000
23
+ dim-emb: 1024
24
+ factors-dim-emb: 0
25
+ factors-combine: sum
26
+ lemma-dependency: ""
27
+ lemma-dim-emb: 0
28
+ dim-rnn: 1024
29
+ enc-type: bidirectional
30
+ enc-cell: gru
31
+ enc-cell-depth: 1
32
+ enc-depth: 6
33
+ dec-cell: gru
34
+ dec-cell-base-depth: 2
35
+ dec-cell-high-depth: 1
36
+ dec-depth: 6
37
+ skip: false
38
+ layer-normalization: false
39
+ right-left: false
40
+ input-types:
41
+ []
42
+ tied-embeddings: true
43
+ tied-embeddings-src: false
44
+ tied-embeddings-all: true
45
+ output-omit-bias: false
46
+ transformer-heads: 8
47
+ transformer-no-projection: false
48
+ transformer-rnn-projection: false
49
+ transformer-pool: false
50
+ transformer-dim-ffn: 8192
51
+ transformer-decoder-dim-ffn: 8192
52
+ transformer-ffn-depth: 2
53
+ transformer-decoder-ffn-depth: 0
54
+ transformer-ffn-activation: relu
55
+ transformer-dim-aan: 2048
56
+ transformer-aan-depth: 2
57
+ transformer-aan-activation: swish
58
+ transformer-aan-nogate: false
59
+ transformer-decoder-autoreg: self-attention
60
+ transformer-tied-layers: []
61
+ transformer-guided-alignment-layer: last
62
+ transformer-preprocess: ""
63
+ transformer-postprocess-emb: d
64
+ transformer-postprocess: dan
65
+ transformer-postprocess-top: ""
66
+ transformer-train-position-embeddings: false
67
+ transformer-depth-scaling: true
68
+ transformer-no-bias: false
69
+ transformer-no-affine: false
70
+ bert-mask-symbol: "[MASK]"
71
+ bert-sep-symbol: "[SEP]"
72
+ bert-class-symbol: "[CLS]"
73
+ bert-masking-fraction: 0.15
74
+ bert-train-type-embeddings: true
75
+ bert-type-vocab-size: 2
76
+ comet-final-sigmoid: false
77
+ comet-mix: false
78
+ comet-mix-norm: false
79
+ comet-dropout: 0.1
80
+ comet-mixup: 0
81
+ comet-mixup-reg: false
82
+ comet-pooler-ffn:
83
+ - 2048
84
+ - 1024
85
+ comet-prepend-zero: false
86
+ dropout-rnn: 0
87
+ dropout-src: 0
88
+ dropout-trg: 0
89
+ transformer-dropout: 0.1
90
+ transformer-dropout-attention: 0
91
+ transformer-dropout-ffn: 0.1
92
+ cost-type: ce-sum
93
+ multi-loss-type: sum
94
+ unlikelihood-loss: false
95
+ overwrite: false
96
+ overwrite-checkpoint: true
97
+ no-reload: false
98
+ train-sets:
99
+ - stdin
100
+ vocabs:
101
+ - vocab
102
+ - vocab
103
+ sentencepiece-alphas:
104
+ []
105
+ sentencepiece-options: ""
106
+ sentencepiece-max-lines: 2000000
107
+ no-spm-encode: false
108
+ after-epochs: 0
109
+ after-batches: 0
110
+ after: 40e
111
+ disp-freq: 100Mt
112
+ disp-first: 10
113
+ disp-label-counts: true
114
+ save-freq: 1Gt
115
+ logical-epoch:
116
+ - 1Gt
117
+ max-length: 256
118
+ max-length-crop: false
119
+ tsv: true
120
+ tsv-fields: 2
121
+ shuffle: batches
122
+ no-restore-corpus: true
123
+ tempdir: /tmp
124
+ sqlite: ""
125
+ sqlite-drop: false
126
+ devices:
127
+ - 0
128
+ - 1
129
+ no-nccl: false
130
+ sharding: local
131
+ sync-freq: 200u
132
+ cpu-threads: 0
133
+ mini-batch: 1000
134
+ mini-batch-words: 500000
135
+ mini-batch-fit: true
136
+ mini-batch-fit-step: 5
137
+ gradient-checkpointing: false
138
+ maxi-batch: 1000
139
+ maxi-batch-sort: trg
140
+ shuffle-in-ram: true
141
+ data-threads: 8
142
+ all-caps-every: 0
143
+ english-title-case-every: 0
144
+ mini-batch-words-ref: 0
145
+ mini-batch-warmup: 4000
146
+ mini-batch-track-lr: false
147
+ mini-batch-round-up: true
148
+ optimizer: adam
149
+ optimizer-params:
150
+ - 0.9
151
+ - 0.999
152
+ - 1e-08
153
+ - 0.01
154
+ optimizer-delay: 1
155
+ sync-sgd: true
156
+ learn-rate: 0.0005
157
+ lr-report: true
158
+ lr-decay: 0
159
+ lr-decay-strategy: epoch+stalled
160
+ lr-decay-start:
161
+ - 10
162
+ - 1
163
+ lr-decay-freq: 50000
164
+ lr-decay-reset-optimizer: false
165
+ lr-decay-repeat-warmup: false
166
+ lr-decay-inv-sqrt:
167
+ - 4000
168
+ lr-warmup: 4000
169
+ lr-warmup-start-rate: 0
170
+ lr-warmup-cycle: false
171
+ lr-warmup-at-reload: false
172
+ label-smoothing: 0.1
173
+ factor-weight: 1
174
+ clip-norm: 0
175
+ exponential-smoothing: 1e-3
176
+ exponential-smoothing-replace-freq: 0
177
+ guided-alignment: none
178
+ guided-alignment-cost: ce
179
+ guided-alignment-weight: 0
180
+ data-weighting: ""
181
+ data-weighting-type: sentence
182
+ embedding-vectors:
183
+ []
184
+ embedding-normalization: false
185
+ embedding-fix-src: false
186
+ embedding-fix-trg: false
187
+ precision:
188
+ - float32
189
+ - float32
190
+ cost-scaling:
191
+ - 256.f
192
+ - 10000
193
+ - 1.f
194
+ - 256.f
195
+ throw-on-divergence:
196
+ []
197
+ custom-fallbacks:
198
+ []
199
+ gradient-norm-average-window: 100
200
+ dynamic-gradient-scaling:
201
+ - 2
202
+ - log
203
+ check-gradient-nan: false
204
+ normalize-gradient: false
205
+ train-embedder-rank:
206
+ []
207
+ quantize-bits: 0
208
+ quantize-optimization-steps: 0
209
+ quantize-log-based: false
210
+ quantize-biases: false
211
+ ulr: false
212
+ ulr-query-vectors: ""
213
+ ulr-keys-vectors: ""
214
+ ulr-trainable-transformation: false
215
+ ulr-dim-emb: 0
216
+ ulr-dropout: 0
217
+ ulr-softmax-temperature: 1
218
+ valid-sets:
219
+ - dev.en-de
220
+ valid-freq: 1Gt
221
+ valid-metrics:
222
+ - perplexity
223
+ - ce-mean-words
224
+ - bleu
225
+ - chrf
226
+ valid-reset-stalled: false
227
+ valid-reset-all: false
228
+ early-stopping: 40
229
+ early-stopping-epsilon:
230
+ - 0
231
+ early-stopping-on: first
232
+ beam-size: 4
233
+ normalize: 1.0
234
+ max-length-factor: 3
235
+ word-penalty: 0.0
236
+ allow-unk: false
237
+ n-best: false
238
+ word-scores: false
239
+ valid-mini-batch: 32
240
+ valid-max-length: 1000
241
+ valid-script-path: ""
242
+ valid-script-args:
243
+ []
244
+ valid-translation-output: valid.trg.output
245
+ keep-best: true
246
+ valid-log: valid.log