# Training configuration, one option per line.
# NOTE(review): the option names look like Marian NMT training options
# (presumably a dumped/effective config) -- confirm against the tool that
# consumes this file before relying on any per-option semantics.
#
# All keys, values and their order are kept exactly as they were; only the
# layout was restored. Scalars such as 1e-08, 1e-3, 256.f and 1.f are left
# unquoted and unmodified on purpose: YAML 1.1 loaders may read them as
# strings while YAML 1.2 loaders may read the first two as floats, so do not
# "normalize" them without checking how the consumer parses them.

# --- General / logging -------------------------------------------------------
authors: false
cite: false
build-info: ""
workspace: -8000
log: train.log
log-level: info
log-time-zone: PST8PDT
quiet: false
quiet-translation: true
seed: 141414
check-nan: false
interpolate-env-vars: true
relative-paths: false
dump-config: ""
sigterm: save-and-exit

# --- Model -------------------------------------------------------------------
model: model_files/model.npz
pretrained-model: ""
ignore-model-config: false
type: transformer
dim-vocabs:
  - 64000
  - 64000
dim-emb: 1024
factors-dim-emb: 0
factors-combine: sum
lemma-dependency: ""
lemma-dim-emb: 0
dim-rnn: 1024
enc-type: bidirectional
enc-cell: gru
enc-cell-depth: 1
enc-depth: 6
dec-cell: gru
dec-cell-base-depth: 2
dec-cell-high-depth: 1
dec-depth: 6
skip: false
layer-normalization: false
right-left: false
input-types: []
tied-embeddings: true
tied-embeddings-src: false
tied-embeddings-all: true
output-omit-bias: false

# --- Transformer options -----------------------------------------------------
transformer-heads: 8
transformer-no-projection: false
transformer-rnn-projection: false
transformer-pool: false
transformer-dim-ffn: 8192
transformer-decoder-dim-ffn: 8192
transformer-ffn-depth: 2
transformer-decoder-ffn-depth: 0
transformer-ffn-activation: relu
transformer-dim-aan: 2048
transformer-aan-depth: 2
transformer-aan-activation: swish
transformer-aan-nogate: false
transformer-decoder-autoreg: self-attention
transformer-tied-layers: []
transformer-guided-alignment-layer: last
transformer-preprocess: ""
transformer-postprocess-emb: d
transformer-postprocess: dan
transformer-postprocess-top: ""
transformer-train-position-embeddings: false
transformer-depth-scaling: true
transformer-no-bias: false
transformer-no-affine: false

# --- BERT options ------------------------------------------------------------
# The symbols are quoted because a plain scalar starting with "[" would be
# parsed as a flow sequence.
bert-mask-symbol: "[MASK]"
bert-sep-symbol: "[SEP]"
bert-class-symbol: "[CLS]"
bert-masking-fraction: 0.15
bert-train-type-embeddings: true
bert-type-vocab-size: 2

# --- COMET options -----------------------------------------------------------
comet-final-sigmoid: false
comet-mix: false
comet-mix-norm: false
comet-dropout: 0.1
comet-mixup: 0
comet-mixup-reg: false
comet-pooler-ffn:
  - 2048
  - 1024
comet-prepend-zero: false

# --- Dropout -----------------------------------------------------------------
dropout-rnn: 0
dropout-src: 0
dropout-trg: 0
transformer-dropout: 0.1
transformer-dropout-attention: 0
transformer-dropout-ffn: 0.1

# --- Training: cost, data and schedule ---------------------------------------
cost-type: ce-sum
multi-loss-type: sum
unlikelihood-loss: false
overwrite: false
overwrite-checkpoint: true
no-reload: false
train-sets:
  - stdin
vocabs:
  - vocab
  - vocab
sentencepiece-alphas: []
sentencepiece-options: ""
sentencepiece-max-lines: 2000000
no-spm-encode: false
after-epochs: 0
after-batches: 0
after: 40e
disp-freq: 100Mt
disp-first: 10
disp-label-counts: true
save-freq: 1Gt
logical-epoch:
  - 1Gt
max-length: 256
max-length-crop: false
tsv: true
tsv-fields: 2
shuffle: batches
no-restore-corpus: true
tempdir: /tmp
sqlite: ""
sqlite-drop: false

# --- Devices and batching ----------------------------------------------------
devices:
  - 0
  - 1
no-nccl: false
sharding: local
sync-freq: 200u
cpu-threads: 0
mini-batch: 1000
mini-batch-words: 500000
mini-batch-fit: true
mini-batch-fit-step: 5
gradient-checkpointing: false
maxi-batch: 1000
maxi-batch-sort: trg
shuffle-in-ram: true
data-threads: 8
all-caps-every: 0
english-title-case-every: 0
mini-batch-words-ref: 0
mini-batch-warmup: 4000
mini-batch-track-lr: false
mini-batch-round-up: true

# --- Optimizer and learning-rate schedule ------------------------------------
optimizer: adam
optimizer-params:
  - 0.9
  - 0.999
  - 1e-08
  - 0.01
optimizer-delay: 1
sync-sgd: true
learn-rate: 0.0005
lr-report: true
lr-decay: 0
lr-decay-strategy: epoch+stalled
lr-decay-start:
  - 10
  - 1
lr-decay-freq: 50000
lr-decay-reset-optimizer: false
lr-decay-repeat-warmup: false
lr-decay-inv-sqrt:
  - 4000
lr-warmup: 4000
lr-warmup-start-rate: 0
lr-warmup-cycle: false
lr-warmup-at-reload: false
label-smoothing: 0.1
factor-weight: 1
clip-norm: 0
exponential-smoothing: 1e-3
exponential-smoothing-replace-freq: 0
guided-alignment: none
guided-alignment-cost: ce
guided-alignment-weight: 0
data-weighting: ""
data-weighting-type: sentence
embedding-vectors: []
embedding-normalization: false
embedding-fix-src: false
embedding-fix-trg: false

# --- Precision and gradient scaling ------------------------------------------
precision:
  - float32
  - float32
cost-scaling:
  - 256.f
  - 10000
  - 1.f
  - 256.f
throw-on-divergence: []
custom-fallbacks: []
gradient-norm-average-window: 100
dynamic-gradient-scaling:
  - 2
  - log
check-gradient-nan: false
normalize-gradient: false
train-embedder-rank: []

# --- Quantization ------------------------------------------------------------
quantize-bits: 0
quantize-optimization-steps: 0
quantize-log-based: false
quantize-biases: false

# --- ULR options -------------------------------------------------------------
ulr: false
ulr-query-vectors: ""
ulr-keys-vectors: ""
ulr-trainable-transformation: false
ulr-dim-emb: 0
ulr-dropout: 0
ulr-softmax-temperature: 1

# --- Validation and early stopping -------------------------------------------
valid-sets:
  - dev.en-de
valid-freq: 1Gt
valid-metrics:
  - perplexity
  - ce-mean-words
  - bleu
  - chrf
valid-reset-stalled: false
valid-reset-all: false
early-stopping: 40
early-stopping-epsilon:
  - 0
early-stopping-on: first
beam-size: 4
normalize: 1.0
max-length-factor: 3
word-penalty: 0.0
allow-unk: false
n-best: false
word-scores: false
valid-mini-batch: 32
valid-max-length: 1000
valid-script-path: ""
valid-script-args: []
valid-translation-output: valid.trg.output
keep-best: true
valid-log: valid.log