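# Configuration for character-level Kashmiri-Urdu translation
# ("Kashmiri-Urdu_1"), in the JoeyNMT YAML config format.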
name: "Kashmiri-Urdu_1"

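# Data: plain-text train/dev/test splits, tokenized at the character level;
# sentence length is capped at max_sent_length characters.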
data:
    train: "datasets/Kashmiri-Urdu/1/train"
    dev: "datasets/Kashmiri-Urdu/1/dev"
    test: "datasets/Kashmiri-Urdu/1/test"
    level: "char"
    lowercase: False
    normalize: False
    max_sent_length: 100
    dataset_type: "plain"

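    # Per-side vocabulary settings: separate source and target character
    # vocabularies of at most voc_limit entries, dropping characters that
    # occur fewer than voc_min_freq times.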
    src:
        lang: "src"
        voc_limit: 100
        voc_min_freq: 5
        level: "char"
    trg:
        lang: "trg"
        voc_limit: 100
        voc_min_freq: 5
        level: "char"
    
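# Training: Adam with plateau scheduling; the learning rate is multiplied by
# decrease_factor after `patience` validations without improvement in dev
# loss, and never drops below learning_rate_min. Validation runs every
# validation_freq updates; keep_best_ckpts -1 keeps every checkpoint.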
training:
    random_seed: 42
    optimizer: "adam"
    learning_rate: 0.001
    learning_rate_min: 0.0002
    weight_decay: 0.0
    clip_grad_norm: 1.0
    batch_size: 64
    scheduling: "plateau"
    patience: 10
    decrease_factor: 0.5
    early_stopping_metric: "loss"
    epochs: 100
    validation_freq: 1000
    logging_freq: 100
    eval_metric: "bleu"
    model_dir: "models/Kashmiri-Urdu"
    overwrite: True
    shuffle: True
    use_cuda: True
    max_output_length: 100
    print_valid_sents: [0, 3, 6, 9]
    keep_best_ckpts: -1

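# Decoding and evaluation: beam search with beam_size 4 and length penalty
# beam_alpha 1.0, scored with BLEU, chrF, and sequence accuracy.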
testing:
    n_best: 1
    beam_size: 4
    beam_alpha: 1.0
    eval_metrics: ["bleu", "chrf", "sequence_accuracy"]
    max_output_length: 50
    batch_size: 10
    batch_type: "sentence"
    return_prob: "none"

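# Model: Transformer encoder-decoder, 6 layers and 8 attention heads per
# side, 128-dimensional embeddings and hidden states, pre-layer-norm.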
model:
    initializer: "xavier_uniform"        # initializer for all trainable weights
    init_gain: 1.0                       # gain for Xavier initialization
    bias_initializer: "zeros"            # initializer for bias terms
    embed_initializer: "xavier_uniform"  # initializer for embedding weights
    embed_init_gain: 1.0                 # gain for Xavier-initialized embeddings
    encoder:
        type: "transformer"
        num_layers: 6
        num_heads: 8
        embeddings:
            embedding_dim: 128
            scale: True
        # typically ff_size = 4 x hidden_size
        hidden_size: 128
        ff_size: 512
        dropout: 0.2
        layer_norm: "pre"
    decoder:
        type: "transformer"
        num_layers: 6
        num_heads: 8
        embeddings:
            embedding_dim: 128
            scale: True
        # typically ff_size = 4 x hidden_size
        hidden_size: 128
        ff_size: 512
        dropout: 0.2
        layer_norm: "pre"
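
# Assuming JoeyNMT (v2.x) is installed, training and evaluation can be
# launched with this file, e.g.:
#   python -m joeynmt train <path/to/this_config.yaml>
#   python -m joeynmt test  <path/to/this_config.yaml>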