File size: 3,399 Bytes
2ed3acc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
{
    "dataset_reader": {
        "type": "compreno_ud_dataset_reader",
        "token_indexers": {
            "tokens": {
                "type": "pretrained_transformer_mismatched",
                "model_name": "xlm-roberta-base"
            }
        }
    },
    "model": {
        "type": "morpho_syntax_semantic_parser",
        "depencency_classifier": {
            "activation": "relu",
            "dropout": 0.1,
            "hid_dim": 128
        },
        "embedder": {
            "type": "pretrained_transformer_mismatched",
            "model_name": "xlm-roberta-base",
            "train_parameters": true
        },
        "indexer": {
            "type": "pretrained_transformer_mismatched",
            "model_name": "xlm-roberta-base"
        },
        "lemma_rule_classifier": {
            "activation": "relu",
            "dropout": 0.1,
            "hid_dim": 512
        },
        "misc_classifier": {
            "activation": "relu",
            "dropout": 0.1,
            "hid_dim": 128
        },
        "null_classifier": {
            "activation": "relu",
            "dropout": 0.1,
            "hid_dim": 512
        },
        "pos_feats_classifier": {
            "activation": "relu",
            "dropout": 0.1,
            "hid_dim": 256
        },
        "semclass_classifier": {
            "activation": "relu",
            "dropout": 0.1,
            "hid_dim": 1024
        },
        "semslot_classifier": {
            "activation": "relu",
            "dropout": 0.1,
            "hid_dim": 1024
        }
    },
    "train_data_path": "data/train.conllu",
    "validation_data_path": "data/validation.conllu",
    "trainer": {
        "type": "gradient_descent",
        "callbacks": [
            {
                "should_log_learning_rate": true,
                "should_log_parameter_statistics": false,
                "type": "tensorboard"
            }
        ],
        "cuda_device": 0,
        "grad_clipping": 5,
        "learning_rate_scheduler": {
            "type": "slanted_triangular",
            "cut_frac": 0,
            "decay_factor": 0.001,
            "discriminative_fine_tuning": true,
            "gradual_unfreezing": true,
            "ratio": 32
        },
        "num_epochs": 10,
        "optimizer": {
            "type": "adam",
            "lr": 0.01,
            "parameter_groups": [
                [
                    [
                        "embedder"
                    ],
                    {}
                ],
                [
                    [
                        "lemma_rule_classifier",
                        "pos_feats_classifier",
                        "dependency_classifier",
                        "misc_classifier",
                        "semslot_classifier",
                        "semclass_classifier",
                        "null_classifier"
                    ],
                    {}
                ]
            ]
        },
        "validation_metric": "+Avg"
    },
    "vocabulary": {
        "min_count": {
            "lemma_rule_labels": 2
        },
        "tokens_to_add": {
            "lemma_rule_labels": [
                "@@UNKNOWN@@"
            ]
        }
    },
    "data_loader": {
        "batch_size": 24,
        "shuffle": true
    },
    "validation_data_loader": {
        "batch_size": 24,
        "shuffle": false
    }
}