{
    "dataset_reader": {
        "type": "compreno_ud_dataset_reader",
        "token_indexers": {
            "tokens": {
                "type": "pretrained_transformer_mismatched",
                "model_name": "xlm-roberta-base"
            }
        }
    },
    "model": {
        "type": "morpho_syntax_semantic_parser",
        "depencency_classifier": {
            "activation": "relu",
            "dropout": 0.1,
            "hid_dim": 128
        },
        "embedder": {
            "type": "pretrained_transformer_mismatched",
            "model_name": "xlm-roberta-base",
            "train_parameters": true
        },
        "indexer": {
            "type": "pretrained_transformer_mismatched",
            "model_name": "xlm-roberta-base"
        },
        "lemma_rule_classifier": {
            "activation": "relu",
            "dropout": 0.1,
            "hid_dim": 512
        },
        "misc_classifier": {
            "activation": "relu",
            "dropout": 0.1,
            "hid_dim": 128
        },
        "null_classifier": {
            "activation": "relu",
            "dropout": 0.1,
            "hid_dim": 512
        },
        "pos_feats_classifier": {
            "activation": "relu",
            "dropout": 0.1,
            "hid_dim": 256
        },
        "semclass_classifier": {
            "activation": "relu",
            "dropout": 0.1,
            "hid_dim": 1024
        },
        "semslot_classifier": {
            "activation": "relu",
            "dropout": 0.1,
            "hid_dim": 1024
        }
    },
    "train_data_path": "data/train.conllu",
    "validation_data_path": "data/validation.conllu",
    "trainer": {
        "type": "gradient_descent",
        "callbacks": [
            {
                "should_log_learning_rate": true,
                "should_log_parameter_statistics": false,
                "type": "tensorboard"
            }
        ],
        "cuda_device": 0,
        "grad_clipping": 5,
        "learning_rate_scheduler": {
            "type": "slanted_triangular",
            "cut_frac": 0,
            "decay_factor": 0.001,
            "discriminative_fine_tuning": true,
            "gradual_unfreezing": true,
            "ratio": 32
        },
        "num_epochs": 10,
        "optimizer": {
            "type": "adam",
            "lr": 0.01,
            "parameter_groups": [
                [
                    [
                        "embedder"
                    ],
                    {}
                ],
                [
                    [
                        "lemma_rule_classifier",
                        "pos_feats_classifier",
                        "dependency_classifier",
                        "misc_classifier",
                        "semslot_classifier",
                        "semclass_classifier",
                        "null_classifier"
                    ],
                    {}
                ]
            ]
        },
        "validation_metric": "+Avg"
    },
    "vocabulary": {
        "min_count": {
            "lemma_rule_labels": 2
        },
        "tokens_to_add": {
            "lemma_rule_labels": [
                "@@UNKNOWN@@"
            ]
        }
    },
    "data_loader": {
        "batch_size": 24,
        "shuffle": true
    },
    "validation_data_loader": {
        "batch_size": 24,
        "shuffle": false
    }
}