{
  "dataset_reader": {
    "type": "compreno_ud_dataset_reader",
    "token_indexers": {
      "tokens": {
        "type": "pretrained_transformer_mismatched",
        "model_name": "xlm-roberta-base"
      }
    }
  },
  "model": {
    "type": "morpho_syntax_semantic_parser",
    "depencency_classifier": {
      "activation": "relu",
      "dropout": 0.1,
      "hid_dim": 128
    },
    "embedder": {
      "type": "pretrained_transformer_mismatched",
      "model_name": "xlm-roberta-base",
      "train_parameters": true
    },
    "indexer": {
      "type": "pretrained_transformer_mismatched",
      "model_name": "xlm-roberta-base"
    },
    "lemma_rule_classifier": {
      "activation": "relu",
      "dropout": 0.1,
      "hid_dim": 512
    },
    "misc_classifier": {
      "activation": "relu",
      "dropout": 0.1,
      "hid_dim": 128
    },
    "null_classifier": {
      "activation": "relu",
      "dropout": 0.1,
      "hid_dim": 512
    },
    "pos_feats_classifier": {
      "activation": "relu",
      "dropout": 0.1,
      "hid_dim": 256
    },
    "semclass_classifier": {
      "activation": "relu",
      "dropout": 0.1,
      "hid_dim": 1024
    },
    "semslot_classifier": {
      "activation": "relu",
      "dropout": 0.1,
      "hid_dim": 1024
    }
  },
  "train_data_path": "data/train.conllu",
  "validation_data_path": "data/validation.conllu",
  "trainer": {
    "type": "gradient_descent",
    "callbacks": [
      {
        "should_log_learning_rate": true,
        "should_log_parameter_statistics": false,
        "type": "tensorboard"
      }
    ],
    "cuda_device": 0,
    "grad_clipping": 5,
    "learning_rate_scheduler": {
      "type": "slanted_triangular",
      "cut_frac": 0,
      "decay_factor": 0.001,
      "discriminative_fine_tuning": true,
      "gradual_unfreezing": true,
      "ratio": 32
    },
    "num_epochs": 10,
    "optimizer": {
      "type": "adam",
      "lr": 0.01,
      "parameter_groups": [
        [
          [
            "embedder"
          ],
          {}
        ],
        [
          [
            "lemma_rule_classifier",
            "pos_feats_classifier",
            "dependency_classifier",
            "misc_classifier",
            "semslot_classifier",
            "semclass_classifier",
            "null_classifier"
          ],
          {}
        ]
      ]
    },
    "validation_metric": "+Avg"
  },
  "vocabulary": {
    "min_count": {
      "lemma_rule_labels": 2
    },
    "tokens_to_add": {
      "lemma_rule_labels": [
        "@@UNKNOWN@@"
      ]
    }
  },
  "data_loader": {
    "batch_size": 24,
    "shuffle": true
  },
  "validation_data_loader": {
    "batch_size": 24,
    "shuffle": false
  }
}