kwang2049 committed on
Commit
d0c1a4d
1 Parent(s): dc8b561

Upload config.json

Browse files
Files changed (1) hide show
  1. config.json +142 -0
config.json ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_reader": {
3
+ "type": "coref",
4
+ "max_sentences": 110,
5
+ "max_span_width": 30,
6
+ "token_indexers": {
7
+ "tokens": {
8
+ "type": "pretrained_transformer_mismatched",
9
+ "max_length": 512,
10
+ "model_name": "SpanBERT/spanbert-large-cased"
11
+ }
12
+ }
13
+ },
14
+ "model": {
15
+ "type": "coref",
16
+ "antecedent_feedforward": {
17
+ "activations": "relu",
18
+ "dropout": 0.3,
19
+ "hidden_dims": 1500,
20
+ "input_dim": 9296,
21
+ "num_layers": 2
22
+ },
23
+ "coarse_to_fine": true,
24
+ "context_layer": {
25
+ "type": "pass_through",
26
+ "input_dim": 1024
27
+ },
28
+ "feature_size": 20,
29
+ "inference_order": 2,
30
+ "initializer": {
31
+ "regexes": [
32
+ [
33
+ ".*_span_updating_gated_sum.*weight",
34
+ {
35
+ "type": "xavier_normal"
36
+ }
37
+ ],
38
+ [
39
+ ".*linear_layers.*weight",
40
+ {
41
+ "type": "xavier_normal"
42
+ }
43
+ ],
44
+ [
45
+ ".*scorer.*weight",
46
+ {
47
+ "type": "xavier_normal"
48
+ }
49
+ ],
50
+ [
51
+ "_distance_embedding.weight",
52
+ {
53
+ "type": "xavier_normal"
54
+ }
55
+ ],
56
+ [
57
+ "_span_width_embedding.weight",
58
+ {
59
+ "type": "xavier_normal"
60
+ }
61
+ ],
62
+ [
63
+ "_context_layer._module.weight_ih.*",
64
+ {
65
+ "type": "xavier_normal"
66
+ }
67
+ ],
68
+ [
69
+ "_context_layer._module.weight_hh.*",
70
+ {
71
+ "type": "orthogonal"
72
+ }
73
+ ]
74
+ ]
75
+ },
76
+ "max_antecedents": 50,
77
+ "max_span_width": 30,
78
+ "mention_feedforward": {
79
+ "activations": "relu",
80
+ "dropout": 0.3,
81
+ "hidden_dims": 1500,
82
+ "input_dim": 3092,
83
+ "num_layers": 2
84
+ },
85
+ "spans_per_word": 0.4,
86
+ "text_field_embedder": {
87
+ "token_embedders": {
88
+ "tokens": {
89
+ "type": "pretrained_transformer_mismatched",
90
+ "max_length": 512,
91
+ "model_name": "SpanBERT/spanbert-large-cased"
92
+ }
93
+ }
94
+ }
95
+ },
96
+ "train_data_path": "/home/dirkg/tank/data/conll12/train.english.v4_gold_conll",
97
+ "validation_data_path": "/home/dirkg/tank/data/conll12/dev.english.v4_gold_conll",
98
+ "test_data_path": "/home/dirkg/tank/data/conll12/test.english.v4_gold_conll",
99
+ "trainer": {
100
+ "learning_rate_scheduler": {
101
+ "type": "slanted_triangular",
102
+ "cut_frac": 0.06
103
+ },
104
+ "num_epochs": 40,
105
+ "optimizer": {
106
+ "type": "huggingface_adamw",
107
+ "lr": 0.0003,
108
+ "parameter_groups": [
109
+ [
110
+ [
111
+ ".*transformer.*"
112
+ ],
113
+ {
114
+ "lr": 1e-05
115
+ }
116
+ ]
117
+ ]
118
+ },
119
+ "patience": 10,
120
+ "validation_metric": "+coref_f1"
121
+ },
122
+ "data_loader": {
123
+ "batch_sampler": {
124
+ "type": "bucket",
125
+ "batch_size": 1,
126
+ "sorting_keys": [
127
+ "text"
128
+ ]
129
+ }
130
+ },
131
+ "validation_dataset_reader": {
132
+ "type": "coref",
133
+ "max_span_width": 30,
134
+ "token_indexers": {
135
+ "tokens": {
136
+ "type": "pretrained_transformer_mismatched",
137
+ "max_length": 512,
138
+ "model_name": "SpanBERT/spanbert-large-cased"
139
+ }
140
+ }
141
+ }
142
+ }