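# Hydra configuration for the ESC-ED entity-disambiguation experiments, built
# on the classy framework (PyTorch Lightning + Hydra). `_target_` keys name the
# classes Hydra instantiates; ${...} values are OmegaConf interpolations
# resolved against other keys in this file at load time.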
callbacks:
  callbacks: []
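
# Base data setup: the datamodule loads the AIDA dataset and frames entity
# disambiguation as extractive question answering (HFQADataset).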
data:
  datamodule:
    _target_: classy.data.data_modules.ClassyDataModule
    task: ${task}
    dataset_path: data/aida
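    # Both splits use token-based dynamic batching: a batch presumably closes
    # once it reaches tokens_per_batch tokens or max_batch_size samples,
    # whichever comes first; materialize: true keeps the (smaller) validation
    # split fully in memory.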
    train_dataset:
      _target_: classy.data.dataset.hf.classification.HFQADataset.from_file
      transformer_model: ${transformer_model}
      additional_special_tokens: ${model.additional_special_tokens}
      min_length: 5
      max_length: 500
      tokens_per_batch: 2000
      max_batch_size: 10
      section_size: 10000
      prebatch: true
      materialize: false
      for_inference: false
    validation_dataset:
      _target_: classy.data.dataset.hf.classification.HFQADataset.from_file
      transformer_model: ${transformer_model}
      additional_special_tokens: ${model.additional_special_tokens}
      min_length: 5
      max_length: 500
      tokens_per_batch: 2000
      max_batch_size: 10
      section_size: 10000
      prebatch: true
      materialize: true
      for_inference: true
    validation_split_size: 0.1
    test_split_size: 0.1
    max_nontrain_split_size: 10000
    shuffle_dataset: true
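
# Hardware: a single GPU, full fp32 precision (amp_level O0 is the Apex
# "no mixed precision" opt level).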
device:
  gpus:
  - 0
  precision: 32
  amp_level: O0
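
# Base model: a QA-style span-extraction module over ${transformer_model},
# optimized with RAdam; weight decay is disabled for biases and LayerNorm
# weights, as is standard when fine-tuning transformers.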
model:
  _target_: classy.pl_modules.hf.classification.HFQAPLModule
  transformer_model: ${transformer_model}
  additional_special_tokens: []
  optim_conf:
    _target_: classy.optim.factories.RAdamFactory
    lr: 1.0e-05
    weight_decay: 0.01
    no_decay_params:
    - bias
    - LayerNorm.weight
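
# Inference-time dataset built from in-memory samples; -1 presumably means
# "no limit", leaving batches bounded only by tokens_per_batch.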
prediction:
  dataset:
    _target_: classy.data.dataset.hf.classification.HFQADataset.from_samples
    transformer_model: ${transformer_model}
    additional_special_tokens: ${model.additional_special_tokens}
    min_length: -1
    max_length: -1
    tokens_per_batch: 800
    max_batch_size: -1
    section_size: 10000
    prebatch: true
    materialize: false
    for_inference: true
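
# Trainer setup: gradients are accumulated over 4 batches before each optimizer
# step, validation runs once per epoch (val_check_interval: 1.0), early stopping
# fires after 25 validations without improvement on ${callbacks_monitor}, and
# the 3 best checkpoints (plus the last) are kept.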
training:
  seed: 12
  pl_trainer:
    _target_: pytorch_lightning.Trainer
    accumulate_grad_batches: 4
    gradient_clip_val: 10.0
    val_check_interval: 1.0
    max_steps: 1000000
  early_stopping_callback:
    _target_: pytorch_lightning.callbacks.EarlyStopping
    monitor: ${callbacks_monitor}
    mode: ${callbacks_mode}
    patience: 25
  model_checkpoint_callback:
    _target_: classy.pl_callbacks.best_checkpoint.ModelCheckpointWithBest
    monitor: ${callbacks_monitor}
    mode: ${callbacks_mode}
    verbose: true
    save_top_k: 3
    dirpath: checkpoints
    save_last: true
  resume_from: null
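
# Experiment tracking on Weights & Biases.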
logging:
  wandb:
    use_wandb: true
    project_name: esc-ed
    experiment_name: aida-longformer-large-*sep-gam-cand-shuffle
    anonymous: null
    run_id: null
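
# Top-level values referenced by the ${...} interpolations above; the profile
# below overrides transformer_model with a Longformer.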
task: qa
project_name: classy
exp_name: esc-aida-longformer-large-gam-cand-shuffle
exp_folder: ./experiments/${exp_name}
transformer_model: bert-base-cased
callbacks_monitor: val_accuracy
callbacks_mode: max
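
# Profile: classy profiles are merged on top of the base configuration above.
# This one swaps BERT for a Longformer and replaces the generic QA components
# with the ESC-ED ones under src/.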
profiles:
  supported_tasks:
  - qa
  - sentence-pair
  - sequence
  - token
  - generation
  transformer_model: allenai/longformer-large-4096
  candidates_separator: '*'
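  # Trainer overrides: heavier gradient accumulation and step-based validation
  # (every 2048 training batches instead of once per epoch).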
  training:
    pl_trainer:
      accumulate_grad_batches: 8
      val_check_interval: 2048
      max_steps: 100000
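  # ESC-ED module: a Longformer-based extractive model. attention_window sets
  # the local-attention span; modify_global_attention presumably turns on
  # global attention for selected tokens (e.g. the candidate entities).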
  model:
    _target_: src.esc_ed_module.ESCModule
    additional_special_tokens: []
    transformer_model: ${transformer_model}
    attention_window: 64
    modify_global_attention: true
    optim_conf:
      _target_: classy.optim.factories.RAdamFactory
      lr: 1.0e-05
      weight_decay: 0.01
      no_decay_params:
      - bias
      - LayerNorm.weight
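  # ESC-ED datasets: candidate entities are appended to the input, separated by
  # ${candidates_separator}; shuffle_candidates_prob is the probability of
  # randomizing candidate order during training (0.0 = never).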
  data:
    datamodule:
      train_dataset:
        _target_: src.data.esc_ed_dataset.ESCEDDataset.from_file
        transformer_model: ${transformer_model}
        additional_special_tokens: ${model.additional_special_tokens}
        candidates_separator: ${candidates_separator}
        shuffle_candidates_prob: 0.0
        min_length: 0
        max_length: 1024
        tokens_per_batch: 1024
        max_batch_size: 10
        section_size: 20000
        prebatch: true
        materialize: false
        for_inference: false
      validation_dataset:
        _target_: src.data.esc_ed_dataset.ESCEDDataset.from_file
        transformer_model: ${transformer_model}
        additional_special_tokens: ${model.additional_special_tokens}
        candidates_separator: ${candidates_separator}
        min_length: 0
        max_length: 1024
        tokens_per_batch: 2048
        max_batch_size: 10
        section_size: 10000
        prebatch: true
        materialize: true
        for_inference: false
      shuffle_dataset: true
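  # ESC-ED inference dataset, mirroring the base prediction settings.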
  prediction:
    dataset:
      _target_: src.data.esc_ed_dataset.ESCEDDataset.from_samples
      transformer_model: ${transformer_model}
      additional_special_tokens: ${model.additional_special_tokens}
      candidates_separator: ${candidates_separator}
      min_length: -1
      max_length: -1
      tokens_per_batch: 2048
      max_batch_size: -1
      section_size: 10000
      prebatch: true
      materialize: false
      for_inference: true