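# Experiment configuration for the classy library (Hydra + PyTorch Lightning):
# apparently an ESC-style extractive entity disambiguation model trained on
# AIDA and cast as a QA task. `_target_` entries name the classes that Hydra
# instantiates, and `${...}` values are OmegaConf interpolations resolved
# against the top-level keys further down in this file.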
callbacks:
  callbacks: []
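# Data: the datamodule reads the splits from dataset_path and appears to batch
# by token count (tokens_per_batch) rather than by a fixed number of samples,
# with max_batch_size as a per-batch sample cap and section_size as the chunk
# size used when prebatching. (Field semantics inferred from the names; see
# classy's documentation for the authoritative definitions.)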
data:
  datamodule:
    _target_: classy.data.data_modules.ClassyDataModule
    task: ${task}
    dataset_path: data/aida
    train_dataset:
      _target_: classy.data.dataset.hf.classification.HFQADataset.from_file
      transformer_model: ${transformer_model}
      additional_special_tokens: ${model.additional_special_tokens}
      min_length: 5
      max_length: 500
      tokens_per_batch: 2000
      max_batch_size: 10
      section_size: 10000
      prebatch: true
      materialize: false
      for_inference: false
    validation_dataset:
      _target_: classy.data.dataset.hf.classification.HFQADataset.from_file
      transformer_model: ${transformer_model}
      additional_special_tokens: ${model.additional_special_tokens}
      min_length: 5
      max_length: 500
      tokens_per_batch: 2000
      max_batch_size: 10
      section_size: 10000
      prebatch: true
      materialize: true
      for_inference: true
    validation_split_size: 0.1
    test_split_size: 0.1
    max_nontrain_split_size: 10000
    shuffle_dataset: true
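# Device: single-GPU, full FP32 training. amp_level O0 is NVIDIA Apex's
# "pure FP32" optimization level, so mixed precision is effectively disabled,
# consistent with precision: 32.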
device:
  gpus:
  - 0
  precision: 32
  amp_level: O0
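# Model: classy's extractive-QA Lightning module over the backbone named by
# ${transformer_model}. The optimizer factory builds RAdam with weight decay
# excluded for biases and LayerNorm weights, the usual practice for
# transformer fine-tuning.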
model:
  _target_: classy.pl_modules.hf.classification.HFQAPLModule
  transformer_model: ${transformer_model}
  additional_special_tokens: []
  optim_conf:
    _target_: classy.optim.factories.RAdamFactory
    lr: 1.0e-05
    weight_decay: 0.01
    no_decay_params:
    - bias
    - LayerNorm.weight
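# Prediction: inference-time dataset built directly from in-memory samples.
# The -1 values presumably disable the corresponding limits (no length
# filtering, no cap on samples per batch); tokens_per_batch drops to 800.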
prediction:
  dataset:
    _target_: classy.data.dataset.hf.classification.HFQADataset.from_samples
    transformer_model: ${transformer_model}
    additional_special_tokens: ${model.additional_special_tokens}
    min_length: -1
    max_length: -1
    tokens_per_batch: 800
    max_batch_size: -1
    section_size: 10000
    prebatch: true
    materialize: false
    for_inference: true
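# Training: with tokens_per_batch: 2000 and accumulate_grad_batches: 4, each
# optimizer step sees roughly 4 x 2000 = 8000 tokens. val_check_interval
# follows PyTorch Lightning semantics: a float is a fraction of the training
# epoch, so 1.0 validates once per epoch (the profile below overrides it with
# an integer step count instead).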
training:
  seed: 12
  pl_trainer:
    _target_: pytorch_lightning.Trainer
    accumulate_grad_batches: 4
    gradient_clip_val: 10.0
    val_check_interval: 1.0
    max_steps: 1000000
  early_stopping_callback:
    _target_: pytorch_lightning.callbacks.EarlyStopping
    monitor: ${callbacks_monitor}
    mode: ${callbacks_mode}
    patience: 25
  model_checkpoint_callback:
    _target_: classy.pl_callbacks.best_checkpoint.ModelCheckpointWithBest
    monitor: ${callbacks_monitor}
    mode: ${callbacks_mode}
    verbose: true
    save_top_k: 3
    dirpath: checkpoints
    save_last: true
  resume_from: null
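# Logging: metrics are streamed to Weights & Biases under the esc-ed project;
# anonymous mode and run_id are left unset.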
logging:
  wandb:
    use_wandb: true
    project_name: esc-ed
    experiment_name: aida-longformer-large-*sep-gam-cand-shuffle
    anonymous: null
    run_id: null
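# Top-level values referenced by the ${...} interpolations above. Note that
# transformer_model is bert-base-cased here but is overridden to
# allenai/longformer-large-4096 by the profile below.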
task: qa
project_name: classy
exp_name: esc-aida-longformer-large-gam-cand-shuffle
exp_folder: ./experiments/${exp_name}
transformer_model: bert-base-cased
callbacks_monitor: val_accuracy
callbacks_mode: max
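# Profile: overrides applied on top of the base configuration above. This one
# swaps in the Longformer backbone plus the ESC entity-disambiguation module
# and dataset readers, and retunes the trainer schedule.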
profiles:
  supported_tasks:
  - qa
  - sentence-pair
  - sequence
  - token
  - generation
  transformer_model: allenai/longformer-large-4096
  candidates_separator: '*'
  training:
    pl_trainer:
      accumulate_grad_batches: 8
      val_check_interval: 2048
      max_steps: 100000
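  # ESC model override. attention_window: 64 narrows Longformer's local
  # attention span from the checkpoint default of 512 tokens, and
  # modify_global_attention presumably toggles global attention on selected
  # tokens (the "gam" in the experiment name suggests global-attention
  # modification).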
  model:
    _target_: src.esc_ed_module.ESCModule
    additional_special_tokens: []
    transformer_model: ${transformer_model}
    attention_window: 64
    modify_global_attention: true
    optim_conf:
      _target_: classy.optim.factories.RAdamFactory
      lr: 1.0e-05
      weight_decay: 0.01
      no_decay_params:
      - bias
      - LayerNorm.weight
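  # Dataset overrides: candidates are joined into the input with the '*'
  # separator. shuffle_candidates_prob presumably gives the probability of
  # randomly reordering a sample's candidates at training time (0.0 disables
  # the shuffling here).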
  data:
    datamodule:
      train_dataset:
        _target_: src.data.esc_ed_dataset.ESCEDDataset.from_file
        transformer_model: ${transformer_model}
        additional_special_tokens: ${model.additional_special_tokens}
        candidates_separator: ${candidates_separator}
        shuffle_candidates_prob: 0.0
        min_length: 0
        max_length: 1024
        tokens_per_batch: 1024
        max_batch_size: 10
        section_size: 20000
        prebatch: true
        materialize: false
        for_inference: false
      validation_dataset:
        _target_: src.data.esc_ed_dataset.ESCEDDataset.from_file
        transformer_model: ${transformer_model}
        additional_special_tokens: ${model.additional_special_tokens}
        candidates_separator: ${candidates_separator}
        min_length: 0
        max_length: 1024
        tokens_per_batch: 2048
        max_batch_size: 10
        section_size: 10000
        prebatch: true
        materialize: true
        for_inference: false
      shuffle_dataset: true
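  # Inference reader: built from in-memory samples, with the -1 values
  # presumably lifting the length and batch-size limits.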
  prediction:
    dataset:
      _target_: src.data.esc_ed_dataset.ESCEDDataset.from_samples
      transformer_model: ${transformer_model}
      additional_special_tokens: ${model.additional_special_tokens}
      candidates_separator: ${candidates_separator}
      min_length: -1
      max_length: -1
      tokens_per_batch: 2048
      max_batch_size: -1
      section_size: 10000
      prebatch: true
      materialize: false
      for_inference: true
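# Hypothetical usage sketch (command shape assumed from classy's CLI; check
# `classy train --help` for the exact flags and the profile name to pass):
#   classy train qa data/aida -n esc-aida-longformer-large-gam-cand-shuffle --profile <profile>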