callbacks:
  callbacks: []
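# The datamodule below batches by token count rather than by a fixed number
# of samples: tokens_per_batch caps the total tokens per batch while
# max_batch_size caps the number of samples. section_size, prebatch and
# materialize presumably control how many samples are read per chunk,
# whether batches are built ahead of time, and whether the dataset is fully
# loaded into memory (inferred from the option names, not verified against
# the classy sources).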
data:
  datamodule:
    _target_: classy.data.data_modules.ClassyDataModule
    task: ${task}
    dataset_path: data/aida
    train_dataset:
      _target_: classy.data.dataset.hf.classification.HFQADataset.from_file
      transformer_model: ${transformer_model}
      additional_special_tokens: ${model.additional_special_tokens}
      min_length: 5
      max_length: 500
      tokens_per_batch: 2000
      max_batch_size: 10
      section_size: 10000
      prebatch: true
      materialize: false
      for_inference: false
    validation_dataset:
      _target_: classy.data.dataset.hf.classification.HFQADataset.from_file
      transformer_model: ${transformer_model}
      additional_special_tokens: ${model.additional_special_tokens}
      min_length: 5
      max_length: 500
      tokens_per_batch: 2000
      max_batch_size: 10
      section_size: 10000
      prebatch: true
      materialize: true
      for_inference: true
    validation_split_size: 0.1
    test_split_size: 0.1
    max_nontrain_split_size: 10000
    shuffle_dataset: true
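# precision: 32 runs in full FP32; amp_level is NVIDIA Apex's optimization
# level, and O0 (the letter O, not zero) likewise means no mixed precision.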
device:
  gpus:
    - 0
  precision: 32
  amp_level: O0
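# HFQAPLModule presumably wraps a Hugging Face transformer with a QA-style
# span-extraction head. no_decay_params excludes bias and LayerNorm weights
# from weight decay, the usual convention when fine-tuning transformers.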
model:
  _target_: classy.pl_modules.hf.classification.HFQAPLModule
  transformer_model: ${transformer_model}
  additional_special_tokens: []
  optim_conf:
    _target_: classy.optim.factories.RAdamFactory
    lr: 1.0e-05
    weight_decay: 0.01
    no_decay_params:
      - bias
      - LayerNorm.weight
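# Inference-time dataset, built from in-memory samples rather than a file.
# The -1 values presumably disable the corresponding limits: no min/max
# length filtering and no cap on the number of samples per batch.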
prediction:
  dataset:
    _target_: classy.data.dataset.hf.classification.HFQADataset.from_samples
    transformer_model: ${transformer_model}
    additional_special_tokens: ${model.additional_special_tokens}
    min_length: -1
    max_length: -1
    tokens_per_batch: 800
    max_batch_size: -1
    section_size: 10000
    prebatch: true
    materialize: false
    for_inference: true
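# With accumulate_grad_batches: 4 and tokens_per_batch: 2000, the effective
# batch is roughly 8000 tokens per optimizer step. In PyTorch Lightning a
# float val_check_interval is a fraction of the epoch (1.0 = validate once
# per epoch), while an int (as in the profile below) means every N batches.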
training:
  seed: 12
  pl_trainer:
    _target_: pytorch_lightning.Trainer
    accumulate_grad_batches: 4
    gradient_clip_val: 10.0
    val_check_interval: 1.0
    max_steps: 1000000
  early_stopping_callback:
    _target_: pytorch_lightning.callbacks.EarlyStopping
    monitor: ${callbacks_monitor}
    mode: ${callbacks_mode}
    patience: 25
  model_checkpoint_callback:
    _target_: classy.pl_callbacks.best_checkpoint.ModelCheckpointWithBest
    monitor: ${callbacks_monitor}
    mode: ${callbacks_mode}
    verbose: true
    save_top_k: 3
    dirpath: checkpoints
    save_last: true
  resume_from: null
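# Weights & Biases logging; anonymous and run_id are left unset, so each
# launch creates a fresh run under the esc-ed project.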
logging:
  wandb:
    use_wandb: true
    project_name: esc-ed
    experiment_name: aida-longformer-large-*sep-gam-cand-shuffle
    anonymous: null
    run_id: null
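# Top-level keys referenced by the ${...} OmegaConf interpolations above.
# transformer_model defaults to bert-base-cased here but is presumably
# overridden to the Longformer by the profile below when configs are merged.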
task: qa
project_name: classy
exp_name: esc-aida-longformer-large-gam-cand-shuffle
exp_folder: ./experiments/${exp_name}
transformer_model: bert-base-cased
callbacks_monitor: val_accuracy
callbacks_mode: max
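# Profile overrides: the keys under profiles are merged on top of the base
# config above, swapping in allenai/longformer-large-4096 and the ESC-style
# entity-disambiguation module and dataset classes from src/.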
profiles:
  supported_tasks:
    - qa
    - sentence-pair
    - sequence
    - token
    - generation
  transformer_model: allenai/longformer-large-4096
  candidates_separator: '*'
  training:
    pl_trainer:
      accumulate_grad_batches: 8
      val_check_interval: 2048
      max_steps: 100000
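  # ESCModule presumably adapts the Longformer: attention_window narrows the
  # local attention span to 64 tokens, and modify_global_attention likely
  # enables global attention on selected tokens (e.g. the candidate markers).
  # Inferred from the option names, not verified against src/.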
  model:
    _target_: src.esc_ed_module.ESCModule
    additional_special_tokens: []
    transformer_model: ${transformer_model}
    attention_window: 64
    modify_global_attention: true
    optim_conf:
      _target_: classy.optim.factories.RAdamFactory
      lr: 1.0e-05
      weight_decay: 0.01
      no_decay_params:
        - bias
        - LayerNorm.weight
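  # ESCEDDataset presumably lists entity candidates in the input, separated
  # by the '*' token configured above; shuffle_candidates_prob would then be
  # the per-sample probability of permuting candidate order during training
  # (0.0 disables it here). Inferred from the option names.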
  data:
    datamodule:
      train_dataset:
        _target_: src.data.esc_ed_dataset.ESCEDDataset.from_file
        transformer_model: ${transformer_model}
        additional_special_tokens: ${model.additional_special_tokens}
        candidates_separator: ${candidates_separator}
        shuffle_candidates_prob: 0.0
        min_length: 0
        max_length: 1024
        tokens_per_batch: 1024
        max_batch_size: 10
        section_size: 20000
        prebatch: true
        materialize: false
        for_inference: false
      validation_dataset:
        _target_: src.data.esc_ed_dataset.ESCEDDataset.from_file
        transformer_model: ${transformer_model}
        additional_special_tokens: ${model.additional_special_tokens}
        candidates_separator: ${candidates_separator}
        min_length: 0
        max_length: 1024
        tokens_per_batch: 2048
        max_batch_size: 10
        section_size: 10000
        prebatch: true
        materialize: true
        for_inference: false
      shuffle_dataset: true
  prediction:
    dataset:
      _target_: src.data.esc_ed_dataset.ESCEDDataset.from_samples
      transformer_model: ${transformer_model}
      additional_special_tokens: ${model.additional_special_tokens}
      candidates_separator: ${candidates_separator}
      min_length: -1
      max_length: -1
      tokens_per_batch: 2048
      max_batch_size: -1
      section_size: 10000
      prebatch: true
      materialize: false
      for_inference: true
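# Illustrative launch (assumed CLI shape for classy; the exact flags may
# differ across versions, see `classy train --help`):
#   classy train qa data/aida -n esc-aida-longformer-large-gam-cand-shuffle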