# Full training configuration (classy framework): base QA setup, followed by the
# ESC-style entity-disambiguation profile for AIDA with Longformer-large.
callbacks:
  callbacks: []
data:
  datamodule:
    _target_: classy.data.data_modules.ClassyDataModule
    task: ${task}
    dataset_path: data/aida
    train_dataset:
      _target_: classy.data.dataset.hf.classification.HFQADataset.from_file
      transformer_model: ${transformer_model}
      additional_special_tokens: ${model.additional_special_tokens}
      min_length: 5
      max_length: 500
      tokens_per_batch: 2000
      max_batch_size: 10
      section_size: 10000
      prebatch: true
      materialize: false
      for_inference: false
    validation_dataset:
      _target_: classy.data.dataset.hf.classification.HFQADataset.from_file
      transformer_model: ${transformer_model}
      additional_special_tokens: ${model.additional_special_tokens}
      min_length: 5
      max_length: 500
      tokens_per_batch: 2000
      max_batch_size: 10
      section_size: 10000
      prebatch: true
      materialize: true
      for_inference: true
    validation_split_size: 0.1
    test_split_size: 0.1
    max_nontrain_split_size: 10000
    shuffle_dataset: true
device:
  gpus:
    - 0
  precision: 32
  amp_level: O0
model:
  _target_: classy.pl_modules.hf.classification.HFQAPLModule
  transformer_model: ${transformer_model}
  additional_special_tokens: []
  optim_conf:
    _target_: classy.optim.factories.RAdamFactory
    lr: 1.0e-05
    weight_decay: 0.01
    no_decay_params:
      - bias
      - LayerNorm.weight
prediction:
  dataset:
    _target_: classy.data.dataset.hf.classification.HFQADataset.from_samples
    transformer_model: ${transformer_model}
    additional_special_tokens: ${model.additional_special_tokens}
    min_length: -1
    max_length: -1
    tokens_per_batch: 800
    max_batch_size: -1
    section_size: 10000
    prebatch: true
    materialize: false
    for_inference: true
training:
  seed: 12
  pl_trainer:
    _target_: pytorch_lightning.Trainer
    accumulate_grad_batches: 4
    gradient_clip_val: 10.0
    val_check_interval: 1.0
    max_steps: 1000000
  early_stopping_callback:
    _target_: pytorch_lightning.callbacks.EarlyStopping
    monitor: ${callbacks_monitor}
    mode: ${callbacks_mode}
    patience: 25
  model_checkpoint_callback:
    _target_: classy.pl_callbacks.best_checkpoint.ModelCheckpointWithBest
    monitor: ${callbacks_monitor}
    mode: ${callbacks_mode}
    verbose: true
    save_top_k: 3
    dirpath: checkpoints
    save_last: true
  resume_from: null
logging:
  wandb:
    use_wandb: true
    project_name: esc-ed
    experiment_name: aida-longformer-large-*sep-gam-cand-shuffle
    anonymous: null
    run_id: null
task: qa
project_name: classy
exp_name: esc-aida-longformer-large-gam-cand-shuffle
exp_folder: ./experiments/${exp_name}
transformer_model: bert-base-cased
callbacks_monitor: val_accuracy
callbacks_mode: max
# Profile section: values here override the base settings above when the profile
# is applied (e.g. transformer_model switches from bert-base-cased to
# allenai/longformer-large-4096, and the module/datasets switch to the ESC ED ones).
profiles:
  supported_tasks:
    - qa
    - sentence-pair
    - sequence
    - token
    - generation
  transformer_model: allenai/longformer-large-4096
  candidates_separator: '*'
  training:
    pl_trainer:
      accumulate_grad_batches: 8
      val_check_interval: 2048
      max_steps: 100000
  model:
    _target_: src.esc_ed_module.ESCModule
    additional_special_tokens: []
    transformer_model: ${transformer_model}
    attention_window: 64
    modify_global_attention: true
    optim_conf:
      _target_: classy.optim.factories.RAdamFactory
      lr: 1.0e-05
      weight_decay: 0.01
      no_decay_params:
        - bias
        - LayerNorm.weight
  data:
    datamodule:
      train_dataset:
        _target_: src.data.esc_ed_dataset.ESCEDDataset.from_file
        transformer_model: ${transformer_model}
        additional_special_tokens: ${model.additional_special_tokens}
        candidates_separator: ${candidates_separator}
        shuffle_candidates_prob: 0.0
        min_length: 0
        max_length: 1024
        tokens_per_batch: 1024
        max_batch_size: 10
        section_size: 20000
        prebatch: true
        materialize: false
        for_inference: false
      validation_dataset:
        _target_: src.data.esc_ed_dataset.ESCEDDataset.from_file
        transformer_model: ${transformer_model}
        additional_special_tokens: ${model.additional_special_tokens}
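        # Evaluation-time variant of the train dataset above: shuffle_candidates_prob
        # is omitted, so candidates keep their original order, and materialize is
        # enabled below (presumably pre-building the dataset instead of streaming it).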
        candidates_separator: ${candidates_separator}
        min_length: 0
        max_length: 1024
        tokens_per_batch: 2048
        max_batch_size: 10
        section_size: 10000
        prebatch: true
        materialize: true
        for_inference: false
      shuffle_dataset: true
  prediction:
    dataset:
      _target_: src.data.esc_ed_dataset.ESCEDDataset.from_samples
      transformer_model: ${transformer_model}
      additional_special_tokens: ${model.additional_special_tokens}
      candidates_separator: ${candidates_separator}
      min_length: -1
      max_length: -1
      tokens_per_batch: 2048
      max_batch_size: -1
      section_size: 10000
      prebatch: true
      materialize: false
      for_inference: true
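# Usage sketch (hypothetical invocation -- the exact flag names and profile wiring
# depend on your classy version, so adapt as needed):
#   classy train qa data/aida -n esc-aida-longformer-large-gam-cand-shuffle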