|
_n_gpu: 1 |
|
adafactor: false |
|
adam_beta1: 0.9 |
|
adam_beta2: 0.999 |
|
adam_epsilon: 1.0e-08 |
|
cache_dir: null |
|
dataloader_drop_last: false |
|
dataloader_num_workers: 0 |
|
dataloader_pin_memory: true |
|
ddp_find_unused_parameters: null |
|
debug: [] |
|
deepspeed: null |
|
disable_tqdm: false |
|
do_eval: true |
|
do_predict: false |
|
do_train: true |
|
eval_accumulation_steps: 1 |
|
eval_dataset_list: |
|
- tquad2-valid |
|
- xquad.tr |
|
eval_steps: 300 |
|
evaluation_strategy: &id001 !!python/object/apply:transformers.trainer_utils.IntervalStrategy |
|
- steps |
|
fp16: false |
|
fp16_backend: auto |
|
fp16_full_eval: false |
|
fp16_opt_level: O1 |
|
freeze_embeddings: false |
|
gradient_accumulation_steps: 4 |
|
greater_is_better: null |
|
group_by_length: false |
|
ignore_data_skip: false |
|
label_names: null |
|
label_smoothing_factor: 0 |
|
learning_rate: 0.001 |
|
length_column_name: length |
|
load_best_model_at_end: false |
|
local_rank: -1 |
|
log_level: -1 |
|
log_level_replica: -1 |
|
log_on_each_node: true |
|
logging_dir: null |
|
logging_first_step: false |
|
logging_steps: 500 |
|
logging_strategy: *id001 |
|
lr_scheduler_type: !!python/object/apply:transformers.trainer_utils.SchedulerType |
|
- linear |
|
max_grad_norm: 1.0 |
|
max_source_length: 512 |
|
max_steps: -1 |
|
max_target_length: 64 |
|
metric_for_best_model: null |
|
model_name_or_path: google/mt5-small |
|
model_type: mt5 |
|
mp_parameters: '' |
|
mt5_qg_format: both |
|
mt5_task_list: |
|
- qa |
|
- qg |
|
- ans_ext |
|
neptune_api_token: null |
|
neptune_project: null |
|
neptune_run: null |
|
no_cuda: false |
|
num_train_epochs: 15 |
|
output_dir: runs/mt5-small/3task/adamw-1e3-15ep-both-tquad2train |
|
overwrite_output_dir: false |
|
past_index: -1 |
|
per_device_eval_batch_size: 64 |
|
per_device_train_batch_size: 64 |
|
per_gpu_eval_batch_size: null |
|
per_gpu_train_batch_size: null |
|
prediction_loss_only: false |
|
prepare_data: true |
|
push_to_hub: false |
|
push_to_hub_model_id: adamw-1e3-15ep-both-tquad2train |
|
push_to_hub_organization: null |
|
push_to_hub_token: null |
|
remove_unused_columns: false |
|
report_to: |
|
- wandb |
|
- neptune |
|
resume_from_checkpoint: null |
|
run_name: turque-mt5small-adamw-1e3-15ep-tquad2train |
|
save_on_each_node: false |
|
save_steps: 500 |
|
save_strategy: *id001 |
|
save_total_limit: 1 |
|
seed: 42 |
|
sharded_ddp: [] |
|
skip_memory_metrics: true |
|
tokenizer_path: tokenizers/mt5-small |
|
tpu_metrics_debug: false |
|
tpu_num_cores: null |
|
train_dataset_list: |
|
- tquad2-train |
|
train_file_path: data/train_data.pt |
|
use_legacy_prediction_loop: false |
|
valid_dataset_list: |
|
- tquad2-valid |
|
valid_file_path: data/valid_data.pt |
|
wandb_id: null |
|
wandb_project: turkish-qa-qg |
|
warmup_ratio: 0.0 |
|
warmup_steps: 0 |
|
weight_decay: 0.0 |
|
|