{
  "data": "ling_conversion",
  "data_sources": ["qqp", "mrpc", "stsb"],
  "data_type": "text",
  "kld_annealing": "cyclic",
  "lingpred_annealing": "mono",
  "ling_embed_type": "one-layer",
  "combine_weight": 1,
  "alpha_kld": 1,
  "alpha_lingpred": 1,
  "alpha_sem": 1,
  "max_grad_norm": 10,
  "sem_loss_tao": 0.5,
  "sem_loss_eps": 1,
  "ckpt": "./ckpt/model.pt",
  "disc_type": "deberta",
  "disc_ckpt": "./ckpt/ling_disc",
  "sem_ckpt": "./ckpt/sem_emb.pt",
  "lng_ids": null,
  "lng_ids_idx": null,
  "model_name": "google/flan-t5-base",
  "aim_exp": "lingconv-0606",
  "sem_loss_type": "dedicated",
  "combine_method": "decoder_add_first",
  "train_log": 200,
  "val_log": 2000,
  "batch_size": 80,
  "eval_batch_size": 200,
  "max_eval_samples": 1000,
  "test_batch_size": 1,
  "hidden_dim": 500,
  "latent_dim": 150,
  "lng_dim": 40,
  "disc_lng_dim": 40,
  "use_lora": false,
  "lora_r": 64,
  "gpu": "4",
  "epochs": 20,
  "grad_accumulation": 1,
  "n_ica": 10,
  "max_length": 200,
  "total_steps": null,
  "kld_const": 1,
  "lr": 0.001,
  "kl_weight": 0.1,
  "weight_decay": 0.01,
  "ling_dropout": 0.1,
  "predict_fn": "logs/test.txt",
  "save_predict": false,
  "use_ica": false,
  "pretrain_gen": false,
  "pretrain_sem": false,
  "pretrain_disc": false,
  "linggen_type": "none",
  "linggen_input": "s+l",
  "aug_same": false,
  "ling_vae": false,
  "process_lingpred": false,
  "fudge_lambda": 1.0,
  "use_lingpred": false,
  "ling2_only": true,
  "cycle_loss": false,
  "disc_loss": false,
  "sem_loss": false,
  "sim_loss": false,
  "optuna": false,
  "debug": false,
  "demo": false,
  "fudge": false,
  "out_fn": "logs/default",
  "eval_only": false,
  "predict_with_feedback": false,
  "feedback_param": "s",
  "eval_ling": false,
  "seed": 0,
  "major_arg": 0,
  "quantize_lng": false,
  "quant_nbins": 20,
  "src_lng": "ling",
  "to_restore": [],
  "disc_steps": 0
}