File size: 2,055 Bytes
674b430
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
{
    "data": "ling_conversion",
    "data_sources": ["qqp", "mrpc", "stsb"],
    "data_type": "text",
    "kld_annealing": "cyclic",
    "lingpred_annealing": "mono",
    "ling_embed_type": "one-layer",
    "combine_weight": 1,
    "alpha_kld": 1,
    "alpha_lingpred": 1,
    "alpha_sem": 1,
    "max_grad_norm": 10,
    "sem_loss_tao": 0.5,
    "sem_loss_eps": 1,
    "ckpt": "./ckpt/model.pt",
    "disc_type": "deberta",
    "disc_ckpt": "./ckpt/ling_disc",
    "sem_ckpt": "./ckpt/sem_emb.pt",
    "lng_ids": null,
    "lng_ids_idx": null,
    "model_name": "google/flan-t5-base",
    "aim_exp": "lingconv-0606",
    "sem_loss_type": "dedicated",
    "combine_method": "decoder_add_first",
    "train_log": 200,
    "val_log": 2000,
    "batch_size": 80,
    "eval_batch_size": 200,
    "max_eval_samples": 1000,
    "test_batch_size": 1,
    "hidden_dim": 500,
    "latent_dim": 150,
    "lng_dim": 40,
    "disc_lng_dim": 40,
    "use_lora": false,
    "lora_r": 64,
    "gpu": "4",
    "epochs": 20,
    "grad_accumulation": 1,
    "n_ica": 10,
    "max_length": 200,
    "total_steps": null,
    "kld_const": 1,
    "lr": 0.001,
    "kl_weight": 0.1,
    "weight_decay": 0.01,
    "ling_dropout": 0.1,
    "predict_fn": "logs/test.txt",
    "save_predict": false,
    "use_ica": false,
    "pretrain_gen": false,
    "pretrain_sem": false,
    "pretrain_disc": false,
    "linggen_type": "none",
    "linggen_input": "s+l",
    "aug_same": false,
    "ling_vae": false,
    "process_lingpred": false,
    "fudge_lambda": 1.0,
    "use_lingpred": false,
    "ling2_only": true,
    "cycle_loss": false,
    "disc_loss": false,
    "sem_loss": false,
    "sim_loss": false,
    "optuna": false,
    "debug": false,
    "demo": false,
    "fudge": false,
    "out_fn": "logs/default",
    "eval_only": false,
    "predict_with_feedback": false,
    "feedback_param": "s",
    "eval_ling": false,
    "seed": 0,
    "major_arg": 0,
    "quantize_lng": false,
    "quant_nbins": 20,
    "src_lng": "ling",
    "to_restore": [],
    "disc_steps": 0
}