Upload 3 files

Browse files

Files changed (4) hide show

.gitattributes +1 -0
config_saved.json +1 -0
supervised.pol.mdl +3 -0
train_INFO.log +345 -0

.gitattributes CHANGED Viewed

@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+supervised.pol.mdl filter=lfs diff=lfs merge=lfs -text

config_saved.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"args": {"seed": 0, "eval_freq": 2, "dataset_name": "multiwoz21", "model_path": "experiments/seed0/save/supervised.pol.mdl"}, "config": {"batchsz": 64, "epoch": 40, "gamma": 0.99, "policy_lr": 5e-06, "supervised_lr": 1e-05, "entropy_weight": 0.01, "value_lr": 0.0001, "save_dir": "save", "log_dir": "log", "save_per_epoch": 5000, "hidden_size": 256, "load": "save/best", "logging_mode": "INFO", "use_cer": true, "memory_size": 5000, "behaviour_cloning_weight": 0.1, "supervised_weight": 0.0, "online_offline_ratio": 0.2, "smoothed_value_function": false, "use_reservoir_sampling": false, "seed": 0, "lambda": 1, "tau": 0.001, "policy_freq": 1, "print_per_batch": 400, "c": 1.0, "rho_bar": 1, "max_length": 10, "noisy_linear": false, "dataset_name": "multiwoz21", "data_percentage": 0.01, "dialogue_order": 0, "multiwoz_like": false, "regularization_weight": 0.0, "enc_input_dim": 128, "enc_nhead": 2, "enc_d_hid": 128, "enc_nlayers": 4, "enc_dropout": 0.1, "dec_input_dim": 128, "dec_nhead": 2, "dec_d_hid": 128, "dec_nlayers": 2, "dec_dropout": 0.0, "action_embedding_dim": 128, "domain_embedding_dim": 64, "value_embedding_dim": 12, "node_embedding_dim": 128, "roberta_path": "", "node_attention": true, "semantic_descriptions": true, "freeze_roberta": true, "use_pooled": false, "mean": true, "roberta_actions": true, "independent_descriptions": true, "random_matrix": false, "distance_metric": false, "verbose": false, "ignore_features": [], "domains_removed": ["hospital", "police", "train", "hotel", "attraction", "taxi"], "only_active_values": false, "permuted_data": false, "need_weights": false, "cls_dim": 128, "independent": true, "old_critic": false, "pos_weight": 5, "weight_decay": 1e-05}, "policy_config": null}

supervised.pol.mdl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:167f64fd660907849c157f0423778600b6613ce4c6fc98247484c0e279b36206
+size 9331458

train_INFO.log ADDED Viewed

	@@ -0,0 +1,345 @@

+Visible device: cuda
+Seed used: 0
+Batch size: 64
+Epochs: 40
+Learning rate: 1e-05
+Entropy weight: 0.01
+Regularization weight: 0.0
+Only use multiwoz like domains: False
+We use: 1.0% of the data
+Dialogue order used: 0
+Vectorizer: Data set used is multiwoz21
+We filter state by active domains: True
+Vectorizer: Data set used is multiwoz21
+Embedding semantic descriptions: True
+Embedded descriptions successfully. Size: torch.Size([338, 768])
+Data set used for descriptions: multiwoz21
+We use Roberta to embed actions.
+Loaded model from experiments/seed0/save/supervised.pol.mdl
+Start training
+Epoch: 0
+Average actions: 1.9973957538604736
+Average target actions: 2.5520834922790527
+Precision: 0.09615384615384616
+Recall: 0.07462686567164178
+F1: 0.08403361344537816
+<<dialog policy>> epoch 0: saved network to mdl
+Best Precision: 0.09615384615384616
+Best Recall: 0.07462686567164178
+Best F1: 0.08403361344537816
+Epoch: 1
+Precision: 0.09615384615384616
+Recall: 0.07462686567164178
+F1: 0.08403361344537816
+Best Precision: 0.09615384615384616
+Best Recall: 0.07462686567164178
+Best F1: 0.08403361344537816
+Epoch: 2
+Average actions: 2.3515625
+Average target actions: 2.6197917461395264
+Precision: 0.10526315789473684
+Recall: 0.08955223880597014
+F1: 0.0967741935483871
+<<dialog policy>> epoch 2: saved network to mdl
+Best Precision: 0.10526315789473684
+Best Recall: 0.08955223880597014
+Best F1: 0.0967741935483871
+Epoch: 3
+Precision: 0.10526315789473684
+Recall: 0.08955223880597014
+F1: 0.0967741935483871
+Best Precision: 0.10526315789473684
+Best Recall: 0.08955223880597014
+Best F1: 0.0967741935483871
+Epoch: 4
+Average actions: 1.6770832538604736
+Average target actions: 2.8567709922790527
+Precision: 0.1347517730496454
+Recall: 0.0945273631840796
+F1: 0.11111111111111112
+<<dialog policy>> epoch 4: saved network to mdl
+Best Precision: 0.1347517730496454
+Best Recall: 0.0945273631840796
+Best F1: 0.11111111111111112
+Epoch: 5
+Precision: 0.1347517730496454
+Recall: 0.0945273631840796
+F1: 0.11111111111111112
+Best Precision: 0.1347517730496454
+Best Recall: 0.0945273631840796
+Best F1: 0.11111111111111112
+Epoch: 6
+Average actions: 1.9088542461395264
+Average target actions: 2.7213542461395264
+Precision: 0.12080536912751678
+Recall: 0.08955223880597014
+F1: 0.10285714285714286
+Best Precision: 0.1347517730496454
+Best Recall: 0.0945273631840796
+Best F1: 0.11111111111111112
+Epoch: 7
+Precision: 0.12080536912751678
+Recall: 0.08955223880597014
+F1: 0.10285714285714286
+Best Precision: 0.1347517730496454
+Best Recall: 0.0945273631840796
+Best F1: 0.11111111111111112
+Epoch: 8
+Average actions: 2.0572915077209473
+Average target actions: 2.8229167461395264
+Precision: 0.12903225806451613
+Recall: 0.09950248756218906
+F1: 0.11235955056179776
+<<dialog policy>> epoch 8: saved network to mdl
+Best Precision: 0.1347517730496454
+Best Recall: 0.09950248756218906
+Best F1: 0.11235955056179776
+Epoch: 9
+Precision: 0.12903225806451613
+Recall: 0.09950248756218906
+F1: 0.11235955056179776
+Best Precision: 0.1347517730496454
+Best Recall: 0.09950248756218906
+Best F1: 0.11235955056179776
+Epoch: 10
+Average actions: 2.0911459922790527
+Average target actions: 2.6875
+Precision: 0.11612903225806452
+Recall: 0.08955223880597014
+F1: 0.10112359550561797
+Best Precision: 0.1347517730496454
+Best Recall: 0.09950248756218906
+Best F1: 0.11235955056179776
+Epoch: 11
+Precision: 0.11612903225806452
+Recall: 0.08955223880597014
+F1: 0.10112359550561797
+Best Precision: 0.1347517730496454
+Best Recall: 0.09950248756218906
+Best F1: 0.11235955056179776
+Epoch: 12
+Average actions: 2.0833332538604736
+Average target actions: 2.5859375
+Precision: 0.11976047904191617
+Recall: 0.09950248756218906
+F1: 0.10869565217391305
+Best Precision: 0.1347517730496454
+Best Recall: 0.09950248756218906
+Best F1: 0.11235955056179776
+Epoch: 13
+Precision: 0.11976047904191617
+Recall: 0.09950248756218906
+F1: 0.10869565217391305
+Best Precision: 0.1347517730496454
+Best Recall: 0.09950248756218906
+Best F1: 0.11235955056179776
+Epoch: 14
+Average actions: 2.1119790077209473
+Average target actions: 2.7213542461395264
+Precision: 0.16778523489932887
+Recall: 0.12437810945273632
+F1: 0.14285714285714285
+<<dialog policy>> epoch 14: saved network to mdl
+Best Precision: 0.16778523489932887
+Best Recall: 0.12437810945273632
+Best F1: 0.14285714285714285
+Epoch: 15
+Precision: 0.16778523489932887
+Recall: 0.12437810945273632
+F1: 0.14285714285714285
+Best Precision: 0.16778523489932887
+Best Recall: 0.12437810945273632
+Best F1: 0.14285714285714285
+Epoch: 16
+Average actions: 1.7994792461395264
+Average target actions: 2.5520834922790527
+Precision: 0.10135135135135136
+Recall: 0.07462686567164178
+F1: 0.08595988538681948
+Best Precision: 0.16778523489932887
+Best Recall: 0.12437810945273632
+Best F1: 0.14285714285714285
+Epoch: 17
+Precision: 0.10135135135135136
+Recall: 0.07462686567164178
+F1: 0.08595988538681948
+Best Precision: 0.16778523489932887
+Best Recall: 0.12437810945273632
+Best F1: 0.14285714285714285
+Epoch: 18
+Average actions: 2.0572915077209473
+Average target actions: 2.7552084922790527
+Precision: 0.13548387096774195
+Recall: 0.1044776119402985
+F1: 0.11797752808988765
+Best Precision: 0.16778523489932887
+Best Recall: 0.12437810945273632
+Best F1: 0.14285714285714285
+Epoch: 19
+Precision: 0.13548387096774195
+Recall: 0.1044776119402985
+F1: 0.11797752808988765
+Best Precision: 0.16778523489932887
+Best Recall: 0.12437810945273632
+Best F1: 0.14285714285714285
+Epoch: 20
+Average actions: 1.9661457538604736
+Average target actions: 2.7213542461395264
+Precision: 0.1118421052631579
+Recall: 0.0845771144278607
+F1: 0.0963172804532578
+Best Precision: 0.16778523489932887
+Best Recall: 0.12437810945273632
+Best F1: 0.14285714285714285
+Epoch: 21
+Precision: 0.1118421052631579
+Recall: 0.0845771144278607
+F1: 0.0963172804532578
+Best Precision: 0.16778523489932887
+Best Recall: 0.12437810945273632
+Best F1: 0.14285714285714285
+Epoch: 22
+Average actions: 1.9557292461395264
+Average target actions: 2.5520834922790527
+Precision: 0.07741935483870968
+Recall: 0.05970149253731343
+F1: 0.06741573033707865
+Best Precision: 0.16778523489932887
+Best Recall: 0.12437810945273632
+Best F1: 0.14285714285714285
+Epoch: 23
+Precision: 0.07741935483870968
+Recall: 0.05970149253731343
+F1: 0.06741573033707865
+Best Precision: 0.16778523489932887
+Best Recall: 0.12437810945273632
+Best F1: 0.14285714285714285
+Epoch: 24
+Average actions: 2.0833334922790527
+Average target actions: 2.8229167461395264
+Precision: 0.09090909090909091
+Recall: 0.06965174129353234
+F1: 0.07887323943661972
+Best Precision: 0.16778523489932887
+Best Recall: 0.12437810945273632
+Best F1: 0.14285714285714285
+Epoch: 25
+Precision: 0.09090909090909091
+Recall: 0.06965174129353234
+F1: 0.07887323943661972
+Best Precision: 0.16778523489932887
+Best Recall: 0.12437810945273632
+Best F1: 0.14285714285714285
+Epoch: 26
+Average actions: 1.7135417461395264
+Average target actions: 2.6197917461395264
+Precision: 0.145985401459854
+Recall: 0.09950248756218906
+F1: 0.1183431952662722
+Best Precision: 0.16778523489932887
+Best Recall: 0.12437810945273632
+Best F1: 0.14285714285714285
+Epoch: 27
+Precision: 0.145985401459854
+Recall: 0.09950248756218906
+F1: 0.1183431952662722
+Best Precision: 0.16778523489932887
+Best Recall: 0.12437810945273632
+Best F1: 0.14285714285714285
+Epoch: 28
+Average actions: 2.0364584922790527
+Average target actions: 2.5520834922790527
+Precision: 0.16891891891891891
+Recall: 0.12437810945273632
+F1: 0.14326647564469916
+<<dialog policy>> epoch 28: saved network to mdl
+Best Precision: 0.16891891891891891
+Best Recall: 0.12437810945273632
+Best F1: 0.14326647564469916
+Epoch: 29
+Precision: 0.16891891891891891
+Recall: 0.12437810945273632
+F1: 0.14326647564469916
+Best Precision: 0.16891891891891891
+Best Recall: 0.12437810945273632
+Best F1: 0.14326647564469916
+Epoch: 30
+Average actions: 2.0026040077209473
+Average target actions: 2.3828125
+Precision: 0.16216216216216217
+Recall: 0.11940298507462686
+F1: 0.13753581661891118
+Best Precision: 0.16891891891891891
+Best Recall: 0.12437810945273632
+Best F1: 0.14326647564469916
+Epoch: 31
+Precision: 0.16216216216216217
+Recall: 0.11940298507462686
+F1: 0.13753581661891118
+Best Precision: 0.16891891891891891
+Best Recall: 0.12437810945273632
+Best F1: 0.14326647564469916
+Epoch: 32
+Average actions: 1.8046875
+Average target actions: 2.6875
+Precision: 0.12142857142857143
+Recall: 0.0845771144278607
+F1: 0.09970674486803519
+Best Precision: 0.16891891891891891
+Best Recall: 0.12437810945273632
+Best F1: 0.14326647564469916
+Epoch: 33
+Precision: 0.12142857142857143
+Recall: 0.0845771144278607
+F1: 0.09970674486803519
+Best Precision: 0.16891891891891891
+Best Recall: 0.12437810945273632
+Best F1: 0.14326647564469916
+Epoch: 34
+Average actions: 1.9348957538604736
+Average target actions: 2.6875
+Precision: 0.12162162162162163
+Recall: 0.08955223880597014
+F1: 0.10315186246418337
+Best Precision: 0.16891891891891891
+Best Recall: 0.12437810945273632
+Best F1: 0.14326647564469916
+Epoch: 35
+Precision: 0.12162162162162163
+Recall: 0.08955223880597014
+F1: 0.10315186246418337
+Best Precision: 0.16891891891891891
+Best Recall: 0.12437810945273632
+Best F1: 0.14326647564469916
+Epoch: 36
+Average actions: 2.0989584922790527
+Average target actions: 2.484375
+Precision: 0.14743589743589744
+Recall: 0.11442786069651742
+F1: 0.1288515406162465
+Best Precision: 0.16891891891891891
+Best Recall: 0.12437810945273632
+Best F1: 0.14326647564469916
+Epoch: 37
+Precision: 0.14743589743589744
+Recall: 0.11442786069651742
+F1: 0.1288515406162465
+Best Precision: 0.16891891891891891
+Best Recall: 0.12437810945273632
+Best F1: 0.14326647564469916
+Epoch: 38
+Average actions: 2.0260415077209473
+Average target actions: 2.5520834922790527
+Precision: 0.1456953642384106
+Recall: 0.10945273631840796
+F1: 0.12499999999999997
+Best Precision: 0.16891891891891891
+Best Recall: 0.12437810945273632
+Best F1: 0.14326647564469916
+Epoch: 39
+Precision: 0.1456953642384106
+Recall: 0.10945273631840796
+F1: 0.12499999999999997
+Best Precision: 0.16891891891891891
+Best Recall: 0.12437810945273632
+Best F1: 0.14326647564469916