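"""Command-line options and config loading for an AlphaZero-style n-in-a-row (Gomoku) trainer.

Defines argparse flags for the board, device selection, self-play and training
hyperparameters, and the MCTS agents; parses them into `opts`; and loads the
YAML file referenced by --config into `config`.
"""
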
import os
import argparse
import yaml

parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)

# basic settings
parser.add_argument('--seed', default=1234, type=int, help='random seed')
parser.add_argument('--savepath', type=str, default="blip_uni_cross_mu", help='directory name used when saving outputs')


# board settings
parser.add_argument("--board_width", type=int, default=9)
parser.add_argument("--board_height", type=int, default=9)
parser.add_argument("--n_in_row", type=int, default=5, help="number of consecutive pieces in a row needed to win")


# device settings
parser.add_argument('--config', type=str, default='config/config.yaml', help='Path to the config file.')
parser.add_argument('--gpu_num', type=int, default=1, help='number of GPUs to use')
parser.add_argument('--gpu_id', type=str, default='5', help='id(s) of the GPU(s) to use')


# save options
parser.add_argument('--clear_visualizer', dest='clear_visualizer', action='store_true')
parser.add_argument('--std_log', dest='std_log', action='store_true')


# mode settings
parser.add_argument("--split", type=str, default="train", help="the mode of the worker")


# train settings
parser.add_argument("--expri", type=str, default="", help="name of the experiment")
parser.add_argument("--learn_rate", type=float, default=2e-3)
parser.add_argument("--l2_const", type=float, default=1e-4, help="coefficient of the L2 weight penalty")
parser.add_argument("--lr_multiplier", type=float, default=1.0, help="multiplier used to adaptively adjust the learning rate based on the KL divergence")
parser.add_argument("--buffer_size", type=int, default=10000, help="size of the buffer that stores collected self-play game data")
parser.add_argument("--batch_size", type=int, default=512)
parser.add_argument("--play_batch_size", type=int, default=1, help="number of self-play games played each time data is collected")
parser.add_argument("--epochs", type=int, default=5, help="number of training steps for each update")
parser.add_argument("--kl_targ", type=float, default=0.02, help="target KL divergence between the old and new policy")
parser.add_argument("--check_freq", type=int, default=50, help="how often to evaluate the win ratio during training")
parser.add_argument("--game_batch_num", type=int, default=1500, help="total number of training iterations")


# argparse's type=bool treats any non-empty string as True, so expose this as a store_true flag
parser.add_argument("--distributed", action='store_true', help="enable distributed training")

# preload model settings
parser.add_argument("--preload_model", type=str, default="", help="path to a pretrained model to load before training")


# AlphaZero agent settings
parser.add_argument("--temp", type=float, default=1.0, help="temperature parameter controlling exploration when selecting the next action")
parser.add_argument("--n_playout", type=int, default=200, help="number of MCTS simulations per move")
parser.add_argument("--c_puct", type=int, default=5, help="parameter balancing exploration and exploitation in MCTS")

# pure MCTS agent settings
parser.add_argument("--pure_mcts_playout_num", type=int, default=200, help="number of playouts per move for the pure MCTS baseline agent")

# test settings
parser.add_argument('--test_ckpt', type=str, default=None, help='absolute path to the checkpoint used for testing')


opts = parser.parse_args()

# additional parameters
current_path = os.path.abspath(__file__)
# directory two levels above this file (assumed to be the project root), used to resolve the config path
grandfather_path = os.path.dirname(os.path.dirname(current_path))
with open(os.path.join(grandfather_path, opts.config), 'r') as stream:
    config = yaml.full_load(stream)
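
# Minimal self-check (added as an illustrative sketch, not required by the rest of the
# code): running this module directly prints the parsed options and the loaded YAML
# config, which helps verify that the --config path resolves correctly.
if __name__ == '__main__':
    print("parsed options:")
    for key, value in sorted(vars(opts).items()):
        print(f"  {key}: {value}")
    print(f"loaded config from {os.path.join(grandfather_path, opts.config)}:")
    print(config)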