# Spaces: Sleeping  (page-status banner captured together with the source; not part of the program)
import argparse
import os

import yaml
def _str2bool(value: str) -> bool:
    """Convert a command-line token into a real boolean.

    ``type=bool`` does not work with argparse: ``bool("False")`` is ``True``
    because every non-empty string is truthy.  This converter accepts the
    usual spellings (case-insensitive) and rejects anything else so that a
    typo is reported instead of being silently treated as ``True``.

    Raises:
        argparse.ArgumentTypeError: if *value* is not a recognised spelling.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("true", "t", "yes", "y", "1", "on"):
        return True
    # The empty string stays False, as it was under ``type=bool``.
    if token in ("false", "f", "no", "n", "0", "off", ""):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got {!r}".format(value)
    )


# Command-line interface of the project.  ArgumentDefaultsHelpFormatter appends
# "(default: ...)" to every option that has a non-empty help string.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter
)

# basic settings
parser.add_argument('--seed', default=1234, type=int)
parser.add_argument('--savepath', type=str, default="blip_uni_cross_mu", help='')

# board settings
parser.add_argument("--board_width", type=int, default=9)
parser.add_argument("--board_height", type=int, default=9)
parser.add_argument("--n_in_row", type=int, default=5, help="the condition of winning")

# device settings
parser.add_argument(
    '--config', type=str, default='config/config.yaml',
    help='Path to the config file.',
)
parser.add_argument('--gpu_num', type=int, default=1)
# NOTE: --gpu_id is parsed as a string, not an int.
parser.add_argument('--gpu_id', type=str, default='5')

# save options (plain on/off switches, False unless given)
parser.add_argument('--clear_visualizer', dest='clear_visualizer', action='store_true')
parser.add_argument('--std_log', dest='std_log', action='store_true')

# mode settings
parser.add_argument("--split", type=str, default="train", help="the mode of woker")

# train settings
parser.add_argument("--expri", type=str, default="", help="the name of experiment")
parser.add_argument("--learn_rate", type=float, default=2e-3)
# coefficient of the L2 penalty
parser.add_argument("--l2_const", type=float, default=1e-4)

# optimisation / self-play schedule
parser.add_argument(
    "--lr_multiplier", type=float, default=1.0,
    help="adaptively adjust the learning rate based on KL",
)
parser.add_argument(
    "--buffer_size", type=int, default=10000,
    help="The size of collection of game data ",
)
parser.add_argument("--batch_size", type=int, default=512)
parser.add_argument(
    "--play_batch_size", type=int, default=1,
    help="The time of selfplaying when collect the data",
)
parser.add_argument(
    "--epochs", type=int, default=5,
    help="num of train_steps for each update",
)
parser.add_argument(
    "--kl_targ", type=float, default=0.02,
    help="the target kl distance between the old decision function and the new decision function ",
)
parser.add_argument(
    "--check_freq", type=int, default=50,
    help='the frequence of the checking the win ratio when training',
)
parser.add_argument(
    "--game_batch_num", type=int, default=1500,
    help="the total training times",
)
# Was ``type=bool``, which turned "--distributed False" into True.
# ``nargs='?'`` + ``const=True`` additionally allows a bare ``--distributed``.
parser.add_argument(
    "--distributed", type=_str2bool, nargs="?", const=True, default=False,
    help="boolean option; accepts true/false, yes/no, 1/0 "
         "(a bare --distributed means true)",
)

# preload_model setting
parser.add_argument("--preload_model", type=str, default="")

# Alphazero agent setting
parser.add_argument(
    "--temp", type=float, default=1.0,
    help="the temperature parameter when calculate the decision function getting the next action",
)
parser.add_argument(
    "--n_playout", type=int, default=200,
    help="num of simulations for each move ",
)
# NOTE(review): parsed as int, so fractional values such as 1.5 are rejected;
# confirm with the consumers of opts.c_puct before widening to float.
parser.add_argument(
    "--c_puct", type=int, default=5,
    help="the balance parameter between exploration and exploitative ",
)

# pure_mcts agent setting
parser.add_argument("--pure_mcts_playout_num", type=int, default=200)

# test settings
parser.add_argument('--test_ckpt', type=str, default=None, help='ckpt absolute path')
# Parse sys.argv into the module-level ``opts`` namespace.
opts = parser.parse_args()

# additional parameters
# ``--config`` is resolved against the parent of the directory that contains
# this file.  os.path.join discards that prefix when ``opts.config`` is
# already an absolute path, so absolute paths are used as given.
current_path = os.path.abspath(__file__)
grandfather_path = os.path.dirname(os.path.dirname(current_path))

# Decode explicitly as UTF-8 so the result does not depend on the locale's
# default encoding.
# NOTE(review): yaml.full_load can build more than plain scalars, lists and
# dicts; if the config only uses standard YAML tags, yaml.safe_load is the
# safer choice - confirm before switching.
with open(os.path.join(grandfather_path, opts.config), 'r', encoding='utf-8') as stream:
    config = yaml.full_load(stream)