File size: 5,459 Bytes
b3dc3b1 03bb9cd b3dc3b1 03bb9cd b3dc3b1 9b738a9 b3dc3b1 9b738a9 b3dc3b1 9b738a9 b3dc3b1 9b738a9 03bb9cd b3dc3b1 9b738a9 b3dc3b1 03bb9cd b3dc3b1 9b738a9 03bb9cd b3dc3b1 03bb9cd b3dc3b1 03bb9cd b3dc3b1 03bb9cd b3dc3b1 03bb9cd b3dc3b1 9b738a9 03bb9cd b3dc3b1 03bb9cd b3dc3b1 03bb9cd b3dc3b1 03bb9cd b3dc3b1 03bb9cd b3dc3b1 03bb9cd b3dc3b1 03bb9cd b3dc3b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
# Support for PyTorch mps mode (https://pytorch.org/docs/stable/notes/mps.html)
import os
from rl_algo_impls.shared.callbacks import Callback
from rl_algo_impls.shared.callbacks.self_play_callback import SelfPlayCallback
from rl_algo_impls.wrappers.self_play_wrapper import SelfPlayWrapper
from rl_algo_impls.wrappers.vectorable_wrapper import find_wrapper
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
import dataclasses
import shutil
from dataclasses import asdict, dataclass
from typing import Any, Dict, List, Optional, Sequence
import yaml
from torch.utils.tensorboard.writer import SummaryWriter
import wandb
from rl_algo_impls.runner.config import Config, EnvHyperparams, RunArgs
from rl_algo_impls.runner.running_utils import (
ALGOS,
get_device,
hparam_dict,
load_hyperparams,
make_policy,
plot_eval_callback,
set_seeds,
)
from rl_algo_impls.shared.callbacks.eval_callback import EvalCallback
from rl_algo_impls.shared.callbacks.microrts_reward_decay_callback import (
MicrortsRewardDecayCallback,
)
from rl_algo_impls.shared.stats import EpisodesStats
from rl_algo_impls.shared.vec_env import make_env, make_eval_env
@dataclass
class TrainArgs(RunArgs):
wandb_project_name: Optional[str] = None
wandb_entity: Optional[str] = None
wandb_tags: Sequence[str] = dataclasses.field(default_factory=list)
wandb_group: Optional[str] = None
def train(args: TrainArgs):
print(args)
hyperparams = load_hyperparams(args.algo, args.env)
print(hyperparams)
config = Config(args, hyperparams, os.getcwd())
wandb_enabled = bool(args.wandb_project_name)
if wandb_enabled:
wandb.tensorboard.patch(
root_logdir=config.tensorboard_summary_path, pytorch=True
)
wandb.init(
project=args.wandb_project_name,
entity=args.wandb_entity,
config=asdict(hyperparams),
name=config.run_name(),
monitor_gym=True,
save_code=True,
tags=args.wandb_tags,
group=args.wandb_group,
)
wandb.config.update(args)
tb_writer = SummaryWriter(config.tensorboard_summary_path)
set_seeds(args.seed, args.use_deterministic_algorithms)
env = make_env(
config, EnvHyperparams(**config.env_hyperparams), tb_writer=tb_writer
)
device = get_device(config, env)
policy_factory = lambda: make_policy(
args.algo, env, device, **config.policy_hyperparams
)
policy = policy_factory()
algo = ALGOS[args.algo](policy, env, device, tb_writer, **config.algo_hyperparams)
num_parameters = policy.num_parameters()
num_trainable_parameters = policy.num_trainable_parameters()
if wandb_enabled:
wandb.run.summary["num_parameters"] = num_parameters # type: ignore
wandb.run.summary["num_trainable_parameters"] = num_trainable_parameters # type: ignore
else:
print(
f"num_parameters = {num_parameters} ; "
f"num_trainable_parameters = {num_trainable_parameters}"
)
eval_env = make_eval_env(config, EnvHyperparams(**config.env_hyperparams))
record_best_videos = config.eval_hyperparams.get("record_best_videos", True)
eval_callback = EvalCallback(
policy,
eval_env,
tb_writer,
best_model_path=config.model_dir_path(best=True),
**config.eval_callback_params(),
video_env=make_eval_env(
config,
EnvHyperparams(**config.env_hyperparams),
override_hparams={"n_envs": 1},
)
if record_best_videos
else None,
best_video_dir=config.best_videos_dir,
additional_keys_to_log=config.additional_keys_to_log,
wandb_enabled=wandb_enabled,
)
callbacks: List[Callback] = [eval_callback]
if config.hyperparams.microrts_reward_decay_callback:
callbacks.append(MicrortsRewardDecayCallback(config, env))
selfPlayWrapper = find_wrapper(env, SelfPlayWrapper)
if selfPlayWrapper:
callbacks.append(SelfPlayCallback(policy, policy_factory, selfPlayWrapper))
algo.learn(config.n_timesteps, callbacks=callbacks)
policy.save(config.model_dir_path(best=False))
eval_stats = eval_callback.evaluate(n_episodes=10, print_returns=True)
plot_eval_callback(eval_callback, tb_writer, config.run_name())
log_dict: Dict[str, Any] = {
"eval": eval_stats._asdict(),
}
if eval_callback.best:
log_dict["best_eval"] = eval_callback.best._asdict()
log_dict.update(asdict(hyperparams))
log_dict.update(vars(args))
with open(config.logs_path, "a") as f:
yaml.dump({config.run_name(): log_dict}, f)
best_eval_stats: EpisodesStats = eval_callback.best # type: ignore
tb_writer.add_hparams(
hparam_dict(hyperparams, vars(args)),
{
"hparam/best_mean": best_eval_stats.score.mean,
"hparam/best_result": best_eval_stats.score.mean
- best_eval_stats.score.std,
"hparam/last_mean": eval_stats.score.mean,
"hparam/last_result": eval_stats.score.mean - eval_stats.score.std,
},
None,
config.run_name(),
)
tb_writer.close()
if wandb_enabled:
shutil.make_archive(
os.path.join(wandb.run.dir, config.model_dir_name()), # type: ignore
"zip",
config.model_dir_path(),
)
wandb.finish()
|