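"""DDPG training loop for a continuous-control Gymnasium environment, with
Captum Integrated Gradients attributions computed over the trained actor.

Relies on `ddpg.Agent` exposing `choose_action`, `remember`, `learn`,
`save_models`, and `load_models`, as used below. The default kwargs
(`continuous`, `gravity`, 8-dim observations, 4 actions) suggest a
LunarLander-style environment, though any registered id is accepted.
"""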
from ddpg import Agent
import gymnasium as gym
import numpy as np
import matplotlib.pyplot as plt
import torch
from captum.attr import IntegratedGradients
from gymnasium.wrappers import RecordVideo


class TrainingLoop:
    def __init__(self, env_spec, output_path='./output/', seed=0, **kwargs):
        assert env_spec in gym.envs.registry.keys()

        # Environment kwargs; any of these can be overridden via **kwargs.
        self.defaults = {
            "id": env_spec,
            "continuous": True,
            "gravity": -10.0,
            "render_mode": None
        }
        # Apply caller overrides before the environment is first created.
        self.defaults.update(**kwargs)

        self.env = gym.make(
            **self.defaults
        )

        torch.manual_seed(seed)

        self.agent = None
        self.output_path = output_path

    # TODO: spec-to-hyperparameters look-up
    def create_agent(self, alpha=0.000025, beta=0.00025, input_dims=[8], tau=0.001,
                     batch_size=64, layer1_size=400, layer2_size=300, n_actions=4):
        self.agent = Agent(alpha=alpha, beta=beta, input_dims=input_dims, tau=tau, env=self.env,
                           batch_size=batch_size, layer1_size=layer1_size, layer2_size=layer2_size, n_actions=n_actions)

    def train(self):
        assert self.agent is not None

        self.defaults["render_mode"] = None
        self.env = gym.make(
            **self.defaults
        )
        # self.agent.load_models()

        score_history = []
        for i in range(10000):
            done = False
            score = 0
            obs, _ = self.env.reset()
            while not done:
                act = self.agent.choose_action(obs)
                new_state, reward, terminated, truncated, info = self.env.step(act)
                done = terminated or truncated
                self.agent.remember(obs, act, reward, new_state, int(done))
                self.agent.learn()
                score += reward
                obs = new_state

            score_history.append(score)
            print("episode", i, "score %.2f" % score, "100 game average %.2f" % np.mean(score_history[-100:]))

            if i % 25 == 0:
                self.agent.save_models()

        self.env.close()

    def load_trained(self):
        assert self.agent is not None

        self.defaults["render_mode"] = None
        self.env = gym.make(
            **self.defaults
        )
        self.agent.load_models()

        score_history = []
        for i in range(50):
            done = False
            score = 0
            obs, _ = self.env.reset()
            while not done:
                act = self.agent.choose_action(obs)
                new_state, reward, terminated, truncated, info = self.env.step(act)
                done = terminated or truncated
                score += reward
                obs = new_state

            score_history.append(score)
            print("episode", i, "score %.2f" % score, "100 game average %.2f" % np.mean(score_history[-100:]))

        self.env.close()

    # Video Recording
    # def render_video(self, episode_trigger=100):
    #     assert self.agent is not None
    #
    #     self.defaults["render_mode"] = "rgb_array"
    #     self.env = gym.make(
    #         **self.defaults
    #     )
    #     episode_trigger_callable = lambda x: x % episode_trigger == 0
    #     self.env = RecordVideo(env=self.env, video_folder=self.output_path, name_prefix=f"{self.defaults['id']}-recording",
    #                            episode_trigger=episode_trigger_callable, disable_logger=True)
    #     self.agent.load_models()
    #
    #     score_history = []
    #     for i in range(200):
    #         done = False
    #         score = 0
    #         obs, _ = self.env.reset()
    #         while not done:
    #             act = self.agent.choose_action(observation=obs)
    #             new_state, reward, terminated, truncated, info = self.env.step(act)
    #             done = terminated or truncated
    #             score += reward
    #             obs = new_state
    #
    #         score_history.append(score)
    #         print("episode", i, "score %.2f" % score, "100 game average %.2f" % np.mean(score_history[-100:]))
    #
    #     self.env.close()

    # Model Explainability
    def _collect_running_baseline_average(self, num_iterations: int) -> torch.Tensor:
        assert self.agent is not None

        self.defaults["render_mode"] = None
        self.env = gym.make(
            **self.defaults
        )

        print("--------- Collecting running baseline average ----------")
        self.agent.load_models()

        # Average the initial observation across `num_iterations` episodes and
        # use it as the Integrated Gradients baseline.
        sum_obs = torch.zeros(8)
        for i in range(num_iterations):
            done = False
            score = 0
            obs, _ = self.env.reset()
            sum_obs += torch.as_tensor(obs, dtype=torch.float32)
            # print(f"Baseline on iteration #{i}: {obs}")

            while not done:
                act = self.agent.choose_action(obs, baseline=None)
                new_state, reward, terminated, truncated, info = self.env.step(act)
                done = terminated or truncated
                score += reward
                obs = new_state

        print(f"Baseline collected: {sum_obs / num_iterations}")
        self.env.close()
        return sum_obs / num_iterations

    def explain_trained(self, option: int, num_iterations: int = 10) -> tuple:
        assert self.agent is not None

        # Choose the Integrated Gradients baseline lazily so the rollout in
        # _collect_running_baseline_average only runs when it is requested.
        if option == 1:
            baseline = self._collect_running_baseline_average(num_iterations)
        else:
            baseline = torch.zeros(8)

        self.defaults["render_mode"] = "rgb_array"
        self.env = gym.make(
            **self.defaults
        )

        print("\n\n\n\n--------- Performing Attributions -----------")
        self.agent.load_models()
        print(self.agent.actor)

        ig = IntegratedGradients(self.agent.actor)
        self.agent.ig = ig

        score_history = []
        frames = []
        for i in range(10):
            done = False
            score = 0
            obs, _ = self.env.reset()
            while not done:
                frames.append(self.env.render())
                act = self.agent.choose_action(observation=obs, baseline=baseline)
                new_state, reward, terminated, truncated, info = self.env.step(act)
                done = terminated or truncated
                score += reward
                obs = new_state

            score_history.append(score)
            print("episode", i, "score %.2f" % score, "100 game average %.2f" % np.mean(score_history[-100:]))

        self.env.close()

        if len(frames) != len(self.agent.attributions):
            print("Frames and agent attribution history are not the same shape!")

        return (frames, self.agent.attributions)
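

# Minimal usage sketch, not part of the original module: the environment id
# "LunarLander-v2" is an assumption based on the continuous/gravity kwargs and
# the 8-dim observation / 4-action defaults above (newer Gymnasium releases
# register "LunarLander-v3" instead), and explain_trained() expects the
# checkpoints that train() writes via save_models().
if __name__ == "__main__":
    loop = TrainingLoop(env_spec="LunarLander-v2")
    loop.create_agent()
    loop.train()  # long-running: 10000 episodes with periodic checkpointing
    frames, attributions = loop.explain_trained(option=0)  # option 0 -> zero-vector baseline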