Upload . with huggingface_hub
- .summary/0/events.out.tfevents.1677095050.xps +3 -0
- README.md +56 -0
- checkpoint_p0/best_000000494_2023424_reward_4.772.pth +3 -0
- checkpoint_p0/checkpoint_000000836_3424256.pth +3 -0
- checkpoint_p0/checkpoint_000000978_4005888.pth +3 -0
- config.json +142 -0
- git.diff +0 -0
- sf_log.txt +865 -0
.summary/0/events.out.tfevents.1677095050.xps
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c330322bde110c48fd794e25ca7afd07a7146afbcd3c3538a32f9bd6d041a28a
size 671276
README.md
ADDED
@@ -0,0 +1,56 @@
---
library_name: sample-factory
tags:
- deep-reinforcement-learning
- reinforcement-learning
- sample-factory
model-index:
- name: APPO
  results:
  - task:
      type: reinforcement-learning
      name: reinforcement-learning
    dataset:
      name: doom_health_gathering_supreme
      type: doom_health_gathering_supreme
    metrics:
    - type: mean_reward
      value: 3.94 +/- 0.88
      name: mean_reward
      verified: false
---

An **APPO** model trained on the **doom_health_gathering_supreme** environment.

This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/

## Downloading the model

After installing Sample-Factory, download the model with:
```
python -m sample_factory.huggingface.load_from_hub -r chqmatteo/rl_course_vizdoom_health_gathering_supreme
```
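
If you prefer to fetch the files programmatically, the snippet below is a minimal sketch that calls `huggingface_hub` directly (the library the loader above builds on). Downloading this way skips the Sample-Factory bookkeeping, so treat it as a convenience rather than the canonical route.

```python
# Minimal sketch: pull this repo's files (checkpoints, config.json, sf_log.txt)
# straight from the Hub instead of going through sample_factory's loader.
from huggingface_hub import snapshot_download

# Downloads every file in the repo to the local HF cache and returns the
# snapshot directory.
path = snapshot_download(repo_id="chqmatteo/rl_course_vizdoom_health_gathering_supreme")
print(path)
```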

## Using the model

To run the model after download, use the `enjoy` script corresponding to this environment:
```
python -m <path.to.enjoy.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme
```
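
The `<path.to.enjoy.module>` placeholder comes from the model-card template. For the ViZDoom examples that ship with Sample-Factory (the `sf_examples.vizdoom` package that also appears in the traceback in `sf_log.txt` below), the enjoy entry point is typically the following; this is an assumption about the installed version, so verify the module name locally:

```
python -m sf_examples.vizdoom.enjoy_vizdoom --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme
```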

You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details; a sketch of such an invocation follows.
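
The `--hf_repository` flag name below is taken from the Sample-Factory Hub integration docs linked above, so double-check it against your installed version:

```
python -m <path.to.enjoy.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme --push_to_hub --hf_repository=chqmatteo/rl_course_vizdoom_health_gathering_supreme
```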

## Training with this model

To continue training with this model, use the `train` script corresponding to this environment:
```
python -m <path.to.train.module> --algo=APPO --env=doom_health_gathering_supreme --train_dir=./train_dir --experiment=rl_course_vizdoom_health_gathering_supreme --restart_behavior=resume --train_for_env_steps=10000000000
```

Note: you may have to adjust `--train_for_env_steps` to a suitably high number, as the experiment will resume at the number of steps it concluded at.
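
For instance, the newest checkpoint in this repo (`checkpoint_000000978_4005888.pth`) was saved at roughly 4 million environment steps, so a value such as `--train_for_env_steps=8000000` would continue training for about 4 million more.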
checkpoint_p0/best_000000494_2023424_reward_4.772.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d7922c48b91278eced8427efe9b5e2c9fa5b64a2984176b863ed33c9bc5710aa
size 34928614
checkpoint_p0/checkpoint_000000836_3424256.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:32521bc9a3fa7390e3847c73aeca68f406c7f6e9ee600c77b516b262fae7c753
size 34929028
checkpoint_p0/checkpoint_000000978_4005888.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aa9c97ba1dc391ffd99a08009496bc73529021a7ffdd359bceaafd9db6628623
size 34929028
config.json
ADDED
@@ -0,0 +1,142 @@
{
  "help": false,
  "algo": "APPO",
  "env": "doom_health_gathering_supreme",
  "experiment": "default_experiment",
  "train_dir": "/mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir",
  "restart_behavior": "resume",
  "device": "gpu",
  "seed": null,
  "num_policies": 1,
  "async_rl": true,
  "serial_mode": false,
  "batched_sampling": false,
  "num_batches_to_accumulate": 2,
  "worker_num_splits": 2,
  "policy_workers_per_policy": 1,
  "max_policy_lag": 1000,
  "num_workers": 8,
  "num_envs_per_worker": 4,
  "batch_size": 1024,
  "num_batches_per_epoch": 1,
  "num_epochs": 1,
  "rollout": 32,
  "recurrence": 32,
  "shuffle_minibatches": false,
  "gamma": 0.99,
  "reward_scale": 1.0,
  "reward_clip": 1000.0,
  "value_bootstrap": false,
  "normalize_returns": true,
  "exploration_loss_coeff": 0.001,
  "value_loss_coeff": 0.5,
  "kl_loss_coeff": 0.0,
  "exploration_loss": "symmetric_kl",
  "gae_lambda": 0.95,
  "ppo_clip_ratio": 0.1,
  "ppo_clip_value": 0.2,
  "with_vtrace": false,
  "vtrace_rho": 1.0,
  "vtrace_c": 1.0,
  "optimizer": "adam",
  "adam_eps": 1e-06,
  "adam_beta1": 0.9,
  "adam_beta2": 0.999,
  "max_grad_norm": 4.0,
  "learning_rate": 0.0001,
  "lr_schedule": "constant",
  "lr_schedule_kl_threshold": 0.008,
  "lr_adaptive_min": 1e-06,
  "lr_adaptive_max": 0.01,
  "obs_subtract_mean": 0.0,
  "obs_scale": 255.0,
  "normalize_input": true,
  "normalize_input_keys": null,
  "decorrelate_experience_max_seconds": 0,
  "decorrelate_envs_on_one_worker": true,
  "actor_worker_gpus": [],
  "set_workers_cpu_affinity": true,
  "force_envs_single_thread": false,
  "default_niceness": 0,
  "log_to_file": true,
  "experiment_summaries_interval": 10,
  "flush_summaries_interval": 30,
  "stats_avg": 100,
  "summaries_use_frameskip": true,
  "heartbeat_interval": 20,
  "heartbeat_reporting_interval": 600,
  "train_for_env_steps": 4000000,
  "train_for_seconds": 10000000000,
  "save_every_sec": 120,
  "keep_checkpoints": 2,
  "load_checkpoint_kind": "latest",
  "save_milestones_sec": -1,
  "save_best_every_sec": 5,
  "save_best_metric": "reward",
  "save_best_after": 100000,
  "benchmark": false,
  "encoder_mlp_layers": [
    512,
    512
  ],
  "encoder_conv_architecture": "convnet_simple",
  "encoder_conv_mlp_layers": [
    512
  ],
  "use_rnn": true,
  "rnn_size": 512,
  "rnn_type": "gru",
  "rnn_num_layers": 1,
  "decoder_mlp_layers": [],
  "nonlinearity": "elu",
  "policy_initialization": "orthogonal",
  "policy_init_gain": 1.0,
  "actor_critic_share_weights": true,
  "adaptive_stddev": true,
  "continuous_tanh_scale": 0.0,
  "initial_stddev": 1.0,
  "use_env_info_cache": false,
  "env_gpu_actions": false,
  "env_gpu_observations": true,
  "env_frameskip": 4,
  "env_framestack": 1,
  "pixel_format": "CHW",
  "use_record_episode_statistics": false,
  "with_wandb": false,
  "wandb_user": null,
  "wandb_project": "sample_factory",
  "wandb_group": null,
  "wandb_job_type": "SF",
  "wandb_tags": [],
  "with_pbt": false,
  "pbt_mix_policies_in_one_env": true,
  "pbt_period_env_steps": 5000000,
  "pbt_start_mutation": 20000000,
  "pbt_replace_fraction": 0.3,
  "pbt_mutation_rate": 0.15,
  "pbt_replace_reward_gap": 0.1,
  "pbt_replace_reward_gap_absolute": 1e-06,
  "pbt_optimize_gamma": false,
  "pbt_target_objective": "true_objective",
  "pbt_perturb_min": 1.1,
  "pbt_perturb_max": 1.5,
  "num_agents": -1,
  "num_humans": 0,
  "num_bots": -1,
  "start_bot_difficulty": null,
  "timelimit": null,
  "res_w": 128,
  "res_h": 72,
  "wide_aspect_ratio": false,
  "eval_env_frameskip": 1,
  "fps": 35,
  "command_line": "--env=doom_health_gathering_supreme --num_workers=8 --num_envs_per_worker=4 --train_for_env_steps=4000000",
  "cli_args": {
    "env": "doom_health_gathering_supreme",
    "num_workers": 8,
    "num_envs_per_worker": 4,
    "train_for_env_steps": 4000000
  },
  "git_hash": "372eb1042c1a2a82a2684e1795d47eaa26c046f7",
  "git_repo_name": "https://github.com/huggingface/deep-rl-class.git"
}
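
As a quick sanity check after downloading, the saved config can be read back to confirm the run settings; a minimal sketch, assuming `config.json` is in the current directory:

```python
import json

# Read back the run configuration saved next to the checkpoints; with
# "restart_behavior": "resume", Sample-Factory reuses these values on restart.
with open("config.json") as f:
    cfg = json.load(f)

# A few of the settings recorded above.
print(cfg["env"])                   # doom_health_gathering_supreme
print(cfg["train_for_env_steps"])   # 4000000
print(cfg["num_workers"], cfg["num_envs_per_worker"])   # 8 4
```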
git.diff
ADDED
File without changes
sf_log.txt
ADDED
@@ -0,0 +1,865 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[2023-02-22 19:44:15,206][06183] Saving configuration to /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/config.json...
|
2 |
+
[2023-02-22 19:44:16,254][06183] Rollout worker 0 uses device cpu
|
3 |
+
[2023-02-22 19:44:16,257][06183] Rollout worker 1 uses device cpu
|
4 |
+
[2023-02-22 19:44:16,261][06183] Rollout worker 2 uses device cpu
|
5 |
+
[2023-02-22 19:44:16,263][06183] Rollout worker 3 uses device cpu
|
6 |
+
[2023-02-22 19:44:16,266][06183] Rollout worker 4 uses device cpu
|
7 |
+
[2023-02-22 19:44:16,269][06183] Rollout worker 5 uses device cpu
|
8 |
+
[2023-02-22 19:44:16,273][06183] Rollout worker 6 uses device cpu
|
9 |
+
[2023-02-22 19:44:16,276][06183] Rollout worker 7 uses device cpu
|
10 |
+
[2023-02-22 19:44:16,339][06183] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
11 |
+
[2023-02-22 19:44:16,341][06183] InferenceWorker_p0-w0: min num requests: 2
|
12 |
+
[2023-02-22 19:44:16,370][06183] Starting all processes...
|
13 |
+
[2023-02-22 19:44:16,372][06183] Starting process learner_proc0
|
14 |
+
[2023-02-22 19:44:16,762][06183] Starting all processes...
|
15 |
+
[2023-02-22 19:44:16,775][06183] Starting process inference_proc0-0
|
16 |
+
[2023-02-22 19:44:16,776][06183] Starting process rollout_proc0
|
17 |
+
[2023-02-22 19:44:16,776][06183] Starting process rollout_proc1
|
18 |
+
[2023-02-22 19:44:16,778][06183] Starting process rollout_proc2
|
19 |
+
[2023-02-22 19:44:16,779][06183] Starting process rollout_proc3
|
20 |
+
[2023-02-22 19:44:16,781][06183] Starting process rollout_proc4
|
21 |
+
[2023-02-22 19:44:16,897][06183] Starting process rollout_proc5
|
22 |
+
[2023-02-22 19:44:16,898][06183] Starting process rollout_proc6
|
23 |
+
[2023-02-22 19:44:16,900][06183] Starting process rollout_proc7
|
24 |
+
[2023-02-22 19:44:20,639][14984] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
25 |
+
[2023-02-22 19:44:20,640][14984] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
|
26 |
+
[2023-02-22 19:44:20,772][14984] Num visible devices: 1
|
27 |
+
[2023-02-22 19:44:20,793][14984] Starting seed is not provided
|
28 |
+
[2023-02-22 19:44:20,795][14984] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
29 |
+
[2023-02-22 19:44:20,796][14984] Initializing actor-critic model on device cuda:0
|
30 |
+
[2023-02-22 19:44:20,799][14984] RunningMeanStd input shape: (3, 72, 128)
|
31 |
+
[2023-02-22 19:44:20,803][14984] RunningMeanStd input shape: (1,)
|
32 |
+
[2023-02-22 19:44:20,823][14984] ConvEncoder: input_channels=3
|
33 |
+
[2023-02-22 19:44:20,829][15003] Worker 3 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
|
34 |
+
[2023-02-22 19:44:20,854][15000] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
35 |
+
[2023-02-22 19:44:20,855][15000] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
|
36 |
+
[2023-02-22 19:44:20,898][15001] Worker 0 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
|
37 |
+
[2023-02-22 19:44:20,926][15000] Num visible devices: 1
|
38 |
+
[2023-02-22 19:44:20,959][15005] Worker 4 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
|
39 |
+
[2023-02-22 19:44:20,960][15008] Worker 5 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
|
40 |
+
[2023-02-22 19:44:20,963][15004] Worker 2 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
|
41 |
+
[2023-02-22 19:44:21,060][15007] Worker 7 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
|
42 |
+
[2023-02-22 19:44:21,199][15006] Worker 6 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
|
43 |
+
[2023-02-22 19:44:21,913][15002] Worker 1 uses CPU cores [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
|
44 |
+
[2023-02-22 19:44:23,056][14984] Conv encoder output size: 512
|
45 |
+
[2023-02-22 19:44:23,057][14984] Policy head output size: 512
|
46 |
+
[2023-02-22 19:44:23,084][14984] Created Actor Critic model with architecture:
|
47 |
+
[2023-02-22 19:44:23,085][14984] ActorCriticSharedWeights(
|
48 |
+
(obs_normalizer): ObservationNormalizer(
|
49 |
+
(running_mean_std): RunningMeanStdDictInPlace(
|
50 |
+
(running_mean_std): ModuleDict(
|
51 |
+
(obs): RunningMeanStdInPlace()
|
52 |
+
)
|
53 |
+
)
|
54 |
+
)
|
55 |
+
(returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
|
56 |
+
(encoder): VizdoomEncoder(
|
57 |
+
(basic_encoder): ConvEncoder(
|
58 |
+
(enc): RecursiveScriptModule(
|
59 |
+
original_name=ConvEncoderImpl
|
60 |
+
(conv_head): RecursiveScriptModule(
|
61 |
+
original_name=Sequential
|
62 |
+
(0): RecursiveScriptModule(original_name=Conv2d)
|
63 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
64 |
+
(2): RecursiveScriptModule(original_name=Conv2d)
|
65 |
+
(3): RecursiveScriptModule(original_name=ELU)
|
66 |
+
(4): RecursiveScriptModule(original_name=Conv2d)
|
67 |
+
(5): RecursiveScriptModule(original_name=ELU)
|
68 |
+
)
|
69 |
+
(mlp_layers): RecursiveScriptModule(
|
70 |
+
original_name=Sequential
|
71 |
+
(0): RecursiveScriptModule(original_name=Linear)
|
72 |
+
(1): RecursiveScriptModule(original_name=ELU)
|
73 |
+
)
|
74 |
+
)
|
75 |
+
)
|
76 |
+
)
|
77 |
+
(core): ModelCoreRNN(
|
78 |
+
(core): GRU(512, 512)
|
79 |
+
)
|
80 |
+
(decoder): MlpDecoder(
|
81 |
+
(mlp): Identity()
|
82 |
+
)
|
83 |
+
(critic_linear): Linear(in_features=512, out_features=1, bias=True)
|
84 |
+
(action_parameterization): ActionParameterizationDefault(
|
85 |
+
(distribution_linear): Linear(in_features=512, out_features=5, bias=True)
|
86 |
+
)
|
87 |
+
)
|
88 |
+
[2023-02-22 19:44:29,155][14984] Using optimizer <class 'torch.optim.adam.Adam'>
|
89 |
+
[2023-02-22 19:44:29,181][14984] No checkpoints found
|
90 |
+
[2023-02-22 19:44:29,183][14984] Did not load from checkpoint, starting from scratch!
|
91 |
+
[2023-02-22 19:44:29,186][14984] Initialized policy 0 weights for model version 0
|
92 |
+
[2023-02-22 19:44:29,202][14984] LearnerWorker_p0 finished initialization!
|
93 |
+
[2023-02-22 19:44:29,203][14984] Using GPUs [0] for process 0 (actually maps to GPUs [0])
|
94 |
+
[2023-02-22 19:44:29,445][15000] RunningMeanStd input shape: (3, 72, 128)
|
95 |
+
[2023-02-22 19:44:29,447][15000] RunningMeanStd input shape: (1,)
|
96 |
+
[2023-02-22 19:44:29,460][15000] ConvEncoder: input_channels=3
|
97 |
+
[2023-02-22 19:44:29,587][15000] Conv encoder output size: 512
|
98 |
+
[2023-02-22 19:44:29,589][15000] Policy head output size: 512
|
99 |
+
[2023-02-22 19:44:30,328][06183] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
100 |
+
[2023-02-22 19:44:33,566][06183] Inference worker 0-0 is ready!
|
101 |
+
[2023-02-22 19:44:33,568][06183] All inference workers are ready! Signal rollout workers to start!
|
102 |
+
[2023-02-22 19:44:33,613][15001] Doom resolution: 160x120, resize resolution: (128, 72)
|
103 |
+
[2023-02-22 19:44:33,613][15008] Doom resolution: 160x120, resize resolution: (128, 72)
|
104 |
+
[2023-02-22 19:44:33,614][15006] Doom resolution: 160x120, resize resolution: (128, 72)
|
105 |
+
[2023-02-22 19:44:33,615][15002] Doom resolution: 160x120, resize resolution: (128, 72)
|
106 |
+
[2023-02-22 19:44:33,616][15003] Doom resolution: 160x120, resize resolution: (128, 72)
|
107 |
+
[2023-02-22 19:44:33,618][15007] Doom resolution: 160x120, resize resolution: (128, 72)
|
108 |
+
[2023-02-22 19:44:33,620][15004] Doom resolution: 160x120, resize resolution: (128, 72)
|
109 |
+
[2023-02-22 19:44:33,622][15005] Doom resolution: 160x120, resize resolution: (128, 72)
|
110 |
+
[2023-02-22 19:44:34,106][15005] VizDoom game.init() threw an exception ViZDoomUnexpectedExitException('Controlled ViZDoom instance exited unexpectedly.'). Terminate process...
|
111 |
+
[2023-02-22 19:44:34,108][15005] EvtLoop [rollout_proc4_evt_loop, process=rollout_proc4] unhandled exception in slot='init' connected to emitter=Emitter(object_id='Sampler', signal_name='_inference_workers_initialized'), args=()
|
112 |
+
Traceback (most recent call last):
|
113 |
+
File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sf_examples/vizdoom/doom/doom_gym.py", line 228, in _game_init
|
114 |
+
self.game.init()
|
115 |
+
vizdoom.vizdoom.ViZDoomUnexpectedExitException: Controlled ViZDoom instance exited unexpectedly.
|
116 |
+
|
117 |
+
During handling of the above exception, another exception occurred:
|
118 |
+
|
119 |
+
Traceback (most recent call last):
|
120 |
+
File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/signal_slot/signal_slot.py", line 355, in _process_signal
|
121 |
+
slot_callable(*args)
|
122 |
+
File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sample_factory/algo/sampling/rollout_worker.py", line 150, in init
|
123 |
+
env_runner.init(self.timing)
|
124 |
+
File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 418, in init
|
125 |
+
self._reset()
|
126 |
+
File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sample_factory/algo/sampling/non_batched_sampling.py", line 430, in _reset
|
127 |
+
observations, info = e.reset(seed=seed) # new way of doing seeding since Gym 0.26.0
|
128 |
+
File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/gym/core.py", line 323, in reset
|
129 |
+
return self.env.reset(**kwargs)
|
130 |
+
File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sample_factory/algo/utils/make_env.py", line 125, in reset
|
131 |
+
obs, info = self.env.reset(**kwargs)
|
132 |
+
File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sample_factory/algo/utils/make_env.py", line 110, in reset
|
133 |
+
obs, info = self.env.reset(**kwargs)
|
134 |
+
File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sf_examples/vizdoom/doom/wrappers/scenario_wrappers/gathering_reward_shaping.py", line 30, in reset
|
135 |
+
return self.env.reset(**kwargs)
|
136 |
+
File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/gym/core.py", line 379, in reset
|
137 |
+
obs, info = self.env.reset(**kwargs)
|
138 |
+
File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sample_factory/envs/env_wrappers.py", line 84, in reset
|
139 |
+
obs, info = self.env.reset(**kwargs)
|
140 |
+
File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/gym/core.py", line 323, in reset
|
141 |
+
return self.env.reset(**kwargs)
|
142 |
+
File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sf_examples/vizdoom/doom/wrappers/multiplayer_stats.py", line 51, in reset
|
143 |
+
return self.env.reset(**kwargs)
|
144 |
+
File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sf_examples/vizdoom/doom/doom_gym.py", line 323, in reset
|
145 |
+
self._ensure_initialized()
|
146 |
+
File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sf_examples/vizdoom/doom/doom_gym.py", line 274, in _ensure_initialized
|
147 |
+
self.initialize()
|
148 |
+
File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sf_examples/vizdoom/doom/doom_gym.py", line 269, in initialize
|
149 |
+
self._game_init()
|
150 |
+
File "/home/chqma/miniconda3/envs/deep-rl-class/lib/python3.9/site-packages/sf_examples/vizdoom/doom/doom_gym.py", line 244, in _game_init
|
151 |
+
raise EnvCriticalError()
|
152 |
+
sample_factory.envs.env_utils.EnvCriticalError
|
153 |
+
[2023-02-22 19:44:34,110][15005] Unhandled exception in evt loop rollout_proc4_evt_loop
|
154 |
+
[2023-02-22 19:44:35,328][06183] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
155 |
+
[2023-02-22 19:44:36,332][06183] Heartbeat connected on Batcher_0
|
156 |
+
[2023-02-22 19:44:36,335][06183] Heartbeat connected on LearnerWorker_p0
|
157 |
+
[2023-02-22 19:44:36,369][06183] Heartbeat connected on InferenceWorker_p0-w0
|
158 |
+
[2023-02-22 19:44:37,192][15006] Decorrelating experience for 0 frames...
|
159 |
+
[2023-02-22 19:44:37,192][15001] Decorrelating experience for 0 frames...
|
160 |
+
[2023-02-22 19:44:37,192][15002] Decorrelating experience for 0 frames...
|
161 |
+
[2023-02-22 19:44:37,192][15004] Decorrelating experience for 0 frames...
|
162 |
+
[2023-02-22 19:44:37,193][15008] Decorrelating experience for 0 frames...
|
163 |
+
[2023-02-22 19:44:37,921][15006] Decorrelating experience for 32 frames...
|
164 |
+
[2023-02-22 19:44:37,923][15008] Decorrelating experience for 32 frames...
|
165 |
+
[2023-02-22 19:44:37,928][15004] Decorrelating experience for 32 frames...
|
166 |
+
[2023-02-22 19:44:37,928][15001] Decorrelating experience for 32 frames...
|
167 |
+
[2023-02-22 19:44:37,929][15002] Decorrelating experience for 32 frames...
|
168 |
+
[2023-02-22 19:44:37,934][15007] Decorrelating experience for 0 frames...
|
169 |
+
[2023-02-22 19:44:38,003][15003] Decorrelating experience for 0 frames...
|
170 |
+
[2023-02-22 19:44:38,768][15007] Decorrelating experience for 32 frames...
|
171 |
+
[2023-02-22 19:44:38,809][15004] Decorrelating experience for 64 frames...
|
172 |
+
[2023-02-22 19:44:38,814][15008] Decorrelating experience for 64 frames...
|
173 |
+
[2023-02-22 19:44:38,851][15003] Decorrelating experience for 32 frames...
|
174 |
+
[2023-02-22 19:44:38,882][15006] Decorrelating experience for 64 frames...
|
175 |
+
[2023-02-22 19:44:39,585][15007] Decorrelating experience for 64 frames...
|
176 |
+
[2023-02-22 19:44:39,604][15002] Decorrelating experience for 64 frames...
|
177 |
+
[2023-02-22 19:44:39,646][15004] Decorrelating experience for 96 frames...
|
178 |
+
[2023-02-22 19:44:39,647][15001] Decorrelating experience for 64 frames...
|
179 |
+
[2023-02-22 19:44:39,675][15003] Decorrelating experience for 64 frames...
|
180 |
+
[2023-02-22 19:44:39,714][06183] Heartbeat connected on RolloutWorker_w2
|
181 |
+
[2023-02-22 19:44:40,328][06183] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 0.0. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
182 |
+
[2023-02-22 19:44:40,419][15007] Decorrelating experience for 96 frames...
|
183 |
+
[2023-02-22 19:44:40,421][15008] Decorrelating experience for 96 frames...
|
184 |
+
[2023-02-22 19:44:40,464][15002] Decorrelating experience for 96 frames...
|
185 |
+
[2023-02-22 19:44:40,490][15006] Decorrelating experience for 96 frames...
|
186 |
+
[2023-02-22 19:44:40,516][06183] Heartbeat connected on RolloutWorker_w7
|
187 |
+
[2023-02-22 19:44:40,536][06183] Heartbeat connected on RolloutWorker_w5
|
188 |
+
[2023-02-22 19:44:40,559][06183] Heartbeat connected on RolloutWorker_w1
|
189 |
+
[2023-02-22 19:44:40,599][06183] Heartbeat connected on RolloutWorker_w6
|
190 |
+
[2023-02-22 19:44:41,183][15003] Decorrelating experience for 96 frames...
|
191 |
+
[2023-02-22 19:44:41,253][15001] Decorrelating experience for 96 frames...
|
192 |
+
[2023-02-22 19:44:41,272][06183] Heartbeat connected on RolloutWorker_w3
|
193 |
+
[2023-02-22 19:44:41,345][06183] Heartbeat connected on RolloutWorker_w0
|
194 |
+
[2023-02-22 19:44:45,328][06183] Fps is (10 sec: 0.0, 60 sec: 0.0, 300 sec: 0.0). Total num frames: 0. Throughput: 0: 2.1. Samples: 32. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
|
195 |
+
[2023-02-22 19:44:45,332][06183] Avg episode reward: [(0, '1.914')]
|
196 |
+
[2023-02-22 19:44:45,876][14984] Signal inference workers to stop experience collection...
|
197 |
+
[2023-02-22 19:44:45,884][15000] InferenceWorker_p0-w0: stopping experience collection
|
198 |
+
[2023-02-22 19:44:49,504][14984] Signal inference workers to resume experience collection...
|
199 |
+
[2023-02-22 19:44:49,506][15000] InferenceWorker_p0-w0: resuming experience collection
|
200 |
+
[2023-02-22 19:44:50,328][06183] Fps is (10 sec: 409.6, 60 sec: 204.8, 300 sec: 204.8). Total num frames: 4096. Throughput: 0: 147.1. Samples: 2942. Policy #0 lag: (min: 0.0, avg: 0.0, max: 0.0)
|
201 |
+
[2023-02-22 19:44:50,334][06183] Avg episode reward: [(0, '3.108')]
|
202 |
+
[2023-02-22 19:44:54,506][15000] Updated weights for policy 0, policy_version 10 (0.0364)
|
203 |
+
[2023-02-22 19:44:55,328][06183] Fps is (10 sec: 4505.6, 60 sec: 1802.2, 300 sec: 1802.2). Total num frames: 45056. Throughput: 0: 454.3. Samples: 11358. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
204 |
+
[2023-02-22 19:44:55,330][06183] Avg episode reward: [(0, '4.406')]
|
205 |
+
[2023-02-22 19:44:59,586][15000] Updated weights for policy 0, policy_version 20 (0.0015)
|
206 |
+
[2023-02-22 19:45:00,329][06183] Fps is (10 sec: 8191.6, 60 sec: 2867.1, 300 sec: 2867.1). Total num frames: 86016. Throughput: 0: 568.7. Samples: 17060. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
207 |
+
[2023-02-22 19:45:00,332][06183] Avg episode reward: [(0, '4.412')]
|
208 |
+
[2023-02-22 19:45:04,441][15000] Updated weights for policy 0, policy_version 30 (0.0012)
|
209 |
+
[2023-02-22 19:45:05,328][06183] Fps is (10 sec: 8192.0, 60 sec: 3627.9, 300 sec: 3627.9). Total num frames: 126976. Throughput: 0: 849.7. Samples: 29740. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
210 |
+
[2023-02-22 19:45:05,331][06183] Avg episode reward: [(0, '4.560')]
|
211 |
+
[2023-02-22 19:45:05,400][14984] Saving new best policy, reward=4.560!
|
212 |
+
[2023-02-22 19:45:09,187][15000] Updated weights for policy 0, policy_version 40 (0.0010)
|
213 |
+
[2023-02-22 19:45:10,328][06183] Fps is (10 sec: 8601.9, 60 sec: 4300.8, 300 sec: 4300.8). Total num frames: 172032. Throughput: 0: 1068.5. Samples: 42740. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
214 |
+
[2023-02-22 19:45:10,332][06183] Avg episode reward: [(0, '4.466')]
|
215 |
+
[2023-02-22 19:45:14,107][15000] Updated weights for policy 0, policy_version 50 (0.0017)
|
216 |
+
[2023-02-22 19:45:15,328][06183] Fps is (10 sec: 8601.7, 60 sec: 4733.2, 300 sec: 4733.2). Total num frames: 212992. Throughput: 0: 1085.6. Samples: 48854. Policy #0 lag: (min: 0.0, avg: 0.9, max: 1.0)
|
217 |
+
[2023-02-22 19:45:15,330][06183] Avg episode reward: [(0, '4.413')]
|
218 |
+
[2023-02-22 19:45:18,942][15000] Updated weights for policy 0, policy_version 60 (0.0011)
|
219 |
+
[2023-02-22 19:45:20,328][06183] Fps is (10 sec: 8601.7, 60 sec: 5160.9, 300 sec: 5160.9). Total num frames: 258048. Throughput: 0: 1371.8. Samples: 61732. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
220 |
+
[2023-02-22 19:45:20,331][06183] Avg episode reward: [(0, '4.563')]
|
221 |
+
[2023-02-22 19:45:20,341][14984] Saving new best policy, reward=4.563!
|
222 |
+
[2023-02-22 19:45:23,692][15000] Updated weights for policy 0, policy_version 70 (0.0013)
|
223 |
+
[2023-02-22 19:45:25,328][06183] Fps is (10 sec: 8601.5, 60 sec: 5436.5, 300 sec: 5436.5). Total num frames: 299008. Throughput: 0: 1648.9. Samples: 74202. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
224 |
+
[2023-02-22 19:45:25,332][06183] Avg episode reward: [(0, '4.329')]
|
225 |
+
[2023-02-22 19:45:28,664][15000] Updated weights for policy 0, policy_version 80 (0.0011)
|
226 |
+
[2023-02-22 19:45:30,328][06183] Fps is (10 sec: 8192.2, 60 sec: 5666.1, 300 sec: 5666.1). Total num frames: 339968. Throughput: 0: 1788.4. Samples: 80512. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
227 |
+
[2023-02-22 19:45:30,330][06183] Avg episode reward: [(0, '4.295')]
|
228 |
+
[2023-02-22 19:45:33,515][15000] Updated weights for policy 0, policy_version 90 (0.0009)
|
229 |
+
[2023-02-22 19:45:35,328][06183] Fps is (10 sec: 8192.1, 60 sec: 6348.8, 300 sec: 5860.4). Total num frames: 380928. Throughput: 0: 2007.0. Samples: 93258. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
230 |
+
[2023-02-22 19:45:35,330][06183] Avg episode reward: [(0, '4.407')]
|
231 |
+
[2023-02-22 19:45:38,572][15000] Updated weights for policy 0, policy_version 100 (0.0012)
|
232 |
+
[2023-02-22 19:45:40,328][06183] Fps is (10 sec: 8191.9, 60 sec: 7031.5, 300 sec: 6027.0). Total num frames: 421888. Throughput: 0: 2093.5. Samples: 105566. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
|
233 |
+
[2023-02-22 19:45:40,331][06183] Avg episode reward: [(0, '4.493')]
|
234 |
+
[2023-02-22 19:45:43,650][15000] Updated weights for policy 0, policy_version 110 (0.0014)
|
235 |
+
[2023-02-22 19:45:45,328][06183] Fps is (10 sec: 8192.0, 60 sec: 7714.1, 300 sec: 6171.3). Total num frames: 462848. Throughput: 0: 2099.2. Samples: 111522. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
|
236 |
+
[2023-02-22 19:45:45,331][06183] Avg episode reward: [(0, '4.525')]
|
237 |
+
[2023-02-22 19:45:48,756][15000] Updated weights for policy 0, policy_version 120 (0.0012)
|
238 |
+
[2023-02-22 19:45:50,328][06183] Fps is (10 sec: 7782.4, 60 sec: 8260.3, 300 sec: 6246.4). Total num frames: 499712. Throughput: 0: 2078.4. Samples: 123266. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
|
239 |
+
[2023-02-22 19:45:50,331][06183] Avg episode reward: [(0, '4.624')]
|
240 |
+
[2023-02-22 19:45:50,477][14984] Saving new best policy, reward=4.624!
|
241 |
+
[2023-02-22 19:45:54,234][15000] Updated weights for policy 0, policy_version 130 (0.0015)
|
242 |
+
[2023-02-22 19:45:55,328][06183] Fps is (10 sec: 7782.3, 60 sec: 8260.3, 300 sec: 6360.8). Total num frames: 540672. Throughput: 0: 2043.6. Samples: 134702. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
|
243 |
+
[2023-02-22 19:45:55,332][06183] Avg episode reward: [(0, '4.484')]
|
244 |
+
[2023-02-22 19:45:59,467][15000] Updated weights for policy 0, policy_version 140 (0.0014)
|
245 |
+
[2023-02-22 19:46:00,328][06183] Fps is (10 sec: 7782.3, 60 sec: 8192.1, 300 sec: 6417.1). Total num frames: 577536. Throughput: 0: 2037.4. Samples: 140538. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
246 |
+
[2023-02-22 19:46:00,331][06183] Avg episode reward: [(0, '4.447')]
|
247 |
+
[2023-02-22 19:46:04,827][15000] Updated weights for policy 0, policy_version 150 (0.0021)
|
248 |
+
[2023-02-22 19:46:05,328][06183] Fps is (10 sec: 7372.9, 60 sec: 8123.7, 300 sec: 6467.4). Total num frames: 614400. Throughput: 0: 2006.8. Samples: 152036. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
249 |
+
[2023-02-22 19:46:05,332][06183] Avg episode reward: [(0, '4.358')]
|
250 |
+
[2023-02-22 19:46:10,160][15000] Updated weights for policy 0, policy_version 160 (0.0017)
|
251 |
+
[2023-02-22 19:46:10,328][06183] Fps is (10 sec: 7782.4, 60 sec: 8055.5, 300 sec: 6553.6). Total num frames: 655360. Throughput: 0: 1985.4. Samples: 163544. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
|
252 |
+
[2023-02-22 19:46:10,331][06183] Avg episode reward: [(0, '4.399')]
|
253 |
+
[2023-02-22 19:46:10,354][14984] Saving /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000160_655360.pth...
|
254 |
+
[2023-02-22 19:46:15,328][06183] Fps is (10 sec: 7782.3, 60 sec: 7987.2, 300 sec: 6592.6). Total num frames: 692224. Throughput: 0: 1978.1. Samples: 169526. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
255 |
+
[2023-02-22 19:46:15,331][06183] Avg episode reward: [(0, '4.615')]
|
256 |
+
[2023-02-22 19:46:15,414][15000] Updated weights for policy 0, policy_version 170 (0.0019)
|
257 |
+
[2023-02-22 19:46:20,328][06183] Fps is (10 sec: 7782.4, 60 sec: 7918.9, 300 sec: 6665.3). Total num frames: 733184. Throughput: 0: 1950.5. Samples: 181032. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
|
258 |
+
[2023-02-22 19:46:20,331][06183] Avg episode reward: [(0, '4.568')]
|
259 |
+
[2023-02-22 19:46:20,725][15000] Updated weights for policy 0, policy_version 180 (0.0012)
|
260 |
+
[2023-02-22 19:46:25,328][06183] Fps is (10 sec: 7372.8, 60 sec: 7782.4, 300 sec: 6660.4). Total num frames: 765952. Throughput: 0: 1912.4. Samples: 191622. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
|
261 |
+
[2023-02-22 19:46:25,333][06183] Avg episode reward: [(0, '4.507')]
|
262 |
+
[2023-02-22 19:46:26,853][15000] Updated weights for policy 0, policy_version 190 (0.0017)
|
263 |
+
[2023-02-22 19:46:30,328][06183] Fps is (10 sec: 6553.6, 60 sec: 7645.8, 300 sec: 6656.0). Total num frames: 798720. Throughput: 0: 1887.3. Samples: 196452. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
264 |
+
[2023-02-22 19:46:30,332][06183] Avg episode reward: [(0, '4.383')]
|
265 |
+
[2023-02-22 19:46:33,084][15000] Updated weights for policy 0, policy_version 200 (0.0023)
|
266 |
+
[2023-02-22 19:46:35,328][06183] Fps is (10 sec: 6553.7, 60 sec: 7509.3, 300 sec: 6651.9). Total num frames: 831488. Throughput: 0: 1848.0. Samples: 206428. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
267 |
+
[2023-02-22 19:46:35,332][06183] Avg episode reward: [(0, '4.311')]
|
268 |
+
[2023-02-22 19:46:39,210][15000] Updated weights for policy 0, policy_version 210 (0.0020)
|
269 |
+
[2023-02-22 19:46:40,328][06183] Fps is (10 sec: 6963.3, 60 sec: 7441.1, 300 sec: 6679.6). Total num frames: 868352. Throughput: 0: 1817.5. Samples: 216490. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
|
270 |
+
[2023-02-22 19:46:40,332][06183] Avg episode reward: [(0, '4.257')]
|
271 |
+
[2023-02-22 19:46:45,328][06183] Fps is (10 sec: 6553.5, 60 sec: 7236.2, 300 sec: 6644.6). Total num frames: 897024. Throughput: 0: 1795.2. Samples: 221324. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
272 |
+
[2023-02-22 19:46:45,332][06183] Avg episode reward: [(0, '4.425')]
|
273 |
+
[2023-02-22 19:46:45,856][15000] Updated weights for policy 0, policy_version 220 (0.0026)
|
274 |
+
[2023-02-22 19:46:50,329][06183] Fps is (10 sec: 5734.2, 60 sec: 7099.7, 300 sec: 6612.1). Total num frames: 925696. Throughput: 0: 1726.7. Samples: 229738. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
275 |
+
[2023-02-22 19:46:50,334][06183] Avg episode reward: [(0, '4.426')]
|
276 |
+
[2023-02-22 19:46:53,358][15000] Updated weights for policy 0, policy_version 230 (0.0032)
|
277 |
+
[2023-02-22 19:46:55,329][06183] Fps is (10 sec: 5734.1, 60 sec: 6894.9, 300 sec: 6581.8). Total num frames: 954368. Throughput: 0: 1659.2. Samples: 238210. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
278 |
+
[2023-02-22 19:46:55,333][06183] Avg episode reward: [(0, '4.467')]
|
279 |
+
[2023-02-22 19:46:59,818][15000] Updated weights for policy 0, policy_version 240 (0.0017)
|
280 |
+
[2023-02-22 19:47:00,328][06183] Fps is (10 sec: 5734.5, 60 sec: 6758.4, 300 sec: 6553.6). Total num frames: 983040. Throughput: 0: 1633.1. Samples: 243016. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
281 |
+
[2023-02-22 19:47:00,333][06183] Avg episode reward: [(0, '4.537')]
|
282 |
+
[2023-02-22 19:47:05,328][06183] Fps is (10 sec: 5734.8, 60 sec: 6621.9, 300 sec: 6527.2). Total num frames: 1011712. Throughput: 0: 1578.8. Samples: 252080. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
283 |
+
[2023-02-22 19:47:05,332][06183] Avg episode reward: [(0, '4.393')]
|
284 |
+
[2023-02-22 19:47:06,684][15000] Updated weights for policy 0, policy_version 250 (0.0026)
|
285 |
+
[2023-02-22 19:47:10,328][06183] Fps is (10 sec: 6144.0, 60 sec: 6485.3, 300 sec: 6528.0). Total num frames: 1044480. Throughput: 0: 1543.8. Samples: 261092. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
286 |
+
[2023-02-22 19:47:10,332][06183] Avg episode reward: [(0, '4.500')]
|
287 |
+
[2023-02-22 19:47:13,877][15000] Updated weights for policy 0, policy_version 260 (0.0034)
|
288 |
+
[2023-02-22 19:47:15,329][06183] Fps is (10 sec: 5734.1, 60 sec: 6280.5, 300 sec: 6479.1). Total num frames: 1069056. Throughput: 0: 1528.6. Samples: 265238. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
289 |
+
[2023-02-22 19:47:15,336][06183] Avg episode reward: [(0, '4.446')]
|
290 |
+
[2023-02-22 19:47:20,329][06183] Fps is (10 sec: 4505.5, 60 sec: 5939.2, 300 sec: 6409.0). Total num frames: 1089536. Throughput: 0: 1465.9. Samples: 272394. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
291 |
+
[2023-02-22 19:47:20,335][06183] Avg episode reward: [(0, '4.503')]
|
292 |
+
[2023-02-22 19:47:22,727][15000] Updated weights for policy 0, policy_version 270 (0.0038)
|
293 |
+
[2023-02-22 19:47:25,329][06183] Fps is (10 sec: 4505.7, 60 sec: 5802.6, 300 sec: 6366.3). Total num frames: 1114112. Throughput: 0: 1393.6. Samples: 279204. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
|
294 |
+
[2023-02-22 19:47:25,343][06183] Avg episode reward: [(0, '4.563')]
|
295 |
+
[2023-02-22 19:47:30,331][06183] Fps is (10 sec: 4095.2, 60 sec: 5529.4, 300 sec: 6280.5). Total num frames: 1130496. Throughput: 0: 1344.1. Samples: 281812. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
|
296 |
+
[2023-02-22 19:47:30,337][06183] Avg episode reward: [(0, '4.578')]
|
297 |
+
[2023-02-22 19:47:34,132][15000] Updated weights for policy 0, policy_version 280 (0.0075)
|
298 |
+
[2023-02-22 19:47:35,329][06183] Fps is (10 sec: 3686.4, 60 sec: 5324.7, 300 sec: 6221.5). Total num frames: 1150976. Throughput: 0: 1267.7. Samples: 286786. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
299 |
+
[2023-02-22 19:47:35,332][06183] Avg episode reward: [(0, '4.419')]
|
300 |
+
[2023-02-22 19:47:40,329][06183] Fps is (10 sec: 4506.3, 60 sec: 5119.9, 300 sec: 6187.1). Total num frames: 1175552. Throughput: 0: 1232.6. Samples: 293676. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
301 |
+
[2023-02-22 19:47:40,334][06183] Avg episode reward: [(0, '4.364')]
|
302 |
+
[2023-02-22 19:47:43,155][15000] Updated weights for policy 0, policy_version 290 (0.0041)
|
303 |
+
[2023-02-22 19:47:45,329][06183] Fps is (10 sec: 4505.7, 60 sec: 4983.4, 300 sec: 6133.5). Total num frames: 1196032. Throughput: 0: 1201.9. Samples: 297102. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
304 |
+
[2023-02-22 19:47:45,333][06183] Avg episode reward: [(0, '4.416')]
|
305 |
+
[2023-02-22 19:47:50,329][06183] Fps is (10 sec: 4096.1, 60 sec: 4846.9, 300 sec: 6082.5). Total num frames: 1216512. Throughput: 0: 1132.1. Samples: 303026. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
306 |
+
[2023-02-22 19:47:50,341][06183] Avg episode reward: [(0, '4.404')]
|
307 |
+
[2023-02-22 19:47:53,335][15000] Updated weights for policy 0, policy_version 300 (0.0062)
|
308 |
+
[2023-02-22 19:47:55,329][06183] Fps is (10 sec: 4096.0, 60 sec: 4710.4, 300 sec: 6034.1). Total num frames: 1236992. Throughput: 0: 1073.8. Samples: 309412. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
309 |
+
[2023-02-22 19:47:55,333][06183] Avg episode reward: [(0, '4.453')]
|
310 |
+
[2023-02-22 19:48:00,329][06183] Fps is (10 sec: 4915.2, 60 sec: 4710.4, 300 sec: 6027.0). Total num frames: 1265664. Throughput: 0: 1071.2. Samples: 313444. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
311 |
+
[2023-02-22 19:48:00,337][06183] Avg episode reward: [(0, '4.269')]
|
312 |
+
[2023-02-22 19:48:01,035][15000] Updated weights for policy 0, policy_version 310 (0.0037)
|
313 |
+
[2023-02-22 19:48:05,328][06183] Fps is (10 sec: 5324.8, 60 sec: 4642.1, 300 sec: 6001.1). Total num frames: 1290240. Throughput: 0: 1082.8. Samples: 321122. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
314 |
+
[2023-02-22 19:48:05,334][06183] Avg episode reward: [(0, '4.324')]
|
315 |
+
[2023-02-22 19:48:09,841][15000] Updated weights for policy 0, policy_version 320 (0.0035)
|
316 |
+
[2023-02-22 19:48:10,328][06183] Fps is (10 sec: 4505.7, 60 sec: 4437.3, 300 sec: 5957.8). Total num frames: 1310720. Throughput: 0: 1086.6. Samples: 328102. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
|
317 |
+
[2023-02-22 19:48:10,333][06183] Avg episode reward: [(0, '4.457')]
|
318 |
+
[2023-02-22 19:48:10,378][14984] Saving /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000320_1310720.pth...
|
319 |
+
[2023-02-22 19:48:15,328][06183] Fps is (10 sec: 4505.6, 60 sec: 4437.4, 300 sec: 5934.6). Total num frames: 1335296. Throughput: 0: 1111.8. Samples: 331840. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
320 |
+
[2023-02-22 19:48:15,333][06183] Avg episode reward: [(0, '4.421')]
|
321 |
+
[2023-02-22 19:48:19,348][15000] Updated weights for policy 0, policy_version 330 (0.0053)
|
322 |
+
[2023-02-22 19:48:20,329][06183] Fps is (10 sec: 4505.3, 60 sec: 4437.3, 300 sec: 5894.7). Total num frames: 1355776. Throughput: 0: 1137.7. Samples: 337982. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
323 |
+
[2023-02-22 19:48:20,334][06183] Avg episode reward: [(0, '4.440')]
|
324 |
+
[2023-02-22 19:48:25,329][06183] Fps is (10 sec: 4095.8, 60 sec: 4369.0, 300 sec: 5856.4). Total num frames: 1376256. Throughput: 0: 1117.7. Samples: 343972. Policy #0 lag: (min: 0.0, avg: 0.3, max: 1.0)
|
325 |
+
[2023-02-22 19:48:25,343][06183] Avg episode reward: [(0, '4.406')]
|
326 |
+
[2023-02-22 19:48:28,754][15000] Updated weights for policy 0, policy_version 340 (0.0045)
|
327 |
+
[2023-02-22 19:48:30,329][06183] Fps is (10 sec: 4096.3, 60 sec: 4437.5, 300 sec: 5819.7). Total num frames: 1396736. Throughput: 0: 1115.6. Samples: 347306. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
|
328 |
+
[2023-02-22 19:48:30,333][06183] Avg episode reward: [(0, '4.419')]
|
329 |
+
[2023-02-22 19:48:35,329][06183] Fps is (10 sec: 4505.8, 60 sec: 4505.6, 300 sec: 5801.3). Total num frames: 1421312. Throughput: 0: 1149.8. Samples: 354766. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
|
330 |
+
[2023-02-22 19:48:35,333][06183] Avg episode reward: [(0, '4.488')]
|
331 |
+
[2023-02-22 19:48:37,340][15000] Updated weights for policy 0, policy_version 350 (0.0039)
|
332 |
+
[2023-02-22 19:48:40,329][06183] Fps is (10 sec: 4505.5, 60 sec: 4437.3, 300 sec: 5767.2). Total num frames: 1441792. Throughput: 0: 1151.1. Samples: 361214. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
333 |
+
[2023-02-22 19:48:40,343][06183] Avg episode reward: [(0, '4.412')]
|
334 |
+
[2023-02-22 19:48:45,328][06183] Fps is (10 sec: 4096.1, 60 sec: 4437.4, 300 sec: 5734.4). Total num frames: 1462272. Throughput: 0: 1120.4. Samples: 363862. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
335 |
+
[2023-02-22 19:48:45,334][06183] Avg episode reward: [(0, '4.358')]
|
336 |
+
[2023-02-22 19:48:48,069][15000] Updated weights for policy 0, policy_version 360 (0.0038)
|
337 |
+
[2023-02-22 19:48:50,328][06183] Fps is (10 sec: 4096.2, 60 sec: 4437.3, 300 sec: 5702.9). Total num frames: 1482752. Throughput: 0: 1088.8. Samples: 370116. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
|
338 |
+
[2023-02-22 19:48:50,336][06183] Avg episode reward: [(0, '4.525')]
|
339 |
+
[2023-02-22 19:48:55,329][06183] Fps is (10 sec: 4095.7, 60 sec: 4437.3, 300 sec: 5672.6). Total num frames: 1503232. Throughput: 0: 1070.1. Samples: 376256. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
|
340 |
+
[2023-02-22 19:48:55,348][06183] Avg episode reward: [(0, '4.419')]
|
341 |
+
[2023-02-22 19:48:58,116][15000] Updated weights for policy 0, policy_version 370 (0.0050)
|
342 |
+
[2023-02-22 19:49:00,329][06183] Fps is (10 sec: 4095.8, 60 sec: 4300.8, 300 sec: 5643.4). Total num frames: 1523712. Throughput: 0: 1050.0. Samples: 379090. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
343 |
+
[2023-02-22 19:49:00,340][06183] Avg episode reward: [(0, '4.281')]
|
344 |
+
[2023-02-22 19:49:05,329][06183] Fps is (10 sec: 4096.1, 60 sec: 4232.5, 300 sec: 5615.2). Total num frames: 1544192. Throughput: 0: 1053.6. Samples: 385394. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
345 |
+
[2023-02-22 19:49:05,339][06183] Avg episode reward: [(0, '4.351')]
|
346 |
+
[2023-02-22 19:49:10,328][06183] Fps is (10 sec: 2867.4, 60 sec: 4027.7, 300 sec: 5544.2). Total num frames: 1552384. Throughput: 0: 1005.1. Samples: 389200. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
347 |
+
[2023-02-22 19:49:10,334][06183] Avg episode reward: [(0, '4.375')]
|
348 |
+
[2023-02-22 19:49:10,896][15000] Updated weights for policy 0, policy_version 380 (0.0070)
|
349 |
+
[2023-02-22 19:49:15,329][06183] Fps is (10 sec: 2048.0, 60 sec: 3822.9, 300 sec: 5490.1). Total num frames: 1564672. Throughput: 0: 963.8. Samples: 390676. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
350 |
+
[2023-02-22 19:49:15,339][06183] Avg episode reward: [(0, '4.489')]
|
351 |
+
[2023-02-22 19:49:20,329][06183] Fps is (10 sec: 2867.1, 60 sec: 3754.7, 300 sec: 5451.9). Total num frames: 1581056. Throughput: 0: 895.7. Samples: 395074. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
352 |
+
[2023-02-22 19:49:20,347][06183] Avg episode reward: [(0, '4.518')]
|
353 |
+
[2023-02-22 19:49:24,344][15000] Updated weights for policy 0, policy_version 390 (0.0063)
|
354 |
+
[2023-02-22 19:49:25,329][06183] Fps is (10 sec: 3276.7, 60 sec: 3686.4, 300 sec: 5415.0). Total num frames: 1597440. Throughput: 0: 864.5. Samples: 400116. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
355 |
+
[2023-02-22 19:49:25,341][06183] Avg episode reward: [(0, '4.529')]
|
356 |
+
[2023-02-22 19:49:30,329][06183] Fps is (10 sec: 3276.9, 60 sec: 3618.1, 300 sec: 5470.6). Total num frames: 1613824. Throughput: 0: 859.3. Samples: 402530. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
|
357 |
+
[2023-02-22 19:49:30,337][06183] Avg episode reward: [(0, '4.408')]
|
358 |
+
[2023-02-22 19:49:35,329][06183] Fps is (10 sec: 3276.8, 60 sec: 3481.6, 300 sec: 5526.1). Total num frames: 1630208. Throughput: 0: 823.4. Samples: 407170. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
359 |
+
[2023-02-22 19:49:35,340][06183] Avg episode reward: [(0, '4.425')]
|
360 |
+
[2023-02-22 19:49:37,868][15000] Updated weights for policy 0, policy_version 400 (0.0061)
|
361 |
+
[2023-02-22 19:49:40,330][06183] Fps is (10 sec: 2866.9, 60 sec: 3345.0, 300 sec: 5567.8). Total num frames: 1642496. Throughput: 0: 784.7. Samples: 411570. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
362 |
+
[2023-02-22 19:49:40,341][06183] Avg episode reward: [(0, '4.559')]
|
363 |
+
[2023-02-22 19:49:45,329][06183] Fps is (10 sec: 2867.3, 60 sec: 3276.8, 300 sec: 5609.4). Total num frames: 1658880. Throughput: 0: 768.7. Samples: 413680. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
|
364 |
+
[2023-02-22 19:49:45,359][06183] Avg episode reward: [(0, '4.448')]
|
365 |
+
[2023-02-22 19:49:50,330][06183] Fps is (10 sec: 2867.3, 60 sec: 3140.2, 300 sec: 5512.2). Total num frames: 1671168. Throughput: 0: 718.5. Samples: 417728. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
|
366 |
+
[2023-02-22 19:49:50,342][06183] Avg episode reward: [(0, '4.462')]
|
367 |
+
[2023-02-22 19:49:54,192][15000] Updated weights for policy 0, policy_version 410 (0.0085)
|
368 |
+
[2023-02-22 19:49:55,329][06183] Fps is (10 sec: 2048.1, 60 sec: 2935.5, 300 sec: 5401.2). Total num frames: 1679360. Throughput: 0: 695.7. Samples: 420506. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
|
369 |
+
[2023-02-22 19:49:55,351][06183] Avg episode reward: [(0, '4.586')]
|
370 |
+
[2023-02-22 19:50:00,329][06183] Fps is (10 sec: 2048.0, 60 sec: 2798.9, 300 sec: 5304.0). Total num frames: 1691648. Throughput: 0: 698.0. Samples: 422088. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
|
371 |
+
[2023-02-22 19:50:00,351][06183] Avg episode reward: [(0, '4.457')]
|
372 |
+
[2023-02-22 19:50:05,329][06183] Fps is (10 sec: 2457.5, 60 sec: 2662.4, 300 sec: 5192.9). Total num frames: 1703936. Throughput: 0: 678.6. Samples: 425612. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
|
373 |
+
[2023-02-22 19:50:05,337][06183] Avg episode reward: [(0, '4.308')]
|
374 |
+
[2023-02-22 19:50:10,329][06183] Fps is (10 sec: 2048.1, 60 sec: 2662.4, 300 sec: 5081.8). Total num frames: 1712128. Throughput: 0: 645.4. Samples: 429158. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
|
375 |
+
[2023-02-22 19:50:10,336][06183] Avg episode reward: [(0, '4.364')]
|
376 |
+
[2023-02-22 19:50:10,860][14984] Saving /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000419_1716224.pth...
|
377 |
+
[2023-02-22 19:50:11,765][14984] Removing /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000160_655360.pth
|
378 |
+
[2023-02-22 19:50:12,172][15000] Updated weights for policy 0, policy_version 420 (0.0067)
[2023-02-22 19:50:15,329][06183] Fps is (10 sec: 2457.7, 60 sec: 2730.7, 300 sec: 4984.6). Total num frames: 1728512. Throughput: 0: 632.7. Samples: 431000. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
[2023-02-22 19:50:15,364][06183] Avg episode reward: [(0, '4.379')]
[2023-02-22 19:50:20,329][06183] Fps is (10 sec: 2867.2, 60 sec: 2662.4, 300 sec: 4887.4). Total num frames: 1740800. Throughput: 0: 619.5. Samples: 435048. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
[2023-02-22 19:50:20,341][06183] Avg episode reward: [(0, '4.567')]
[2023-02-22 19:50:25,329][06183] Fps is (10 sec: 2867.2, 60 sec: 2662.4, 300 sec: 4804.1). Total num frames: 1757184. Throughput: 0: 630.9. Samples: 439958. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
[2023-02-22 19:50:25,343][06183] Avg episode reward: [(0, '4.502')]
[2023-02-22 19:50:25,673][15000] Updated weights for policy 0, policy_version 430 (0.0074)
[2023-02-22 19:50:30,329][06183] Fps is (10 sec: 3276.7, 60 sec: 2662.4, 300 sec: 4720.8). Total num frames: 1773568. Throughput: 0: 635.1. Samples: 442260. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
[2023-02-22 19:50:30,395][06183] Avg episode reward: [(0, '4.370')]
[2023-02-22 19:50:35,329][06183] Fps is (10 sec: 2867.2, 60 sec: 2594.2, 300 sec: 4623.6). Total num frames: 1785856. Throughput: 0: 630.1. Samples: 446080. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
[2023-02-22 19:50:35,342][06183] Avg episode reward: [(0, '4.403')]
[2023-02-22 19:50:40,329][06183] Fps is (10 sec: 2048.1, 60 sec: 2525.9, 300 sec: 4512.5). Total num frames: 1794048. Throughput: 0: 641.6. Samples: 449380. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
[2023-02-22 19:50:40,377][06183] Avg episode reward: [(0, '4.452')]
[2023-02-22 19:50:42,687][15000] Updated weights for policy 0, policy_version 440 (0.0068)
[2023-02-22 19:50:45,329][06183] Fps is (10 sec: 2047.9, 60 sec: 2457.6, 300 sec: 4429.2). Total num frames: 1806336. Throughput: 0: 647.2. Samples: 451214. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
[2023-02-22 19:50:45,345][06183] Avg episode reward: [(0, '4.428')]
[2023-02-22 19:50:50,329][06183] Fps is (10 sec: 2457.5, 60 sec: 2457.6, 300 sec: 4332.0). Total num frames: 1818624. Throughput: 0: 649.9. Samples: 454858. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
[2023-02-22 19:50:50,341][06183] Avg episode reward: [(0, '4.342')]
[2023-02-22 19:50:55,329][06183] Fps is (10 sec: 2457.7, 60 sec: 2525.9, 300 sec: 4248.7). Total num frames: 1830912. Throughput: 0: 657.8. Samples: 458758. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
[2023-02-22 19:50:55,341][06183] Avg episode reward: [(0, '4.225')]
[2023-02-22 19:50:57,894][15000] Updated weights for policy 0, policy_version 450 (0.0094)
[2023-02-22 19:51:00,329][06183] Fps is (10 sec: 2867.2, 60 sec: 2594.1, 300 sec: 4179.3). Total num frames: 1847296. Throughput: 0: 668.9. Samples: 461102. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
[2023-02-22 19:51:00,364][06183] Avg episode reward: [(0, '4.264')]
[2023-02-22 19:51:05,330][06183] Fps is (10 sec: 2867.1, 60 sec: 2594.1, 300 sec: 4082.1). Total num frames: 1859584. Throughput: 0: 658.2. Samples: 464666. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
[2023-02-22 19:51:05,347][06183] Avg episode reward: [(0, '4.306')]
[2023-02-22 19:51:10,329][06183] Fps is (10 sec: 3276.7, 60 sec: 2798.9, 300 sec: 4026.6). Total num frames: 1880064. Throughput: 0: 672.2. Samples: 470206. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
[2023-02-22 19:51:10,335][06183] Avg episode reward: [(0, '4.395')]
[2023-02-22 19:51:10,967][15000] Updated weights for policy 0, policy_version 460 (0.0063)
[2023-02-22 19:51:15,329][06183] Fps is (10 sec: 3686.5, 60 sec: 2798.9, 300 sec: 3943.3). Total num frames: 1896448. Throughput: 0: 689.0. Samples: 473264. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
[2023-02-22 19:51:15,336][06183] Avg episode reward: [(0, '4.431')]
[2023-02-22 19:51:20,329][06183] Fps is (10 sec: 3276.9, 60 sec: 2867.2, 300 sec: 3887.7). Total num frames: 1912832. Throughput: 0: 702.5. Samples: 477694. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
[2023-02-22 19:51:20,337][06183] Avg episode reward: [(0, '4.327')]
[2023-02-22 19:51:22,882][15000] Updated weights for policy 0, policy_version 470 (0.0076)
[2023-02-22 19:51:25,329][06183] Fps is (10 sec: 3276.8, 60 sec: 2867.2, 300 sec: 3832.2). Total num frames: 1929216. Throughput: 0: 745.9. Samples: 482946. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:51:25,342][06183] Avg episode reward: [(0, '4.400')]
[2023-02-22 19:51:30,329][06183] Fps is (10 sec: 3276.7, 60 sec: 2867.2, 300 sec: 3776.6). Total num frames: 1945600. Throughput: 0: 757.2. Samples: 485290. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
[2023-02-22 19:51:30,339][06183] Avg episode reward: [(0, '4.355')]
[2023-02-22 19:51:35,188][15000] Updated weights for policy 0, policy_version 480 (0.0058)
[2023-02-22 19:51:35,328][06183] Fps is (10 sec: 3686.6, 60 sec: 3003.8, 300 sec: 3721.1). Total num frames: 1966080. Throughput: 0: 794.8. Samples: 490622. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
[2023-02-22 19:51:35,335][06183] Avg episode reward: [(0, '4.469')]
[2023-02-22 19:51:40,330][06183] Fps is (10 sec: 3686.2, 60 sec: 3140.2, 300 sec: 3679.4). Total num frames: 1982464. Throughput: 0: 831.6. Samples: 496182. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:51:40,352][06183] Avg episode reward: [(0, '4.530')]
[2023-02-22 19:51:45,329][06183] Fps is (10 sec: 3686.3, 60 sec: 3276.8, 300 sec: 3651.7). Total num frames: 2002944. Throughput: 0: 837.7. Samples: 498800. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
[2023-02-22 19:51:45,337][06183] Avg episode reward: [(0, '4.628')]
[2023-02-22 19:51:45,347][14984] Saving new best policy, reward=4.628!
[2023-02-22 19:51:46,467][15000] Updated weights for policy 0, policy_version 490 (0.0040)
[2023-02-22 19:51:50,329][06183] Fps is (10 sec: 3686.7, 60 sec: 3345.1, 300 sec: 3610.0). Total num frames: 2019328. Throughput: 0: 884.9. Samples: 504488. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2023-02-22 19:51:50,334][06183] Avg episode reward: [(0, '4.772')]
[2023-02-22 19:51:50,507][14984] Saving new best policy, reward=4.772!
[2023-02-22 19:51:55,330][06183] Fps is (10 sec: 3686.2, 60 sec: 3481.6, 300 sec: 3582.3). Total num frames: 2039808. Throughput: 0: 886.4. Samples: 510092. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
[2023-02-22 19:51:55,337][06183] Avg episode reward: [(0, '4.618')]
[2023-02-22 19:51:57,362][15000] Updated weights for policy 0, policy_version 500 (0.0057)
[2023-02-22 19:52:00,330][06183] Fps is (10 sec: 3686.0, 60 sec: 3481.5, 300 sec: 3540.6). Total num frames: 2056192. Throughput: 0: 879.3. Samples: 512832. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
[2023-02-22 19:52:00,348][06183] Avg episode reward: [(0, '4.444')]
[2023-02-22 19:52:05,330][06183] Fps is (10 sec: 3276.7, 60 sec: 3549.8, 300 sec: 3485.1). Total num frames: 2072576. Throughput: 0: 892.3. Samples: 517850. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
[2023-02-22 19:52:05,337][06183] Avg episode reward: [(0, '4.425')]
[2023-02-22 19:52:08,766][15000] Updated weights for policy 0, policy_version 510 (0.0052)
[2023-02-22 19:52:10,329][06183] Fps is (10 sec: 3686.8, 60 sec: 3549.9, 300 sec: 3471.2). Total num frames: 2093056. Throughput: 0: 905.5. Samples: 523694. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
[2023-02-22 19:52:10,336][06183] Avg episode reward: [(0, '4.350')]
[2023-02-22 19:52:10,415][14984] Saving /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000511_2093056.pth...
[2023-02-22 19:52:11,271][14984] Removing /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000320_1310720.pth
[2023-02-22 19:52:15,329][06183] Fps is (10 sec: 3686.7, 60 sec: 3549.9, 300 sec: 3457.3). Total num frames: 2109440. Throughput: 0: 914.8. Samples: 526456. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
[2023-02-22 19:52:15,345][06183] Avg episode reward: [(0, '4.429')]
[2023-02-22 19:52:20,329][06183] Fps is (10 sec: 3276.9, 60 sec: 3549.9, 300 sec: 3429.5). Total num frames: 2125824. Throughput: 0: 907.8. Samples: 531472. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
[2023-02-22 19:52:20,334][06183] Avg episode reward: [(0, '4.466')]
[2023-02-22 19:52:20,385][15000] Updated weights for policy 0, policy_version 520 (0.0075)
[2023-02-22 19:52:25,329][06183] Fps is (10 sec: 3686.3, 60 sec: 3618.1, 300 sec: 3443.4). Total num frames: 2146304. Throughput: 0: 905.6. Samples: 536934. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
[2023-02-22 19:52:25,339][06183] Avg episode reward: [(0, '4.353')]
[2023-02-22 19:52:30,330][06183] Fps is (10 sec: 3686.0, 60 sec: 3618.1, 300 sec: 3429.5). Total num frames: 2162688. Throughput: 0: 904.3. Samples: 539496. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
[2023-02-22 19:52:30,337][06183] Avg episode reward: [(0, '4.417')]
[2023-02-22 19:52:31,552][15000] Updated weights for policy 0, policy_version 530 (0.0052)
[2023-02-22 19:52:35,330][06183] Fps is (10 sec: 3686.1, 60 sec: 3618.0, 300 sec: 3415.6). Total num frames: 2183168. Throughput: 0: 902.2. Samples: 545088. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
[2023-02-22 19:52:35,340][06183] Avg episode reward: [(0, '4.525')]
[2023-02-22 19:52:40,329][06183] Fps is (10 sec: 3686.7, 60 sec: 3618.2, 300 sec: 3401.8). Total num frames: 2199552. Throughput: 0: 897.8. Samples: 550492. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
[2023-02-22 19:52:40,338][06183] Avg episode reward: [(0, '4.405')]
[2023-02-22 19:52:43,172][15000] Updated weights for policy 0, policy_version 540 (0.0057)
[2023-02-22 19:52:45,329][06183] Fps is (10 sec: 3277.1, 60 sec: 3549.9, 300 sec: 3387.9). Total num frames: 2215936. Throughput: 0: 894.9. Samples: 553102. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
[2023-02-22 19:52:45,341][06183] Avg episode reward: [(0, '4.299')]
[2023-02-22 19:52:50,334][06183] Fps is (10 sec: 3684.6, 60 sec: 3617.8, 300 sec: 3387.8). Total num frames: 2236416. Throughput: 0: 904.6. Samples: 558560. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
[2023-02-22 19:52:50,417][06183] Avg episode reward: [(0, '4.354')]
[2023-02-22 19:52:55,198][15000] Updated weights for policy 0, policy_version 550 (0.0059)
[2023-02-22 19:52:55,329][06183] Fps is (10 sec: 3686.5, 60 sec: 3549.9, 300 sec: 3346.2). Total num frames: 2252800. Throughput: 0: 877.9. Samples: 563198. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
[2023-02-22 19:52:55,334][06183] Avg episode reward: [(0, '4.544')]
[2023-02-22 19:53:00,330][06183] Fps is (10 sec: 3278.2, 60 sec: 3549.9, 300 sec: 3318.4). Total num frames: 2269184. Throughput: 0: 875.5. Samples: 565856. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
[2023-02-22 19:53:00,338][06183] Avg episode reward: [(0, '4.607')]
[2023-02-22 19:53:05,329][06183] Fps is (10 sec: 3276.8, 60 sec: 3549.9, 300 sec: 3304.6). Total num frames: 2285568. Throughput: 0: 883.9. Samples: 571248. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
[2023-02-22 19:53:05,336][06183] Avg episode reward: [(0, '4.433')]
[2023-02-22 19:53:06,783][15000] Updated weights for policy 0, policy_version 560 (0.0075)
[2023-02-22 19:53:10,329][06183] Fps is (10 sec: 3686.9, 60 sec: 3549.9, 300 sec: 3290.7). Total num frames: 2306048. Throughput: 0: 882.0. Samples: 576624. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
[2023-02-22 19:53:10,337][06183] Avg episode reward: [(0, '4.360')]
[2023-02-22 19:53:15,329][06183] Fps is (10 sec: 3686.5, 60 sec: 3549.9, 300 sec: 3276.8). Total num frames: 2322432. Throughput: 0: 885.4. Samples: 579336. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
[2023-02-22 19:53:15,338][06183] Avg episode reward: [(0, '4.393')]
[2023-02-22 19:53:17,881][15000] Updated weights for policy 0, policy_version 570 (0.0045)
[2023-02-22 19:53:20,329][06183] Fps is (10 sec: 3686.4, 60 sec: 3618.1, 300 sec: 3276.8). Total num frames: 2342912. Throughput: 0: 883.7. Samples: 584852. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
[2023-02-22 19:53:20,335][06183] Avg episode reward: [(0, '4.461')]
[2023-02-22 19:53:25,329][06183] Fps is (10 sec: 3686.3, 60 sec: 3549.9, 300 sec: 3262.9). Total num frames: 2359296. Throughput: 0: 881.6. Samples: 590162. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
[2023-02-22 19:53:25,338][06183] Avg episode reward: [(0, '4.526')]
[2023-02-22 19:53:29,477][15000] Updated weights for policy 0, policy_version 580 (0.0044)
[2023-02-22 19:53:30,329][06183] Fps is (10 sec: 3276.7, 60 sec: 3549.9, 300 sec: 3235.1). Total num frames: 2375680. Throughput: 0: 883.1. Samples: 592842. Policy #0 lag: (min: 0.0, avg: 0.4, max: 1.0)
[2023-02-22 19:53:30,340][06183] Avg episode reward: [(0, '4.525')]
[2023-02-22 19:53:35,329][06183] Fps is (10 sec: 3686.2, 60 sec: 3549.9, 300 sec: 3235.1). Total num frames: 2396160. Throughput: 0: 882.5. Samples: 598268. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
[2023-02-22 19:53:35,337][06183] Avg episode reward: [(0, '4.468')]
[2023-02-22 19:53:40,329][06183] Fps is (10 sec: 3686.4, 60 sec: 3549.9, 300 sec: 3221.3). Total num frames: 2412544. Throughput: 0: 903.2. Samples: 603844. Policy #0 lag: (min: 0.0, avg: 0.5, max: 1.0)
[2023-02-22 19:53:40,336][06183] Avg episode reward: [(0, '4.403')]
[2023-02-22 19:53:40,527][15000] Updated weights for policy 0, policy_version 590 (0.0061)
[2023-02-22 19:53:45,329][06183] Fps is (10 sec: 3686.6, 60 sec: 3618.1, 300 sec: 3221.3). Total num frames: 2433024. Throughput: 0: 906.1. Samples: 606630. Policy #0 lag: (min: 0.0, avg: 0.4, max: 2.0)
[2023-02-22 19:53:45,345][06183] Avg episode reward: [(0, '4.392')]
[2023-02-22 19:53:50,329][06183] Fps is (10 sec: 3686.5, 60 sec: 3550.2, 300 sec: 3207.4). Total num frames: 2449408. Throughput: 0: 912.9. Samples: 612328. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
[2023-02-22 19:53:50,337][06183] Avg episode reward: [(0, '4.578')]
[2023-02-22 19:53:52,123][15000] Updated weights for policy 0, policy_version 600 (0.0062)
[2023-02-22 19:53:55,329][06183] Fps is (10 sec: 3276.7, 60 sec: 3549.8, 300 sec: 3193.5). Total num frames: 2465792. Throughput: 0: 889.9. Samples: 616670. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
[2023-02-22 19:53:55,344][06183] Avg episode reward: [(0, '4.591')]
[2023-02-22 19:54:00,329][06183] Fps is (10 sec: 3276.7, 60 sec: 3549.9, 300 sec: 3179.6). Total num frames: 2482176. Throughput: 0: 880.1. Samples: 618942. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
[2023-02-22 19:54:00,345][06183] Avg episode reward: [(0, '4.686')]
[2023-02-22 19:54:05,329][06183] Fps is (10 sec: 2457.5, 60 sec: 3413.3, 300 sec: 3179.6). Total num frames: 2490368. Throughput: 0: 846.7. Samples: 622956. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
[2023-02-22 19:54:05,342][06183] Avg episode reward: [(0, '4.477')]
[2023-02-22 19:54:07,315][15000] Updated weights for policy 0, policy_version 610 (0.0059)
[2023-02-22 19:54:10,328][06183] Fps is (10 sec: 3686.7, 60 sec: 3549.9, 300 sec: 3235.2). Total num frames: 2519040. Throughput: 0: 876.1. Samples: 629588. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:54:10,330][06183] Avg episode reward: [(0, '4.442')]
[2023-02-22 19:54:10,342][06183] Components not started: RolloutWorker_w4, wait_time=600.1 seconds
[2023-02-22 19:54:10,553][14984] Saving /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000616_2523136.pth...
[2023-02-22 19:54:10,749][14984] Removing /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000419_1716224.pth
[2023-02-22 19:54:12,412][15000] Updated weights for policy 0, policy_version 620 (0.0013)
[2023-02-22 19:54:15,328][06183] Fps is (10 sec: 7373.7, 60 sec: 4027.8, 300 sec: 3332.3). Total num frames: 2564096. Throughput: 0: 965.9. Samples: 636308. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2023-02-22 19:54:15,330][06183] Avg episode reward: [(0, '4.427')]
[2023-02-22 19:54:17,302][15000] Updated weights for policy 0, policy_version 630 (0.0011)
[2023-02-22 19:54:20,328][06183] Fps is (10 sec: 8601.6, 60 sec: 4369.1, 300 sec: 3415.7). Total num frames: 2605056. Throughput: 0: 1122.4. Samples: 648774. Policy #0 lag: (min: 0.0, avg: 0.9, max: 1.0)
[2023-02-22 19:54:20,330][06183] Avg episode reward: [(0, '4.489')]
[2023-02-22 19:54:22,046][15000] Updated weights for policy 0, policy_version 640 (0.0014)
[2023-02-22 19:54:25,328][06183] Fps is (10 sec: 8191.8, 60 sec: 4778.7, 300 sec: 3499.0). Total num frames: 2646016. Throughput: 0: 1274.0. Samples: 661174. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:54:25,332][06183] Avg episode reward: [(0, '4.348')]
[2023-02-22 19:54:27,020][15000] Updated weights for policy 0, policy_version 650 (0.0015)
[2023-02-22 19:54:30,328][06183] Fps is (10 sec: 8192.0, 60 sec: 5188.3, 300 sec: 3582.3). Total num frames: 2686976. Throughput: 0: 1352.3. Samples: 667484. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
[2023-02-22 19:54:30,332][06183] Avg episode reward: [(0, '4.599')]
[2023-02-22 19:54:32,143][15000] Updated weights for policy 0, policy_version 660 (0.0013)
[2023-02-22 19:54:35,328][06183] Fps is (10 sec: 8192.2, 60 sec: 5529.7, 300 sec: 3679.5). Total num frames: 2727936. Throughput: 0: 1505.4. Samples: 680070. Policy #0 lag: (min: 0.0, avg: 1.0, max: 2.0)
[2023-02-22 19:54:35,331][06183] Avg episode reward: [(0, '4.600')]
[2023-02-22 19:54:36,880][15000] Updated weights for policy 0, policy_version 670 (0.0015)
[2023-02-22 19:54:40,328][06183] Fps is (10 sec: 8192.1, 60 sec: 5939.3, 300 sec: 3762.8). Total num frames: 2768896. Throughput: 0: 1682.3. Samples: 692374. Policy #0 lag: (min: 0.0, avg: 0.9, max: 1.0)
[2023-02-22 19:54:40,330][06183] Avg episode reward: [(0, '4.375')]
[2023-02-22 19:54:41,917][15000] Updated weights for policy 0, policy_version 680 (0.0014)
[2023-02-22 19:54:45,328][06183] Fps is (10 sec: 8191.8, 60 sec: 6280.6, 300 sec: 3860.0). Total num frames: 2809856. Throughput: 0: 1771.9. Samples: 698674. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
[2023-02-22 19:54:45,335][06183] Avg episode reward: [(0, '4.540')]
[2023-02-22 19:54:47,030][15000] Updated weights for policy 0, policy_version 690 (0.0017)
[2023-02-22 19:54:50,329][06183] Fps is (10 sec: 8191.7, 60 sec: 6690.2, 300 sec: 3971.0). Total num frames: 2850816. Throughput: 0: 1943.1. Samples: 710392. Policy #0 lag: (min: 0.0, avg: 0.9, max: 2.0)
[2023-02-22 19:54:50,332][06183] Avg episode reward: [(0, '4.672')]
[2023-02-22 19:54:52,306][15000] Updated weights for policy 0, policy_version 700 (0.0014)
[2023-02-22 19:54:55,328][06183] Fps is (10 sec: 8192.0, 60 sec: 7099.8, 300 sec: 4068.2). Total num frames: 2891776. Throughput: 0: 2059.7. Samples: 722274. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2023-02-22 19:54:55,331][06183] Avg episode reward: [(0, '4.302')]
[2023-02-22 19:54:57,507][15000] Updated weights for policy 0, policy_version 710 (0.0016)
[2023-02-22 19:55:00,329][06183] Fps is (10 sec: 7782.1, 60 sec: 7441.1, 300 sec: 4151.5). Total num frames: 2928640. Throughput: 0: 2040.9. Samples: 728152. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
[2023-02-22 19:55:00,332][06183] Avg episode reward: [(0, '4.465')]
[2023-02-22 19:55:02,833][15000] Updated weights for policy 0, policy_version 720 (0.0017)
[2023-02-22 19:55:05,328][06183] Fps is (10 sec: 7372.9, 60 sec: 7919.1, 300 sec: 4248.7). Total num frames: 2965504. Throughput: 0: 2017.5. Samples: 739562. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:55:05,332][06183] Avg episode reward: [(0, '4.500')]
[2023-02-22 19:55:08,366][15000] Updated weights for policy 0, policy_version 730 (0.0022)
[2023-02-22 19:55:10,328][06183] Fps is (10 sec: 7373.3, 60 sec: 8055.5, 300 sec: 4318.2). Total num frames: 3002368. Throughput: 0: 1981.9. Samples: 750360. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
[2023-02-22 19:55:10,332][06183] Avg episode reward: [(0, '4.632')]
[2023-02-22 19:55:13,945][15000] Updated weights for policy 0, policy_version 740 (0.0016)
[2023-02-22 19:55:15,328][06183] Fps is (10 sec: 7372.8, 60 sec: 7918.9, 300 sec: 4401.5). Total num frames: 3039232. Throughput: 0: 1972.0. Samples: 756222. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:55:15,331][06183] Avg episode reward: [(0, '4.527')]
[2023-02-22 19:55:19,232][15000] Updated weights for policy 0, policy_version 750 (0.0015)
[2023-02-22 19:55:20,328][06183] Fps is (10 sec: 7782.3, 60 sec: 7918.9, 300 sec: 4484.8). Total num frames: 3080192. Throughput: 0: 1945.2. Samples: 767604. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:55:20,332][06183] Avg episode reward: [(0, '4.350')]
[2023-02-22 19:55:25,240][15000] Updated weights for policy 0, policy_version 760 (0.0023)
[2023-02-22 19:55:25,329][06183] Fps is (10 sec: 7372.6, 60 sec: 7782.4, 300 sec: 4540.3). Total num frames: 3112960. Throughput: 0: 1905.4. Samples: 778120. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
[2023-02-22 19:55:25,331][06183] Avg episode reward: [(0, '4.305')]
[2023-02-22 19:55:30,328][06183] Fps is (10 sec: 6553.6, 60 sec: 7645.8, 300 sec: 4609.7). Total num frames: 3145728. Throughput: 0: 1884.2. Samples: 783462. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:55:30,332][06183] Avg episode reward: [(0, '4.478')]
[2023-02-22 19:55:30,929][15000] Updated weights for policy 0, policy_version 770 (0.0016)
[2023-02-22 19:55:35,328][06183] Fps is (10 sec: 6963.3, 60 sec: 7577.6, 300 sec: 4706.9). Total num frames: 3182592. Throughput: 0: 1848.1. Samples: 793558. Policy #0 lag: (min: 0.0, avg: 0.8, max: 2.0)
[2023-02-22 19:55:35,331][06183] Avg episode reward: [(0, '4.278')]
[2023-02-22 19:55:36,915][15000] Updated weights for policy 0, policy_version 780 (0.0023)
[2023-02-22 19:55:40,328][06183] Fps is (10 sec: 7372.9, 60 sec: 7509.3, 300 sec: 4790.3). Total num frames: 3219456. Throughput: 0: 1828.2. Samples: 804542. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:55:40,331][06183] Avg episode reward: [(0, '4.435')]
[2023-02-22 19:55:42,396][15000] Updated weights for policy 0, policy_version 790 (0.0015)
[2023-02-22 19:55:45,328][06183] Fps is (10 sec: 7373.0, 60 sec: 7441.1, 300 sec: 4873.6). Total num frames: 3256320. Throughput: 0: 1819.0. Samples: 810008. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
[2023-02-22 19:55:45,332][06183] Avg episode reward: [(0, '4.464')]
[2023-02-22 19:55:48,358][15000] Updated weights for policy 0, policy_version 800 (0.0015)
[2023-02-22 19:55:50,328][06183] Fps is (10 sec: 6963.1, 60 sec: 7304.6, 300 sec: 4943.0). Total num frames: 3289088. Throughput: 0: 1798.2. Samples: 820480. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:55:50,333][06183] Avg episode reward: [(0, '4.343')]
[2023-02-22 19:55:54,485][15000] Updated weights for policy 0, policy_version 810 (0.0015)
[2023-02-22 19:55:55,328][06183] Fps is (10 sec: 6553.5, 60 sec: 7168.0, 300 sec: 4998.5). Total num frames: 3321856. Throughput: 0: 1776.1. Samples: 830284. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:55:55,331][06183] Avg episode reward: [(0, '4.378')]
[2023-02-22 19:56:00,328][06183] Fps is (10 sec: 6553.6, 60 sec: 7099.8, 300 sec: 5067.9). Total num frames: 3354624. Throughput: 0: 1762.8. Samples: 835550. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
[2023-02-22 19:56:00,333][06183] Avg episode reward: [(0, '4.323')]
[2023-02-22 19:56:00,418][15000] Updated weights for policy 0, policy_version 820 (0.0019)
[2023-02-22 19:56:05,329][06183] Fps is (10 sec: 6553.3, 60 sec: 7031.4, 300 sec: 5109.6). Total num frames: 3387392. Throughput: 0: 1734.9. Samples: 845674. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:56:05,331][06183] Avg episode reward: [(0, '4.397')]
[2023-02-22 19:56:06,625][15000] Updated weights for policy 0, policy_version 830 (0.0022)
[2023-02-22 19:56:10,328][06183] Fps is (10 sec: 6553.6, 60 sec: 6963.2, 300 sec: 5165.1). Total num frames: 3420160. Throughput: 0: 1722.4. Samples: 855626. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:56:10,332][06183] Avg episode reward: [(0, '4.600')]
[2023-02-22 19:56:10,485][14984] Saving /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000836_3424256.pth...
[2023-02-22 19:56:10,930][14984] Removing /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000511_2093056.pth
[2023-02-22 19:56:12,963][15000] Updated weights for policy 0, policy_version 840 (0.0019)
[2023-02-22 19:56:15,328][06183] Fps is (10 sec: 6963.5, 60 sec: 6963.2, 300 sec: 5234.6). Total num frames: 3457024. Throughput: 0: 1707.0. Samples: 860276. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
[2023-02-22 19:56:15,332][06183] Avg episode reward: [(0, '4.545')]
[2023-02-22 19:56:19,252][15000] Updated weights for policy 0, policy_version 850 (0.0022)
[2023-02-22 19:56:20,328][06183] Fps is (10 sec: 6553.6, 60 sec: 6758.4, 300 sec: 5276.2). Total num frames: 3485696. Throughput: 0: 1700.8. Samples: 870094. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:56:20,331][06183] Avg episode reward: [(0, '4.351')]
[2023-02-22 19:56:25,328][06183] Fps is (10 sec: 6143.9, 60 sec: 6758.4, 300 sec: 5331.8). Total num frames: 3518464. Throughput: 0: 1677.1. Samples: 880014. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:56:25,332][06183] Avg episode reward: [(0, '4.440')]
[2023-02-22 19:56:25,388][15000] Updated weights for policy 0, policy_version 860 (0.0021)
[2023-02-22 19:56:30,328][06183] Fps is (10 sec: 6553.6, 60 sec: 6758.4, 300 sec: 5373.4). Total num frames: 3551232. Throughput: 0: 1667.3. Samples: 885036. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:56:30,332][06183] Avg episode reward: [(0, '4.578')]
[2023-02-22 19:56:31,523][15000] Updated weights for policy 0, policy_version 870 (0.0021)
[2023-02-22 19:56:35,328][06183] Fps is (10 sec: 6963.2, 60 sec: 6758.4, 300 sec: 5442.8). Total num frames: 3588096. Throughput: 0: 1658.0. Samples: 895088. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:56:35,331][06183] Avg episode reward: [(0, '4.614')]
[2023-02-22 19:56:37,650][15000] Updated weights for policy 0, policy_version 880 (0.0024)
[2023-02-22 19:56:40,328][06183] Fps is (10 sec: 6963.3, 60 sec: 6690.1, 300 sec: 5484.5). Total num frames: 3620864. Throughput: 0: 1661.6. Samples: 905056. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:56:40,335][06183] Avg episode reward: [(0, '4.402')]
[2023-02-22 19:56:43,775][15000] Updated weights for policy 0, policy_version 890 (0.0017)
[2023-02-22 19:56:45,328][06183] Fps is (10 sec: 6553.6, 60 sec: 6621.8, 300 sec: 5540.0). Total num frames: 3653632. Throughput: 0: 1656.3. Samples: 910082. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
[2023-02-22 19:56:45,333][06183] Avg episode reward: [(0, '4.616')]
[2023-02-22 19:56:50,328][06183] Fps is (10 sec: 6553.5, 60 sec: 6621.9, 300 sec: 5581.7). Total num frames: 3686400. Throughput: 0: 1642.3. Samples: 919578. Policy #0 lag: (min: 0.0, avg: 0.8, max: 1.0)
[2023-02-22 19:56:50,332][15000] Updated weights for policy 0, policy_version 900 (0.0030)
[2023-02-22 19:56:50,332][06183] Avg episode reward: [(0, '4.342')]
[2023-02-22 19:56:55,329][06183] Fps is (10 sec: 6143.7, 60 sec: 6553.5, 300 sec: 5623.3). Total num frames: 3715072. Throughput: 0: 1636.5. Samples: 929270. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:56:55,335][06183] Avg episode reward: [(0, '4.242')]
[2023-02-22 19:56:56,845][15000] Updated weights for policy 0, policy_version 910 (0.0034)
[2023-02-22 19:57:00,329][06183] Fps is (10 sec: 5733.8, 60 sec: 6485.2, 300 sec: 5665.0). Total num frames: 3743744. Throughput: 0: 1627.8. Samples: 933528. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
[2023-02-22 19:57:00,333][06183] Avg episode reward: [(0, '4.389')]
[2023-02-22 19:57:04,308][15000] Updated weights for policy 0, policy_version 920 (0.0029)
[2023-02-22 19:57:05,329][06183] Fps is (10 sec: 5734.5, 60 sec: 6417.1, 300 sec: 5692.7). Total num frames: 3772416. Throughput: 0: 1594.5. Samples: 941848. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
[2023-02-22 19:57:05,339][06183] Avg episode reward: [(0, '4.555')]
[2023-02-22 19:57:10,329][06183] Fps is (10 sec: 5325.2, 60 sec: 6280.5, 300 sec: 5720.5). Total num frames: 3796992. Throughput: 0: 1544.1. Samples: 949498. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
[2023-02-22 19:57:10,333][06183] Avg episode reward: [(0, '4.425')]
[2023-02-22 19:57:11,788][15000] Updated weights for policy 0, policy_version 930 (0.0033)
[2023-02-22 19:57:15,328][06183] Fps is (10 sec: 5734.5, 60 sec: 6212.2, 300 sec: 5776.1). Total num frames: 3829760. Throughput: 0: 1535.6. Samples: 954138. Policy #0 lag: (min: 0.0, avg: 0.6, max: 1.0)
[2023-02-22 19:57:15,334][06183] Avg episode reward: [(0, '4.353')]
[2023-02-22 19:57:19,282][15000] Updated weights for policy 0, policy_version 940 (0.0031)
[2023-02-22 19:57:20,329][06183] Fps is (10 sec: 5734.4, 60 sec: 6144.0, 300 sec: 5790.0). Total num frames: 3854336. Throughput: 0: 1499.3. Samples: 962558. Policy #0 lag: (min: 0.0, avg: 0.7, max: 1.0)
[2023-02-22 19:57:20,333][06183] Avg episode reward: [(0, '4.441')]
[2023-02-22 19:57:25,329][06183] Fps is (10 sec: 5324.7, 60 sec: 6075.7, 300 sec: 5831.6). Total num frames: 3883008. Throughput: 0: 1461.5. Samples: 970824. Policy #0 lag: (min: 0.0, avg: 0.7, max: 2.0)
[2023-02-22 19:57:25,334][06183] Avg episode reward: [(0, '4.174')]
[2023-02-22 19:57:26,479][15000] Updated weights for policy 0, policy_version 950 (0.0030)
[2023-02-22 19:57:30,328][06183] Fps is (10 sec: 5734.4, 60 sec: 6007.5, 300 sec: 5859.4). Total num frames: 3911680. Throughput: 0: 1446.0. Samples: 975152. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
[2023-02-22 19:57:30,333][06183] Avg episode reward: [(0, '4.382')]
[2023-02-22 19:57:33,771][15000] Updated weights for policy 0, policy_version 960 (0.0027)
[2023-02-22 19:57:35,329][06183] Fps is (10 sec: 5734.1, 60 sec: 5870.9, 300 sec: 5901.0). Total num frames: 3940352. Throughput: 0: 1420.5. Samples: 983500. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
[2023-02-22 19:57:35,333][06183] Avg episode reward: [(0, '4.299')]
[2023-02-22 19:57:40,328][06183] Fps is (10 sec: 5734.4, 60 sec: 5802.6, 300 sec: 5942.7). Total num frames: 3969024. Throughput: 0: 1400.8. Samples: 992306. Policy #0 lag: (min: 0.0, avg: 0.6, max: 2.0)
[2023-02-22 19:57:40,334][06183] Avg episode reward: [(0, '4.471')]
[2023-02-22 19:57:40,870][15000] Updated weights for policy 0, policy_version 970 (0.0023)
[2023-02-22 19:57:45,329][06183] Fps is (10 sec: 5734.8, 60 sec: 5734.4, 300 sec: 5970.6). Total num frames: 3997696. Throughput: 0: 1392.6. Samples: 996192. Policy #0 lag: (min: 0.0, avg: 0.5, max: 2.0)
[2023-02-22 19:57:45,334][06183] Avg episode reward: [(0, '4.346')]
[2023-02-22 19:57:46,777][14984] Stopping Batcher_0...
[2023-02-22 19:57:46,781][14984] Loop batcher_evt_loop terminating...
[2023-02-22 19:57:46,787][06183] Component Batcher_0 stopped!
[2023-02-22 19:57:46,792][06183] Component RolloutWorker_w4 process died already! Don't wait for it.
[2023-02-22 19:57:46,795][14984] Saving /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
[2023-02-22 19:57:46,811][15003] Stopping RolloutWorker_w3...
[2023-02-22 19:57:46,814][15003] Loop rollout_proc3_evt_loop terminating...
[2023-02-22 19:57:46,812][06183] Component RolloutWorker_w3 stopped!
[2023-02-22 19:57:46,814][15002] Stopping RolloutWorker_w1...
[2023-02-22 19:57:46,814][15000] Weights refcount: 2 0
[2023-02-22 19:57:46,814][15004] Stopping RolloutWorker_w2...
[2023-02-22 19:57:46,815][15008] Stopping RolloutWorker_w5...
[2023-02-22 19:57:46,815][15007] Stopping RolloutWorker_w7...
[2023-02-22 19:57:46,815][15001] Stopping RolloutWorker_w0...
[2023-02-22 19:57:46,815][15006] Stopping RolloutWorker_w6...
[2023-02-22 19:57:46,816][15002] Loop rollout_proc1_evt_loop terminating...
[2023-02-22 19:57:46,817][15004] Loop rollout_proc2_evt_loop terminating...
[2023-02-22 19:57:46,818][15008] Loop rollout_proc5_evt_loop terminating...
[2023-02-22 19:57:46,819][15007] Loop rollout_proc7_evt_loop terminating...
[2023-02-22 19:57:46,819][15001] Loop rollout_proc0_evt_loop terminating...
[2023-02-22 19:57:46,816][06183] Component RolloutWorker_w1 stopped!
[2023-02-22 19:57:46,819][15006] Loop rollout_proc6_evt_loop terminating...
[2023-02-22 19:57:46,822][15000] Stopping InferenceWorker_p0-w0...
[2023-02-22 19:57:46,822][06183] Component RolloutWorker_w2 stopped!
[2023-02-22 19:57:46,831][15000] Loop inference_proc0-0_evt_loop terminating...
[2023-02-22 19:57:46,831][06183] Component RolloutWorker_w0 stopped!
[2023-02-22 19:57:46,839][06183] Component RolloutWorker_w6 stopped!
[2023-02-22 19:57:46,843][06183] Component RolloutWorker_w5 stopped!
[2023-02-22 19:57:46,851][06183] Component RolloutWorker_w7 stopped!
[2023-02-22 19:57:46,858][06183] Component InferenceWorker_p0-w0 stopped!
[2023-02-22 19:57:47,288][14984] Removing /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000616_2523136.pth
[2023-02-22 19:57:47,326][14984] Saving /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
[2023-02-22 19:57:47,774][14984] Stopping LearnerWorker_p0...
[2023-02-22 19:57:47,776][14984] Loop learner_proc0_evt_loop terminating...
[2023-02-22 19:57:47,774][06183] Component LearnerWorker_p0 stopped!
[2023-02-22 19:57:47,780][06183] Waiting for process learner_proc0 to stop...
[2023-02-22 19:57:51,038][06183] Waiting for process inference_proc0-0 to join...
[2023-02-22 19:57:51,041][06183] Waiting for process rollout_proc0 to join...
[2023-02-22 19:57:51,044][06183] Waiting for process rollout_proc1 to join...
[2023-02-22 19:57:51,048][06183] Waiting for process rollout_proc2 to join...
[2023-02-22 19:57:51,051][06183] Waiting for process rollout_proc3 to join...
[2023-02-22 19:57:51,054][06183] Waiting for process rollout_proc4 to join...
[2023-02-22 19:57:51,057][06183] Waiting for process rollout_proc5 to join...
[2023-02-22 19:57:51,061][06183] Waiting for process rollout_proc6 to join...
[2023-02-22 19:57:51,065][06183] Waiting for process rollout_proc7 to join...
[2023-02-22 19:57:51,070][06183] Batcher 0 profile tree view:
batching: 25.7881, releasing_batches: 0.0596
[2023-02-22 19:57:51,073][06183] InferenceWorker_p0-w0 profile tree view:
wait_policy: 0.0001
wait_policy_total: 12.6567
update_model: 11.5028
weight_update: 0.0038
one_step: 0.0060
handle_policy_step: 735.6968
deserialize: 18.8574, stack: 3.9381, obs_to_device_normalize: 177.8106, forward: 278.2881, send_messages: 53.0078
prepare_outputs: 178.7753
to_cpu: 152.6849
[2023-02-22 19:57:51,076][06183] Learner 0 profile tree view:
misc: 0.0097, prepare_batch: 78.0888
train: 160.3988
epoch_init: 0.0142, minibatch_init: 0.0160, losses_postprocess: 1.1488, kl_divergence: 1.2237, after_optimizer: 84.7016
calculate_losses: 46.2952
losses_init: 0.0071, forward_head: 2.9452, bptt_initial: 33.8132, tail: 1.5771, advantages_returns: 0.4771, losses: 3.8496
bptt: 3.1942
bptt_forward_core: 3.0460
update: 26.0565
clip: 3.4502
[2023-02-22 19:57:51,080][06183] RolloutWorker_w0 profile tree view:
wait_for_trajectories: 0.3167, enqueue_policy_requests: 18.2264, env_step: 361.8246, overhead: 28.6136, complete_rollouts: 0.7092
save_policy_outputs: 22.1311
split_output_tensors: 10.4875
[2023-02-22 19:57:51,083][06183] RolloutWorker_w7 profile tree view:
wait_for_trajectories: 0.3322, enqueue_policy_requests: 18.1051, env_step: 360.2381, overhead: 28.7457, complete_rollouts: 0.6980
save_policy_outputs: 22.5731
split_output_tensors: 10.6929
[2023-02-22 19:57:51,087][06183] Loop Runner_EvtLoop terminating...
[2023-02-22 19:57:51,090][06183] Runner profile tree view:
main_loop: 814.7198
[2023-02-22 19:57:51,093][06183] Collected {0: 4005888}, FPS: 4916.9
[2023-02-22 20:25:32,536][06183] Loading existing experiment configuration from /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/config.json
[2023-02-22 20:25:32,541][06183] Overriding arg 'num_workers' with value 1 passed from command line
[2023-02-22 20:25:32,544][06183] Adding new argument 'no_render'=True that is not in the saved config file!
[2023-02-22 20:25:32,547][06183] Adding new argument 'save_video'=True that is not in the saved config file!
[2023-02-22 20:25:32,550][06183] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
[2023-02-22 20:25:32,552][06183] Adding new argument 'video_name'=None that is not in the saved config file!
[2023-02-22 20:25:32,553][06183] Adding new argument 'max_num_frames'=1000000000.0 that is not in the saved config file!
[2023-02-22 20:25:32,555][06183] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
[2023-02-22 20:25:32,556][06183] Adding new argument 'push_to_hub'=False that is not in the saved config file!
[2023-02-22 20:25:32,558][06183] Adding new argument 'hf_repository'=None that is not in the saved config file!
[2023-02-22 20:25:32,560][06183] Adding new argument 'policy_index'=0 that is not in the saved config file!
[2023-02-22 20:25:32,562][06183] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
[2023-02-22 20:25:32,564][06183] Adding new argument 'train_script'=None that is not in the saved config file!
[2023-02-22 20:25:32,566][06183] Adding new argument 'enjoy_script'=None that is not in the saved config file!
[2023-02-22 20:25:32,568][06183] Using frameskip 1 and render_action_repeat=4 for evaluation
[2023-02-22 20:25:32,609][06183] Doom resolution: 160x120, resize resolution: (128, 72)
[2023-02-22 20:25:32,617][06183] RunningMeanStd input shape: (3, 72, 128)
[2023-02-22 20:25:32,631][06183] RunningMeanStd input shape: (1,)
[2023-02-22 20:25:32,717][06183] ConvEncoder: input_channels=3
[2023-02-22 20:25:33,575][06183] Conv encoder output size: 512
[2023-02-22 20:25:33,578][06183] Policy head output size: 512
[2023-02-22 20:25:38,271][06183] Loading state from checkpoint /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
[2023-02-22 20:25:42,521][06183] Num frames 100...
[2023-02-22 20:25:42,700][06183] Num frames 200...
[2023-02-22 20:25:42,879][06183] Num frames 300...
[2023-02-22 20:25:43,085][06183] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
[2023-02-22 20:25:43,087][06183] Avg episode reward: 3.840, avg true_objective: 3.840
[2023-02-22 20:25:43,117][06183] Num frames 400...
[2023-02-22 20:25:43,298][06183] Num frames 500...
[2023-02-22 20:25:43,474][06183] Num frames 600...
[2023-02-22 20:25:43,647][06183] Num frames 700...
[2023-02-22 20:25:43,836][06183] Num frames 800...
[2023-02-22 20:25:43,954][06183] Avg episode rewards: #0: 4.660, true rewards: #0: 4.160
[2023-02-22 20:25:43,959][06183] Avg episode reward: 4.660, avg true_objective: 4.160
[2023-02-22 20:25:44,104][06183] Num frames 900...
[2023-02-22 20:25:44,289][06183] Num frames 1000...
[2023-02-22 20:25:44,471][06183] Num frames 1100...
[2023-02-22 20:25:44,649][06183] Num frames 1200...
[2023-02-22 20:25:44,742][06183] Avg episode rewards: #0: 4.387, true rewards: #0: 4.053
[2023-02-22 20:25:44,745][06183] Avg episode reward: 4.387, avg true_objective: 4.053
[2023-02-22 20:25:44,898][06183] Num frames 1300...
[2023-02-22 20:25:45,081][06183] Num frames 1400...
[2023-02-22 20:25:45,262][06183] Avg episode rewards: #0: 3.930, true rewards: #0: 3.680
[2023-02-22 20:25:45,264][06183] Avg episode reward: 3.930, avg true_objective: 3.680
[2023-02-22 20:25:45,323][06183] Num frames 1500...
[2023-02-22 20:25:45,500][06183] Num frames 1600...
[2023-02-22 20:25:45,677][06183] Num frames 1700...
[2023-02-22 20:25:45,855][06183] Num frames 1800...
[2023-02-22 20:25:46,035][06183] Num frames 1900...
[2023-02-22 20:25:46,130][06183] Avg episode rewards: #0: 4.240, true rewards: #0: 3.840
[2023-02-22 20:25:46,132][06183] Avg episode reward: 4.240, avg true_objective: 3.840
[2023-02-22 20:25:46,276][06183] Num frames 2000...
[2023-02-22 20:25:46,450][06183] Num frames 2100...
[2023-02-22 20:25:46,622][06183] Num frames 2200...
[2023-02-22 20:25:46,816][06183] Num frames 2300...
[2023-02-22 20:25:46,881][06183] Avg episode rewards: #0: 4.173, true rewards: #0: 3.840
[2023-02-22 20:25:46,884][06183] Avg episode reward: 4.173, avg true_objective: 3.840
[2023-02-22 20:25:47,064][06183] Num frames 2400...
[2023-02-22 20:25:47,255][06183] Num frames 2500...
[2023-02-22 20:25:47,435][06183] Num frames 2600...
[2023-02-22 20:25:47,655][06183] Avg episode rewards: #0: 4.126, true rewards: #0: 3.840
[2023-02-22 20:25:47,658][06183] Avg episode reward: 4.126, avg true_objective: 3.840
[2023-02-22 20:25:47,686][06183] Num frames 2700...
[2023-02-22 20:25:47,876][06183] Num frames 2800...
[2023-02-22 20:25:48,054][06183] Num frames 2900...
[2023-02-22 20:25:48,238][06183] Num frames 3000...
[2023-02-22 20:25:48,411][06183] Num frames 3100...
[2023-02-22 20:25:48,479][06183] Avg episode rewards: #0: 4.255, true rewards: #0: 3.880
[2023-02-22 20:25:48,481][06183] Avg episode reward: 4.255, avg true_objective: 3.880
[2023-02-22 20:25:48,654][06183] Num frames 3200...
[2023-02-22 20:25:48,826][06183] Num frames 3300...
[2023-02-22 20:25:49,002][06183] Num frames 3400...
[2023-02-22 20:25:49,219][06183] Avg episode rewards: #0: 4.209, true rewards: #0: 3.876
[2023-02-22 20:25:49,221][06183] Avg episode reward: 4.209, avg true_objective: 3.876
[2023-02-22 20:25:49,271][06183] Num frames 3500...
[2023-02-22 20:25:49,455][06183] Num frames 3600...
[2023-02-22 20:25:49,636][06183] Num frames 3700...
[2023-02-22 20:25:49,820][06183] Num frames 3800...
[2023-02-22 20:25:49,897][06183] Avg episode rewards: #0: 4.208, true rewards: #0: 3.808
[2023-02-22 20:25:49,900][06183] Avg episode reward: 4.208, avg true_objective: 3.808
[2023-02-22 20:25:51,533][06183] Replay video saved to /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/replay.mp4!
[2023-02-22 20:28:07,610][06183] Loading existing experiment configuration from /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/config.json
[2023-02-22 20:28:07,617][06183] Overriding arg 'num_workers' with value 1 passed from command line
[2023-02-22 20:28:07,620][06183] Adding new argument 'no_render'=True that is not in the saved config file!
[2023-02-22 20:28:07,623][06183] Adding new argument 'save_video'=True that is not in the saved config file!
[2023-02-22 20:28:07,625][06183] Adding new argument 'video_frames'=1000000000.0 that is not in the saved config file!
[2023-02-22 20:28:07,627][06183] Adding new argument 'video_name'=None that is not in the saved config file!
[2023-02-22 20:28:07,630][06183] Adding new argument 'max_num_frames'=100000 that is not in the saved config file!
[2023-02-22 20:28:07,632][06183] Adding new argument 'max_num_episodes'=10 that is not in the saved config file!
[2023-02-22 20:28:07,634][06183] Adding new argument 'push_to_hub'=True that is not in the saved config file!
[2023-02-22 20:28:07,636][06183] Adding new argument 'hf_repository'='chqmatteo/rl_course_vizdoom_health_gathering_supreme' that is not in the saved config file!
[2023-02-22 20:28:07,638][06183] Adding new argument 'policy_index'=0 that is not in the saved config file!
[2023-02-22 20:28:07,639][06183] Adding new argument 'eval_deterministic'=False that is not in the saved config file!
[2023-02-22 20:28:07,640][06183] Adding new argument 'train_script'=None that is not in the saved config file!
[2023-02-22 20:28:07,642][06183] Adding new argument 'enjoy_script'=None that is not in the saved config file!
[2023-02-22 20:28:07,643][06183] Using frameskip 1 and render_action_repeat=4 for evaluation
[2023-02-22 20:28:07,661][06183] RunningMeanStd input shape: (3, 72, 128)
[2023-02-22 20:28:07,665][06183] RunningMeanStd input shape: (1,)
[2023-02-22 20:28:07,680][06183] ConvEncoder: input_channels=3
[2023-02-22 20:28:07,712][06183] Conv encoder output size: 512
[2023-02-22 20:28:07,714][06183] Policy head output size: 512
[2023-02-22 20:28:07,762][06183] Loading state from checkpoint /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/checkpoint_p0/checkpoint_000000978_4005888.pth...
[2023-02-22 20:28:09,216][06183] Num frames 100...
[2023-02-22 20:28:09,390][06183] Num frames 200...
[2023-02-22 20:28:09,564][06183] Num frames 300...
[2023-02-22 20:28:09,775][06183] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
[2023-02-22 20:28:09,777][06183] Avg episode reward: 3.840, avg true_objective: 3.840
[2023-02-22 20:28:09,833][06183] Num frames 400...
[2023-02-22 20:28:10,015][06183] Num frames 500...
[2023-02-22 20:28:10,196][06183] Num frames 600...
[2023-02-22 20:28:10,374][06183] Num frames 700...
[2023-02-22 20:28:10,551][06183] Avg episode rewards: #0: 3.840, true rewards: #0: 3.840
[2023-02-22 20:28:10,555][06183] Avg episode reward: 3.840, avg true_objective: 3.840
[2023-02-22 20:28:10,614][06183] Num frames 800...
[2023-02-22 20:28:10,799][06183] Num frames 900...
[2023-02-22 20:28:10,978][06183] Num frames 1000...
[2023-02-22 20:28:11,080][06183] Avg episode rewards: #0: 3.413, true rewards: #0: 3.413
[2023-02-22 20:28:11,082][06183] Avg episode reward: 3.413, avg true_objective: 3.413
[2023-02-22 20:28:11,220][06183] Num frames 1100...
[2023-02-22 20:28:11,403][06183] Num frames 1200...
[2023-02-22 20:28:11,589][06183] Num frames 1300...
[2023-02-22 20:28:11,801][06183] Num frames 1400...
[2023-02-22 20:28:11,880][06183] Avg episode rewards: #0: 3.520, true rewards: #0: 3.520
[2023-02-22 20:28:11,883][06183] Avg episode reward: 3.520, avg true_objective: 3.520
[2023-02-22 20:28:12,065][06183] Num frames 1500...
[2023-02-22 20:28:12,242][06183] Num frames 1600...
[2023-02-22 20:28:12,420][06183] Num frames 1700...
[2023-02-22 20:28:12,635][06183] Avg episode rewards: #0: 3.584, true rewards: #0: 3.584
[2023-02-22 20:28:12,639][06183] Avg episode reward: 3.584, avg true_objective: 3.584
[2023-02-22 20:28:12,659][06183] Num frames 1800...
[2023-02-22 20:28:12,838][06183] Num frames 1900...
[2023-02-22 20:28:13,023][06183] Num frames 2000...
[2023-02-22 20:28:13,210][06183] Num frames 2100...
[2023-02-22 20:28:13,401][06183] Num frames 2200...
[2023-02-22 20:28:13,475][06183] Avg episode rewards: #0: 3.847, true rewards: #0: 3.680
[2023-02-22 20:28:13,477][06183] Avg episode reward: 3.847, avg true_objective: 3.680
[2023-02-22 20:28:13,644][06183] Num frames 2300...
[2023-02-22 20:28:13,814][06183] Num frames 2400...
[2023-02-22 20:28:13,990][06183] Num frames 2500...
[2023-02-22 20:28:14,187][06183] Num frames 2600...
[2023-02-22 20:28:14,364][06183] Num frames 2700...
[2023-02-22 20:28:14,572][06183] Avg episode rewards: #0: 4.549, true rewards: #0: 3.977
[2023-02-22 20:28:14,575][06183] Avg episode reward: 4.549, avg true_objective: 3.977
[2023-02-22 20:28:14,610][06183] Num frames 2800...
[2023-02-22 20:28:14,800][06183] Num frames 2900...
[2023-02-22 20:28:14,990][06183] Num frames 3000...
[2023-02-22 20:28:15,181][06183] Num frames 3100...
[2023-02-22 20:28:15,363][06183] Num frames 3200...
[2023-02-22 20:28:15,486][06183] Avg episode rewards: #0: 4.665, true rewards: #0: 4.040
[2023-02-22 20:28:15,488][06183] Avg episode reward: 4.665, avg true_objective: 4.040
[2023-02-22 20:28:15,635][06183] Num frames 3300...
[2023-02-22 20:28:15,828][06183] Num frames 3400...
[2023-02-22 20:28:16,009][06183] Num frames 3500...
[2023-02-22 20:28:16,196][06183] Num frames 3600...
[2023-02-22 20:28:16,399][06183] Avg episode rewards: #0: 4.756, true rewards: #0: 4.089
[2023-02-22 20:28:16,401][06183] Avg episode reward: 4.756, avg true_objective: 4.089
[2023-02-22 20:28:16,444][06183] Num frames 3700...
[2023-02-22 20:28:16,645][06183] Num frames 3800...
[2023-02-22 20:28:16,836][06183] Num frames 3900...
[2023-02-22 20:28:16,960][06183] Avg episode rewards: #0: 4.536, true rewards: #0: 3.936
[2023-02-22 20:28:16,964][06183] Avg episode reward: 4.536, avg true_objective: 3.936
[2023-02-22 20:28:18,569][06183] Replay video saved to /mnt/c/Users/chqma/projects/ai/deep-rl-class/notebooks/unit8/train_dir/default_experiment/replay.mp4!