andrewzhang505 committed
Commit 4cffbfd • 1 parent: 303088e
Upload . with huggingface_hub

Files changed:
- .summary/0/events.out.tfevents.1673417676.andrew-gpu +3 -0
- README.md +42 -0
- checkpoint_p0/best_000000760_389120_reward_42.463.pth +3 -0
- checkpoint_p0/checkpoint_000000704_360448.pth +3 -0
- checkpoint_p0/checkpoint_000000792_405504.pth +3 -0
- config.json +129 -0
- git.diff +186 -0
- sf_log.txt +249 -0
.summary/0/events.out.tfevents.1673417676.andrew-gpu
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5f5bcafd032d21d96cc07da746dd765a91f8560d22fcfe38b7f88fe133805a25
size 61881
README.md
ADDED
@@ -0,0 +1,42 @@
---
library_name: sample-factory
tags:
- deep-reinforcement-learning
- reinforcement-learning
- sample-factory
---

An **APPO** model trained on the **mujoco_ant** environment.

This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/


## Downloading the model

After installing Sample-Factory, download the model with:
```
python -m sample_factory.huggingface.load_from_hub -r andrewzhang505/ant_test4
```


## Using the model

To run the model after download, use the `enjoy` script corresponding to this environment:
```
python -m <path.to.enjoy.module> --algo=APPO --env=mujoco_ant --train_dir=./train_dir --experiment=ant_test4
```


You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.

## Training with this model

To continue training with this model, use the `train` script corresponding to this environment:
```
python -m <path.to.train.module> --algo=APPO --env=mujoco_ant --train_dir=./train_dir --experiment=ant_test4 --restart_behavior=resume --train_for_env_steps=10000000000
```

Note: you may have to adjust `--train_for_env_steps` to a suitably high number, as the experiment will resume at the number of steps it concluded at.
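The README above uses the Sample-Factory CLI for downloading. As an aside, the same files can be fetched with `huggingface_hub` directly; this is only a minimal sketch, assuming a recent `huggingface_hub` that supports the `local_dir` argument of `snapshot_download`, and it mirrors the `./train_dir/<experiment>` layout that the `enjoy`/`train` commands above expect.

```python
# Sketch only: download this repo with huggingface_hub instead of the
# sample_factory.huggingface.load_from_hub CLI shown in the README above.
from huggingface_hub import snapshot_download

# The enjoy/train commands look for the experiment under ./train_dir/<experiment>,
# so the local_dir mirrors that layout.
local_path = snapshot_download(
    repo_id="andrewzhang505/ant_test4",
    local_dir="./train_dir/ant_test4",
)
print(f"Model files downloaded to: {local_path}")
```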
checkpoint_p0/best_000000760_389120_reward_42.463.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7a81f537790ed40ff09681497ef77143cc8d01ccc205c906aba34cb26b5de07d
size 89730
checkpoint_p0/checkpoint_000000704_360448.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:73dc6a475005ac33458fe996f3e419e919e6bd0796db60dc206c5fe6796b842c
size 89730
checkpoint_p0/checkpoint_000000792_405504.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4d04c7d06b796cecc234541c6283270915c8bae02161032240b1ca8da7b346b8
size 89730
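The three `.pth` files above are stored as Git LFS pointers (version/oid/size) rather than raw weights; once downloaded they are ordinary PyTorch checkpoint files. A minimal sketch for inspecting one locally (the path is illustrative, and only standard `torch.load` is assumed):

```python
# Sketch only: peek inside a downloaded Sample-Factory checkpoint file.
# The path below is illustrative; map_location keeps the load on CPU.
import torch

checkpoint = torch.load(
    "train_dir/ant_test4/checkpoint_p0/best_000000760_389120_reward_42.463.pth",
    map_location="cpu",
)
# Checkpoints are saved as a dict; print its top-level entries if so.
print(list(checkpoint.keys()) if isinstance(checkpoint, dict) else type(checkpoint))
```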
config.json
ADDED
@@ -0,0 +1,129 @@
{
    "help": false,
    "algo": "APPO",
    "env": "mujoco_ant",
    "experiment": "ant_test",
    "train_dir": "/home/andrew_huggingface_co/sample-factory/train_dir",
    "restart_behavior": "resume",
    "device": "gpu",
    "seed": null,
    "num_policies": 1,
    "async_rl": false,
    "serial_mode": false,
    "batched_sampling": false,
    "num_batches_to_accumulate": 2,
    "worker_num_splits": 2,
    "policy_workers_per_policy": 1,
    "max_policy_lag": 1000,
    "num_workers": 8,
    "num_envs_per_worker": 8,
    "batch_size": 1024,
    "num_batches_per_epoch": 4,
    "num_epochs": 2,
    "rollout": 64,
    "recurrence": 1,
    "shuffle_minibatches": false,
    "gamma": 0.99,
    "reward_scale": 1,
    "reward_clip": 1000.0,
    "value_bootstrap": true,
    "normalize_returns": true,
    "exploration_loss_coeff": 0.0,
    "value_loss_coeff": 1.3,
    "kl_loss_coeff": 0.1,
    "exploration_loss": "entropy",
    "gae_lambda": 0.95,
    "ppo_clip_ratio": 0.2,
    "ppo_clip_value": 1.0,
    "with_vtrace": false,
    "vtrace_rho": 1.0,
    "vtrace_c": 1.0,
    "optimizer": "adam",
    "adam_eps": 1e-06,
    "adam_beta1": 0.9,
    "adam_beta2": 0.999,
    "max_grad_norm": 3.5,
    "learning_rate": 0.00295,
    "lr_schedule": "linear_decay",
    "lr_schedule_kl_threshold": 0.008,
    "obs_subtract_mean": 0.0,
    "obs_scale": 1.0,
    "normalize_input": true,
    "normalize_input_keys": null,
    "decorrelate_experience_max_seconds": 0,
    "decorrelate_envs_on_one_worker": true,
    "actor_worker_gpus": [],
    "set_workers_cpu_affinity": true,
    "force_envs_single_thread": false,
    "default_niceness": 0,
    "log_to_file": true,
    "experiment_summaries_interval": 3,
    "flush_summaries_interval": 30,
    "stats_avg": 100,
    "summaries_use_frameskip": true,
    "heartbeat_interval": 20,
    "heartbeat_reporting_interval": 180,
    "train_for_env_steps": 10000000,
    "train_for_seconds": 10000000000,
    "save_every_sec": 15,
    "keep_checkpoints": 2,
    "load_checkpoint_kind": "latest",
    "save_milestones_sec": -1,
    "save_best_every_sec": 5,
    "save_best_metric": "reward",
    "save_best_after": 100000,
    "benchmark": false,
    "encoder_mlp_layers": [
        64,
        64
    ],
    "encoder_conv_architecture": "convnet_simple",
    "encoder_conv_mlp_layers": [
        512
    ],
    "use_rnn": false,
    "rnn_size": 512,
    "rnn_type": "gru",
    "rnn_num_layers": 1,
    "decoder_mlp_layers": [],
    "nonlinearity": "tanh",
    "policy_initialization": "torch_default",
    "policy_init_gain": 1.0,
    "actor_critic_share_weights": true,
    "adaptive_stddev": false,
    "continuous_tanh_scale": 0.0,
    "initial_stddev": 1.0,
    "use_env_info_cache": false,
    "env_gpu_actions": false,
    "env_gpu_observations": true,
    "env_frameskip": 1,
    "env_framestack": 1,
    "pixel_format": "CHW",
    "use_record_episode_statistics": false,
    "with_wandb": false,
    "wandb_user": null,
    "wandb_project": "sample_factory",
    "wandb_group": null,
    "wandb_job_type": "SF",
    "wandb_tags": [],
    "with_pbt": false,
    "pbt_mix_policies_in_one_env": true,
    "pbt_period_env_steps": 5000000,
    "pbt_start_mutation": 20000000,
    "pbt_replace_fraction": 0.3,
    "pbt_mutation_rate": 0.15,
    "pbt_replace_reward_gap": 0.1,
    "pbt_replace_reward_gap_absolute": 1e-06,
    "pbt_optimize_gamma": false,
    "pbt_target_objective": "true_objective",
    "pbt_perturb_min": 1.1,
    "pbt_perturb_max": 1.5,
    "command_line": "--algo=APPO --env=mujoco_ant --experiment=ant_test",
    "cli_args": {
        "algo": "APPO",
        "env": "mujoco_ant",
        "experiment": "ant_test"
    },
    "git_hash": "7355a9d939997e11f02d311f71e4ca3be2e9258f",
    "git_repo_name": "https://github.com/andrewzhang505/sample-factory.git"
}
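This config.json is what Sample-Factory reads back when resuming the experiment or generating a model card. A minimal sketch of loading it, following the same `json.load` + `AttrDict` pattern that `push_to_hub.py` uses in the git.diff below (the local path here is illustrative):

```python
# Sketch only: load the experiment config above into Sample-Factory's AttrDict,
# mirroring the pattern in sample_factory/huggingface/push_to_hub.py (see git.diff below).
import json

from sample_factory.utils.attr_dict import AttrDict

with open("train_dir/ant_test/config.json", encoding="utf-8") as json_file:
    cfg = AttrDict(json.load(json_file))

# The recorded hyperparameters are then available as attributes:
print(cfg.algo, cfg.env, cfg.learning_rate, cfg.batch_size)
```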
git.diff
ADDED
@@ -0,0 +1,186 @@
diff --git a/docs/10-huggingface/huggingface.md b/docs/10-huggingface/huggingface.md
index 8846da73..1f1fae6f 100644
--- a/docs/10-huggingface/huggingface.md
+++ b/docs/10-huggingface/huggingface.md
@@ -77,10 +77,16 @@ You can also save a video of the model during evaluation to upload to the hub wi

- `--video_name`: The name of the video to save as. If `None`, will save to `replay.mp4` in your experiment directory

+Also, you can include information in the Hugging Face Hub model card for how to train and enjoy using this model. These parameters are optional:
+
+- `--train_script`: The module path for training this model
+
+- `--enjoy_script`: The module path for enjoying this model
+
For example:

```
-python -m sf_examples.mujoco.enjoy_mujoco --algo=APPO --env=mujoco_ant --experiment=<repo_name> --train_dir=./train_dir --max_num_episodes=10 --push_to_hub --hf_repository=<username>/<hf_repo_name> --save_video --no_render
+python -m sf_examples.mujoco.enjoy_mujoco --algo=APPO --env=mujoco_ant --experiment=<repo_name> --train_dir=./train_dir --max_num_episodes=10 --push_to_hub --hf_repository=<username>/<hf_repo_name> --save_video --no_render --enjoy_script=sf_examples.mujoco.enjoy_mujoco --train_script=sf_examples.mujoco.train_mujoco
```

#### Using the push_to_hub Script
@@ -95,4 +101,6 @@ The command line arguments are:

- `-r`: The repo_id to save on HF Hub. This is the same as `hf_repository` in the enjoy script and must be in the form `<hf_username>/<hf_repo_name>`

-- `-d`: The full path to your experiment directory to upload

+- `-d`: The full path to your experiment directory to upload
+
+The optional arguments of `--train_script` and `--enjoy_script` can also be used. See the above section for more details

diff --git a/sample_factory/cfg/arguments.py b/sample_factory/cfg/arguments.py
index 820efce6..f736342d 100644
--- a/sample_factory/cfg/arguments.py
+++ b/sample_factory/cfg/arguments.py
@@ -18,7 +18,7 @@ from sample_factory.cfg.cfg import (
)
from sample_factory.utils.attr_dict import AttrDict
from sample_factory.utils.typing import Config
-from sample_factory.utils.utils import cfg_file, cfg_file_old, get_git_commit_hash, get_top_level_script, log
+from sample_factory.utils.utils import cfg_file, cfg_file_old, get_git_commit_hash, log


def parse_sf_args(
@@ -91,7 +91,6 @@ def postprocess_args(args, argv, parser) -> argparse.Namespace:

args.cli_args = vars(cli_args)
args.git_hash, args.git_repo_name = get_git_commit_hash()
- args.train_script = get_top_level_script()
return args


diff --git a/sample_factory/cfg/cfg.py b/sample_factory/cfg/cfg.py
index 43393da1..360e6895 100644
--- a/sample_factory/cfg/cfg.py
+++ b/sample_factory/cfg/cfg.py
@@ -675,6 +675,19 @@ def add_eval_args(parser):
help="False to sample from action distributions at test time. True to just use the argmax.",
)

+ parser.add_argument(
+ "--train_script",
+ default=None,
+ type=str,
+ help="Module name used to run training script. Used to generate HF model card",
+ )
+ parser.add_argument(
+ "--enjoy_script",
+ default=None,
+ type=str,
+ help="Module name used to run training script. Used to generate HF model card",
+ )
+

def add_wandb_args(p: ArgumentParser):
"""Weights and Biases experiment monitoring."""
diff --git a/sample_factory/enjoy.py b/sample_factory/enjoy.py
index 341b537b..b620c532 100644
--- a/sample_factory/enjoy.py
+++ b/sample_factory/enjoy.py
@@ -21,7 +21,7 @@ from sample_factory.model.actor_critic import create_actor_critic
from sample_factory.model.model_utils import get_rnn_size
from sample_factory.utils.attr_dict import AttrDict
from sample_factory.utils.typing import Config, StatusCode
-from sample_factory.utils.utils import debug_log_every_n, experiment_dir, get_top_level_script, log
+from sample_factory.utils.utils import debug_log_every_n, experiment_dir, log


def visualize_policy_inputs(normalized_obs: Dict[str, Tensor]) -> None:
@@ -260,9 +260,8 @@ def enjoy(cfg: Config) -> Tuple[StatusCode, float]:
generate_replay_video(experiment_dir(cfg=cfg), video_frames, fps)

if cfg.push_to_hub:
- enjoy_name = get_top_level_script()
generate_model_card(
- experiment_dir(cfg=cfg), cfg.algo, cfg.env, cfg.hf_repository, reward_list, enjoy_name, cfg.train_script
+ experiment_dir(cfg=cfg), cfg.algo, cfg.env, cfg.hf_repository, reward_list, cfg.enjoy_script, cfg.train_script
)
push_to_hf(experiment_dir(cfg=cfg), cfg.hf_repository)

diff --git a/sample_factory/huggingface/huggingface_utils.py b/sample_factory/huggingface/huggingface_utils.py
index 90184da7..5b4a6b14 100644
--- a/sample_factory/huggingface/huggingface_utils.py
+++ b/sample_factory/huggingface/huggingface_utils.py
@@ -57,8 +57,10 @@ python -m sample_factory.huggingface.load_from_hub -r {repo_id}
```\n
"""

- if enjoy_name is not None:
- readme += f"""
+ if enjoy_name is None:
+ enjoy_name = "<path.to.enjoy.module>"
+
+ readme += f"""
## Using the model\n
To run the model after download, use the `enjoy` script corresponding to this environment:
```
@@ -67,17 +69,19 @@ python -m {enjoy_name} --algo={algo} --env={env} --train_dir=./train_dir --exper
\n
You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details
- """
+ """

- if train_name is not None:
- readme += f"""
+ if train_name is None:
+ train_name = "<path.to.train.module>"
+
+ readme += f"""
## Training with this model\n
To continue training with this model, use the `train` script corresponding to this environment:
```
python -m {train_name} --algo={algo} --env={env} --train_dir=./train_dir --experiment={repo_name} --restart_behavior=resume --train_for_env_steps=10000000000
```\n
Note, you may have to adjust `--train_for_env_steps` to a suitably high number as the experiment will resume at the number of steps it concluded at.
- """
+ """

with open(readme_path, "w", encoding="utf-8") as f:
f.write(readme)
diff --git a/sample_factory/huggingface/push_to_hub.py b/sample_factory/huggingface/push_to_hub.py
index dbd5c382..d67806ad 100644
--- a/sample_factory/huggingface/push_to_hub.py
+++ b/sample_factory/huggingface/push_to_hub.py
@@ -16,6 +16,18 @@ def main():
type=str,
)
parser.add_argument("-d", "--experiment_dir", help="Path to your experiment directory", type=str)
+ parser.add_argument(
+ "--train_script",
+ default=None,
+ type=str,
+ help="Module name used to run training script. Used to generate HF model card",
+ )
+ parser.add_argument(
+ "--enjoy_script",
+ default=None,
+ type=str,
+ help="Module name used to run training script. Used to generate HF model card",
+ )
args = parser.parse_args()

cfg_file = os.path.join(args.experiment_dir, "config.json")
@@ -34,7 +46,7 @@ def main():
json_params = json.load(json_file)
cfg = AttrDict(json_params)

- generate_model_card(args.experiment_dir, cfg.algo, cfg.env, args.hf_repository)
+ generate_model_card(args.experiment_dir, cfg.algo, cfg.env, args.hf_repository, enjoy_name=args.enjoy_script, train_name=args.train_script)
push_to_hf(args.experiment_dir, args.hf_repository)


diff --git a/sample_factory/utils/utils.py b/sample_factory/utils/utils.py
index 99db3c10..fcd335c5 100644
--- a/sample_factory/utils/utils.py
+++ b/sample_factory/utils/utils.py
@@ -493,5 +493,5 @@ def debug_log_every_n(n, msg, *args, **kwargs):
log_every_n(n, logging.DEBUG, msg, *args, **kwargs)


-def get_top_level_script():
- return argv[0].split("sample-factory/")[-1][:-3].replace("/", ".")
+# def get_top_level_script():
+# return argv[0].split("sample-factory/")[-1][:-3].replace("/", ".")
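The diff above does two things: it makes the `<path.to.enjoy.module>` / `<path.to.train.module>` placeholders the default in generated model cards when no script is provided, and it exposes optional `--train_script` / `--enjoy_script` arguments on both the enjoy entry point and the standalone push script. Based only on the arguments shown in the diff, and assuming the push script is invoked as a module the same way `load_from_hub` is in the README above (repo and experiment names here are illustrative), a push with the new flags would look like:

```
python -m sample_factory.huggingface.push_to_hub -r <username>/<hf_repo_name> -d ./train_dir/<experiment> --enjoy_script=sf_examples.mujoco.enjoy_mujoco --train_script=sf_examples.mujoco.train_mujoco
```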
sf_log.txt
ADDED
@@ -0,0 +1,249 @@
[2023-01-11 06:14:39,279][18976] Saving configuration to /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/config.json...
[2023-01-11 06:14:39,292][18976] Rollout worker 0 uses device cpu
[2023-01-11 06:14:39,292][18976] Rollout worker 1 uses device cpu
[2023-01-11 06:14:39,293][18976] Rollout worker 2 uses device cpu
[2023-01-11 06:14:39,293][18976] Rollout worker 3 uses device cpu
[2023-01-11 06:14:39,293][18976] Rollout worker 4 uses device cpu
[2023-01-11 06:14:39,293][18976] Rollout worker 5 uses device cpu
[2023-01-11 06:14:39,293][18976] Rollout worker 6 uses device cpu
[2023-01-11 06:14:39,293][18976] Rollout worker 7 uses device cpu
[2023-01-11 06:14:39,293][18976] In synchronous mode, we only accumulate one batch. Setting num_batches_to_accumulate to 1
[2023-01-11 06:14:39,316][18976] Using GPUs [0] for process 0 (actually maps to GPUs [0])
[2023-01-11 06:14:39,316][18976] InferenceWorker_p0-w0: min num requests: 2
[2023-01-11 06:14:39,350][18976] Starting all processes...
[2023-01-11 06:14:39,350][18976] Starting process learner_proc0
[2023-01-11 06:14:39,356][18976] Starting all processes...
[2023-01-11 06:14:39,362][18976] Starting process inference_proc0-0
[2023-01-11 06:14:39,362][18976] Starting process rollout_proc0
[2023-01-11 06:14:39,363][18976] Starting process rollout_proc1
[2023-01-11 06:14:39,363][18976] Starting process rollout_proc2
[2023-01-11 06:14:39,363][18976] Starting process rollout_proc3
[2023-01-11 06:14:39,363][18976] Starting process rollout_proc4
[2023-01-11 06:14:39,364][18976] Starting process rollout_proc5
[2023-01-11 06:14:39,364][18976] Starting process rollout_proc6
[2023-01-11 06:14:39,364][18976] Starting process rollout_proc7
[2023-01-11 06:14:41,133][19078] Worker 3 uses CPU cores [3]
[2023-01-11 06:14:41,149][19080] Worker 4 uses CPU cores [4]
[2023-01-11 06:14:41,173][19081] Worker 6 uses CPU cores [6]
[2023-01-11 06:14:41,236][19062] Using GPUs [0] for process 0 (actually maps to GPUs [0])
[2023-01-11 06:14:41,236][19062] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
[2023-01-11 06:14:41,237][19079] Worker 2 uses CPU cores [2]
[2023-01-11 06:14:41,419][19077] Worker 0 uses CPU cores [0]
[2023-01-11 06:14:41,431][19076] Worker 1 uses CPU cores [1]
[2023-01-11 06:14:41,458][19075] Using GPUs [0] for process 0 (actually maps to GPUs [0])
[2023-01-11 06:14:41,458][19075] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
[2023-01-11 06:14:41,495][19082] Worker 5 uses CPU cores [5]
[2023-01-11 06:14:41,525][19083] Worker 7 uses CPU cores [7]
[2023-01-11 06:14:41,971][19062] Num visible devices: 1
[2023-01-11 06:14:41,971][19075] Num visible devices: 1
[2023-01-11 06:14:42,022][19062] Starting seed is not provided
[2023-01-11 06:14:42,022][19062] Using GPUs [0] for process 0 (actually maps to GPUs [0])
[2023-01-11 06:14:42,022][19062] Initializing actor-critic model on device cuda:0
[2023-01-11 06:14:42,023][19062] RunningMeanStd input shape: (27,)
[2023-01-11 06:14:42,023][19062] RunningMeanStd input shape: (1,)
[2023-01-11 06:14:42,127][19062] Created Actor Critic model with architecture:
[2023-01-11 06:14:42,128][19062] ActorCriticSharedWeights(
(obs_normalizer): ObservationNormalizer(
(running_mean_std): RunningMeanStdDictInPlace(
(running_mean_std): ModuleDict(
(obs): RunningMeanStdInPlace()
)
)
)
(returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
(encoder): MultiInputEncoder(
(encoders): ModuleDict(
(obs): MlpEncoder(
(mlp_head): RecursiveScriptModule(
original_name=Sequential
(0): RecursiveScriptModule(original_name=Linear)
(1): RecursiveScriptModule(original_name=Tanh)
(2): RecursiveScriptModule(original_name=Linear)
(3): RecursiveScriptModule(original_name=Tanh)
)
)
)
)
(core): ModelCoreIdentity()
(decoder): MlpDecoder(
(mlp): Identity()
)
(critic_linear): Linear(in_features=64, out_features=1, bias=True)
(action_parameterization): ActionParameterizationContinuousNonAdaptiveStddev(
(distribution_linear): Linear(in_features=64, out_features=8, bias=True)
)
)
[2023-01-11 06:14:47,316][19062] Using optimizer <class 'torch.optim.adam.Adam'>
[2023-01-11 06:14:47,317][19062] No checkpoints found
[2023-01-11 06:14:47,317][19062] Did not load from checkpoint, starting from scratch!
[2023-01-11 06:14:47,318][19062] Initialized policy 0 weights for model version 0
[2023-01-11 06:14:47,321][19062] LearnerWorker_p0 finished initialization!
[2023-01-11 06:14:47,322][19062] Using GPUs [0] for process 0 (actually maps to GPUs [0])
[2023-01-11 06:14:47,429][19075] RunningMeanStd input shape: (27,)
[2023-01-11 06:14:47,429][19075] RunningMeanStd input shape: (1,)
[2023-01-11 06:14:50,677][18976] Inference worker 0-0 is ready!
[2023-01-11 06:14:50,678][18976] All inference workers are ready! Signal rollout workers to start!
[2023-01-11 06:14:50,880][19081] Decorrelating experience for 0 frames...
[2023-01-11 06:14:50,881][19076] Decorrelating experience for 0 frames...
[2023-01-11 06:14:50,882][19081] Decorrelating experience for 64 frames...
[2023-01-11 06:14:50,883][19076] Decorrelating experience for 64 frames...
[2023-01-11 06:14:50,883][19077] Decorrelating experience for 0 frames...
[2023-01-11 06:14:50,884][19080] Decorrelating experience for 0 frames...
[2023-01-11 06:14:50,884][19079] Decorrelating experience for 0 frames...
[2023-01-11 06:14:50,885][19082] Decorrelating experience for 0 frames...
[2023-01-11 06:14:50,885][19078] Decorrelating experience for 0 frames...
[2023-01-11 06:14:50,885][19083] Decorrelating experience for 0 frames...
[2023-01-11 06:14:50,885][19077] Decorrelating experience for 64 frames...
[2023-01-11 06:14:50,886][19080] Decorrelating experience for 64 frames...
[2023-01-11 06:14:50,886][19079] Decorrelating experience for 64 frames...
[2023-01-11 06:14:50,887][19082] Decorrelating experience for 64 frames...
[2023-01-11 06:14:50,887][19078] Decorrelating experience for 64 frames...
[2023-01-11 06:14:50,887][19083] Decorrelating experience for 64 frames...
[2023-01-11 06:14:50,938][19076] Decorrelating experience for 128 frames...
[2023-01-11 06:14:50,939][19081] Decorrelating experience for 128 frames...
[2023-01-11 06:14:50,940][19077] Decorrelating experience for 128 frames...
[2023-01-11 06:14:50,941][19080] Decorrelating experience for 128 frames...
[2023-01-11 06:14:50,943][19083] Decorrelating experience for 128 frames...
[2023-01-11 06:14:50,943][19078] Decorrelating experience for 128 frames...
[2023-01-11 06:14:50,943][19079] Decorrelating experience for 128 frames...
[2023-01-11 06:14:50,944][19082] Decorrelating experience for 128 frames...
[2023-01-11 06:14:51,044][19076] Decorrelating experience for 192 frames...
[2023-01-11 06:14:51,047][19081] Decorrelating experience for 192 frames...
[2023-01-11 06:14:51,047][19077] Decorrelating experience for 192 frames...
[2023-01-11 06:14:51,050][19083] Decorrelating experience for 192 frames...
[2023-01-11 06:14:51,050][19080] Decorrelating experience for 192 frames...
[2023-01-11 06:14:51,051][19078] Decorrelating experience for 192 frames...
[2023-01-11 06:14:51,051][19082] Decorrelating experience for 192 frames...
[2023-01-11 06:14:51,054][19079] Decorrelating experience for 192 frames...
[2023-01-11 06:14:51,235][19076] Decorrelating experience for 256 frames...
[2023-01-11 06:14:51,238][19083] Decorrelating experience for 256 frames...
[2023-01-11 06:14:51,239][19077] Decorrelating experience for 256 frames...
[2023-01-11 06:14:51,242][19078] Decorrelating experience for 256 frames...
[2023-01-11 06:14:51,243][19081] Decorrelating experience for 256 frames...
[2023-01-11 06:14:51,247][19080] Decorrelating experience for 256 frames...
[2023-01-11 06:14:51,248][19082] Decorrelating experience for 256 frames...
[2023-01-11 06:14:51,251][19079] Decorrelating experience for 256 frames...
[2023-01-11 06:14:51,333][18976] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
[2023-01-11 06:14:51,335][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000000_0.pth...
[2023-01-11 06:14:51,445][19083] Decorrelating experience for 320 frames...
[2023-01-11 06:14:51,447][19077] Decorrelating experience for 320 frames...
[2023-01-11 06:14:51,450][19076] Decorrelating experience for 320 frames...
[2023-01-11 06:14:51,452][19078] Decorrelating experience for 320 frames...
[2023-01-11 06:14:51,453][19081] Decorrelating experience for 320 frames...
[2023-01-11 06:14:51,457][19082] Decorrelating experience for 320 frames...
[2023-01-11 06:14:51,458][19080] Decorrelating experience for 320 frames...
[2023-01-11 06:14:51,465][19079] Decorrelating experience for 320 frames...
[2023-01-11 06:14:51,706][19083] Decorrelating experience for 384 frames...
[2023-01-11 06:14:51,712][19076] Decorrelating experience for 384 frames...
[2023-01-11 06:14:51,716][19081] Decorrelating experience for 384 frames...
[2023-01-11 06:14:51,717][19078] Decorrelating experience for 384 frames...
[2023-01-11 06:14:51,717][19077] Decorrelating experience for 384 frames...
[2023-01-11 06:14:51,723][19082] Decorrelating experience for 384 frames...
[2023-01-11 06:14:51,724][19080] Decorrelating experience for 384 frames...
[2023-01-11 06:14:51,737][19079] Decorrelating experience for 384 frames...
[2023-01-11 06:14:52,017][19083] Decorrelating experience for 448 frames...
[2023-01-11 06:14:52,029][19076] Decorrelating experience for 448 frames...
[2023-01-11 06:14:52,036][19077] Decorrelating experience for 448 frames...
[2023-01-11 06:14:52,037][19082] Decorrelating experience for 448 frames...
[2023-01-11 06:14:52,038][19081] Decorrelating experience for 448 frames...
[2023-01-11 06:14:52,041][19078] Decorrelating experience for 448 frames...
[2023-01-11 06:14:52,053][19080] Decorrelating experience for 448 frames...
[2023-01-11 06:14:52,064][19079] Decorrelating experience for 448 frames...
[2023-01-11 06:14:56,333][18976] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 4096. Throughput: 0: 177.6. Samples: 888. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2023-01-11 06:14:56,333][18976] Avg episode reward: [(0, '-126.473')]
[2023-01-11 06:14:59,307][18976] Heartbeat connected on Batcher_0
[2023-01-11 06:14:59,311][18976] Heartbeat connected on LearnerWorker_p0
[2023-01-11 06:14:59,322][18976] Heartbeat connected on RolloutWorker_w0
[2023-01-11 06:14:59,326][18976] Heartbeat connected on InferenceWorker_p0-w0
[2023-01-11 06:14:59,331][18976] Heartbeat connected on RolloutWorker_w1
[2023-01-11 06:14:59,335][18976] Heartbeat connected on RolloutWorker_w2
[2023-01-11 06:14:59,340][18976] Heartbeat connected on RolloutWorker_w3
[2023-01-11 06:14:59,341][18976] Heartbeat connected on RolloutWorker_w5
[2023-01-11 06:14:59,341][18976] Heartbeat connected on RolloutWorker_w4
[2023-01-11 06:14:59,351][18976] Heartbeat connected on RolloutWorker_w6
[2023-01-11 06:14:59,351][18976] Heartbeat connected on RolloutWorker_w7
[2023-01-11 06:15:01,333][18976] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 3276.8). Total num frames: 32768. Throughput: 0: 2408.4. Samples: 24084. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
[2023-01-11 06:15:01,333][18976] Avg episode reward: [(0, '-130.992')]
[2023-01-11 06:15:03,917][19075] Updated weights for policy 0, policy_version 80 (0.0008)
[2023-01-11 06:15:06,333][18976] Fps is (10 sec: 4096.0, 60 sec: 3003.7, 300 sec: 3003.7). Total num frames: 45056. Throughput: 0: 3008.0. Samples: 45120. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
[2023-01-11 06:15:06,334][18976] Avg episode reward: [(0, '-227.535')]
[2023-01-11 06:15:06,339][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000088_45056.pth...
[2023-01-11 06:15:11,333][18976] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3686.4). Total num frames: 73728. Throughput: 0: 3020.8. Samples: 60416. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2023-01-11 06:15:11,333][18976] Avg episode reward: [(0, '-297.337')]
[2023-01-11 06:15:12,628][19075] Updated weights for policy 0, policy_version 160 (0.0007)
[2023-01-11 06:15:16,332][18976] Fps is (10 sec: 5324.9, 60 sec: 3932.2, 300 sec: 3932.2). Total num frames: 98304. Throughput: 0: 3737.8. Samples: 93444. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
[2023-01-11 06:15:16,333][18976] Avg episode reward: [(0, '-127.822')]
[2023-01-11 06:15:20,209][19075] Updated weights for policy 0, policy_version 240 (0.0007)
[2023-01-11 06:15:21,333][18976] Fps is (10 sec: 5324.8, 60 sec: 4232.5, 300 sec: 4232.5). Total num frames: 126976. Throughput: 0: 4195.5. Samples: 125864. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
[2023-01-11 06:15:21,333][18976] Avg episode reward: [(0, '-105.405')]
[2023-01-11 06:15:21,340][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000248_126976.pth...
[2023-01-11 06:15:21,347][19062] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000000_0.pth
[2023-01-11 06:15:21,348][19062] Saving new best policy, reward=-105.405!
[2023-01-11 06:15:26,333][18976] Fps is (10 sec: 5734.3, 60 sec: 4447.1, 300 sec: 4447.1). Total num frames: 155648. Throughput: 0: 4065.3. Samples: 142284. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2023-01-11 06:15:26,333][18976] Avg episode reward: [(0, '-109.426')]
[2023-01-11 06:15:27,645][19075] Updated weights for policy 0, policy_version 320 (0.0007)
[2023-01-11 06:15:31,332][18976] Fps is (10 sec: 5324.9, 60 sec: 4505.6, 300 sec: 4505.6). Total num frames: 180224. Throughput: 0: 4384.3. Samples: 175372. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2023-01-11 06:15:31,333][18976] Avg episode reward: [(0, '-72.518')]
[2023-01-11 06:15:31,333][19062] Saving new best policy, reward=-72.518!
[2023-01-11 06:15:35,206][19075] Updated weights for policy 0, policy_version 400 (0.0007)
[2023-01-11 06:15:36,333][18976] Fps is (10 sec: 5324.8, 60 sec: 4642.1, 300 sec: 4642.1). Total num frames: 208896. Throughput: 0: 4618.4. Samples: 207828. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2023-01-11 06:15:36,333][18976] Avg episode reward: [(0, '-35.778')]
[2023-01-11 06:15:36,339][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000408_208896.pth...
[2023-01-11 06:15:36,347][19062] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000088_45056.pth
[2023-01-11 06:15:36,347][19062] Saving new best policy, reward=-35.778!
[2023-01-11 06:15:41,333][18976] Fps is (10 sec: 5734.3, 60 sec: 4751.4, 300 sec: 4751.4). Total num frames: 237568. Throughput: 0: 4961.7. Samples: 224164. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
[2023-01-11 06:15:41,333][18976] Avg episode reward: [(0, '-30.077')]
[2023-01-11 06:15:41,334][19062] Saving new best policy, reward=-30.077!
[2023-01-11 06:15:42,735][19075] Updated weights for policy 0, policy_version 480 (0.0007)
[2023-01-11 06:15:46,333][18976] Fps is (10 sec: 5324.9, 60 sec: 4766.3, 300 sec: 4766.3). Total num frames: 262144. Throughput: 0: 5175.8. Samples: 256996. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
[2023-01-11 06:15:46,333][18976] Avg episode reward: [(0, '-40.853')]
[2023-01-11 06:15:50,214][19075] Updated weights for policy 0, policy_version 560 (0.0007)
[2023-01-11 06:15:51,333][18976] Fps is (10 sec: 5324.8, 60 sec: 4846.9, 300 sec: 4846.9). Total num frames: 290816. Throughput: 0: 5434.8. Samples: 289688. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2023-01-11 06:15:51,333][18976] Avg episode reward: [(0, '-38.381')]
[2023-01-11 06:15:51,340][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000568_290816.pth...
[2023-01-11 06:15:51,347][19062] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000248_126976.pth
[2023-01-11 06:15:56,333][18976] Fps is (10 sec: 5734.3, 60 sec: 5256.5, 300 sec: 4915.2). Total num frames: 319488. Throughput: 0: 5459.1. Samples: 306076. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
[2023-01-11 06:15:56,333][18976] Avg episode reward: [(0, '-20.183')]
[2023-01-11 06:15:56,334][19062] Saving new best policy, reward=-20.183!
[2023-01-11 06:15:57,590][19075] Updated weights for policy 0, policy_version 640 (0.0006)
[2023-01-11 06:16:01,333][18976] Fps is (10 sec: 5324.9, 60 sec: 5188.3, 300 sec: 4915.2). Total num frames: 344064. Throughput: 0: 5477.0. Samples: 339908. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
[2023-01-11 06:16:01,333][18976] Avg episode reward: [(0, '7.282')]
[2023-01-11 06:16:01,335][19062] Saving new best policy, reward=7.282!
[2023-01-11 06:16:06,333][18976] Fps is (10 sec: 4096.0, 60 sec: 5256.5, 300 sec: 4806.0). Total num frames: 360448. Throughput: 0: 5188.2. Samples: 359332. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
[2023-01-11 06:16:06,333][18976] Avg episode reward: [(0, '20.695')]
[2023-01-11 06:16:06,339][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000704_360448.pth...
[2023-01-11 06:16:06,346][19062] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000408_208896.pth
[2023-01-11 06:16:06,347][19062] Saving new best policy, reward=20.695!
[2023-01-11 06:16:07,412][19075] Updated weights for policy 0, policy_version 720 (0.0007)
[2023-01-11 06:16:11,332][18976] Fps is (10 sec: 4505.6, 60 sec: 5256.5, 300 sec: 4864.0). Total num frames: 389120. Throughput: 0: 5189.2. Samples: 375796. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2023-01-11 06:16:11,333][18976] Avg episode reward: [(0, '42.463')]
[2023-01-11 06:16:11,334][19062] Saving new best policy, reward=42.463!
[2023-01-11 06:16:14,150][18976] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 18976], exiting...
[2023-01-11 06:16:14,150][18976] Runner profile tree view:
main_loop: 94.8007
[2023-01-11 06:16:14,150][19080] Stopping RolloutWorker_w4...
[2023-01-11 06:16:14,151][19079] Stopping RolloutWorker_w2...
[2023-01-11 06:16:14,151][18976] Collected {0: 401408}, FPS: 4234.2
[2023-01-11 06:16:14,151][19082] Stopping RolloutWorker_w5...
[2023-01-11 06:16:14,151][19079] Loop rollout_proc2_evt_loop terminating...
[2023-01-11 06:16:14,151][19080] Loop rollout_proc4_evt_loop terminating...
[2023-01-11 06:16:14,152][19082] Loop rollout_proc5_evt_loop terminating...
[2023-01-11 06:16:14,151][19083] Stopping RolloutWorker_w7...
[2023-01-11 06:16:14,151][19076] Stopping RolloutWorker_w1...
[2023-01-11 06:16:14,151][19078] Stopping RolloutWorker_w3...
[2023-01-11 06:16:14,154][19083] Loop rollout_proc7_evt_loop terminating...
[2023-01-11 06:16:14,154][19078] Loop rollout_proc3_evt_loop terminating...
[2023-01-11 06:16:14,154][19076] Loop rollout_proc1_evt_loop terminating...
[2023-01-11 06:16:14,151][19062] Stopping Batcher_0...
[2023-01-11 06:16:14,151][19081] Stopping RolloutWorker_w6...
[2023-01-11 06:16:14,151][19077] Stopping RolloutWorker_w0...
[2023-01-11 06:16:14,157][19062] Loop batcher_evt_loop terminating...
[2023-01-11 06:16:14,157][19077] Loop rollout_proc0_evt_loop terminating...
[2023-01-11 06:16:14,157][19081] Loop rollout_proc6_evt_loop terminating...
[2023-01-11 06:16:14,211][19075] Weights refcount: 2 0
[2023-01-11 06:16:14,213][19075] Stopping InferenceWorker_p0-w0...
[2023-01-11 06:16:14,214][19075] Loop inference_proc0-0_evt_loop terminating...
[2023-01-11 06:16:14,224][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000792_405504.pth...
[2023-01-11 06:16:14,232][19062] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000568_290816.pth
[2023-01-11 06:16:14,233][19062] Stopping LearnerWorker_p0...
[2023-01-11 06:16:14,233][19062] Loop learner_proc0_evt_loop terminating...