andrewzhang505 committed
Commit 4cffbfd • 1 parent: 303088e
Upload . with huggingface_hub

Files changed:
- .summary/0/events.out.tfevents.1673417676.andrew-gpu +3 -0
- README.md +42 -0
- checkpoint_p0/best_000000760_389120_reward_42.463.pth +3 -0
- checkpoint_p0/checkpoint_000000704_360448.pth +3 -0
- checkpoint_p0/checkpoint_000000792_405504.pth +3 -0
- config.json +129 -0
- git.diff +186 -0
- sf_log.txt +249 -0
.summary/0/events.out.tfevents.1673417676.andrew-gpu
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5f5bcafd032d21d96cc07da746dd765a91f8560d22fcfe38b7f88fe133805a25
size 61881
README.md
ADDED
@@ -0,0 +1,42 @@
---
library_name: sample-factory
tags:
- deep-reinforcement-learning
- reinforcement-learning
- sample-factory
---

An **APPO** model trained on the **mujoco_ant** environment.

This model was trained using Sample-Factory 2.0: https://github.com/alex-petrenko/sample-factory.
Documentation for how to use Sample-Factory can be found at https://www.samplefactory.dev/


## Downloading the model

After installing Sample-Factory, download the model with:
```
python -m sample_factory.huggingface.load_from_hub -r andrewzhang505/ant_test4
```


## Using the model

To run the model after download, use the `enjoy` script corresponding to this environment:
```
python -m <path.to.enjoy.module> --algo=APPO --env=mujoco_ant --train_dir=./train_dir --experiment=ant_test4
```


You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details.

## Training with this model

To continue training with this model, use the `train` script corresponding to this environment:
```
python -m <path.to.train.module> --algo=APPO --env=mujoco_ant --train_dir=./train_dir --experiment=ant_test4 --restart_behavior=resume --train_for_env_steps=10000000000
```

Note: you may have to adjust `--train_for_env_steps` to a suitably high number, as the experiment will resume at the number of steps it concluded at.
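The README above uses the Sample-Factory CLI for downloading. As an aside, the same files can be fetched with `huggingface_hub` directly; this is only a minimal sketch, assuming a recent `huggingface_hub` that supports the `local_dir` argument of `snapshot_download`, and it mirrors the `./train_dir/<experiment>` layout that the `enjoy`/`train` commands above expect.

```python
# Sketch only: download this repo with huggingface_hub instead of the
# sample_factory.huggingface.load_from_hub CLI shown in the README above.
from huggingface_hub import snapshot_download

# The enjoy/train commands look for the experiment under ./train_dir/<experiment>,
# so the local_dir mirrors that layout.
local_path = snapshot_download(
    repo_id="andrewzhang505/ant_test4",
    local_dir="./train_dir/ant_test4",
)
print(f"Model files downloaded to: {local_path}")
```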
checkpoint_p0/best_000000760_389120_reward_42.463.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7a81f537790ed40ff09681497ef77143cc8d01ccc205c906aba34cb26b5de07d
size 89730
checkpoint_p0/checkpoint_000000704_360448.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:73dc6a475005ac33458fe996f3e419e919e6bd0796db60dc206c5fe6796b842c
size 89730
checkpoint_p0/checkpoint_000000792_405504.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4d04c7d06b796cecc234541c6283270915c8bae02161032240b1ca8da7b346b8
size 89730
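The three `.pth` files above are stored as Git LFS pointers (version/oid/size) rather than raw weights; once downloaded they are ordinary PyTorch checkpoint files. A minimal sketch for inspecting one locally (the path is illustrative, and only standard `torch.load` is assumed):

```python
# Sketch only: peek inside a downloaded Sample-Factory checkpoint file.
# The path below is illustrative; map_location keeps the load on CPU.
import torch

checkpoint = torch.load(
    "train_dir/ant_test4/checkpoint_p0/best_000000760_389120_reward_42.463.pth",
    map_location="cpu",
)
# Checkpoints are saved as a dict; print its top-level entries if so.
print(list(checkpoint.keys()) if isinstance(checkpoint, dict) else type(checkpoint))
```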
config.json
ADDED
@@ -0,0 +1,129 @@
{
    "help": false,
    "algo": "APPO",
    "env": "mujoco_ant",
    "experiment": "ant_test",
    "train_dir": "/home/andrew_huggingface_co/sample-factory/train_dir",
    "restart_behavior": "resume",
    "device": "gpu",
    "seed": null,
    "num_policies": 1,
    "async_rl": false,
    "serial_mode": false,
    "batched_sampling": false,
    "num_batches_to_accumulate": 2,
    "worker_num_splits": 2,
    "policy_workers_per_policy": 1,
    "max_policy_lag": 1000,
    "num_workers": 8,
    "num_envs_per_worker": 8,
    "batch_size": 1024,
    "num_batches_per_epoch": 4,
    "num_epochs": 2,
    "rollout": 64,
    "recurrence": 1,
    "shuffle_minibatches": false,
    "gamma": 0.99,
    "reward_scale": 1,
    "reward_clip": 1000.0,
    "value_bootstrap": true,
    "normalize_returns": true,
    "exploration_loss_coeff": 0.0,
    "value_loss_coeff": 1.3,
    "kl_loss_coeff": 0.1,
    "exploration_loss": "entropy",
    "gae_lambda": 0.95,
    "ppo_clip_ratio": 0.2,
    "ppo_clip_value": 1.0,
    "with_vtrace": false,
    "vtrace_rho": 1.0,
    "vtrace_c": 1.0,
    "optimizer": "adam",
    "adam_eps": 1e-06,
    "adam_beta1": 0.9,
    "adam_beta2": 0.999,
    "max_grad_norm": 3.5,
    "learning_rate": 0.00295,
    "lr_schedule": "linear_decay",
    "lr_schedule_kl_threshold": 0.008,
    "obs_subtract_mean": 0.0,
    "obs_scale": 1.0,
    "normalize_input": true,
    "normalize_input_keys": null,
    "decorrelate_experience_max_seconds": 0,
    "decorrelate_envs_on_one_worker": true,
    "actor_worker_gpus": [],
    "set_workers_cpu_affinity": true,
    "force_envs_single_thread": false,
    "default_niceness": 0,
    "log_to_file": true,
    "experiment_summaries_interval": 3,
    "flush_summaries_interval": 30,
    "stats_avg": 100,
    "summaries_use_frameskip": true,
    "heartbeat_interval": 20,
    "heartbeat_reporting_interval": 180,
    "train_for_env_steps": 10000000,
    "train_for_seconds": 10000000000,
    "save_every_sec": 15,
    "keep_checkpoints": 2,
    "load_checkpoint_kind": "latest",
    "save_milestones_sec": -1,
    "save_best_every_sec": 5,
    "save_best_metric": "reward",
    "save_best_after": 100000,
    "benchmark": false,
    "encoder_mlp_layers": [
        64,
        64
    ],
    "encoder_conv_architecture": "convnet_simple",
    "encoder_conv_mlp_layers": [
        512
    ],
    "use_rnn": false,
    "rnn_size": 512,
    "rnn_type": "gru",
    "rnn_num_layers": 1,
    "decoder_mlp_layers": [],
    "nonlinearity": "tanh",
    "policy_initialization": "torch_default",
    "policy_init_gain": 1.0,
    "actor_critic_share_weights": true,
    "adaptive_stddev": false,
    "continuous_tanh_scale": 0.0,
    "initial_stddev": 1.0,
    "use_env_info_cache": false,
    "env_gpu_actions": false,
    "env_gpu_observations": true,
    "env_frameskip": 1,
    "env_framestack": 1,
    "pixel_format": "CHW",
    "use_record_episode_statistics": false,
    "with_wandb": false,
    "wandb_user": null,
    "wandb_project": "sample_factory",
    "wandb_group": null,
    "wandb_job_type": "SF",
    "wandb_tags": [],
    "with_pbt": false,
    "pbt_mix_policies_in_one_env": true,
    "pbt_period_env_steps": 5000000,
    "pbt_start_mutation": 20000000,
    "pbt_replace_fraction": 0.3,
    "pbt_mutation_rate": 0.15,
    "pbt_replace_reward_gap": 0.1,
    "pbt_replace_reward_gap_absolute": 1e-06,
    "pbt_optimize_gamma": false,
    "pbt_target_objective": "true_objective",
    "pbt_perturb_min": 1.1,
    "pbt_perturb_max": 1.5,
    "command_line": "--algo=APPO --env=mujoco_ant --experiment=ant_test",
    "cli_args": {
        "algo": "APPO",
        "env": "mujoco_ant",
        "experiment": "ant_test"
    },
    "git_hash": "7355a9d939997e11f02d311f71e4ca3be2e9258f",
    "git_repo_name": "https://github.com/andrewzhang505/sample-factory.git"
}
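This config.json is what Sample-Factory reads back when resuming the experiment or generating a model card. A minimal sketch of loading it, following the same `json.load` + `AttrDict` pattern that `push_to_hub.py` uses in the git.diff below (the local path here is illustrative):

```python
# Sketch only: load the experiment config above into Sample-Factory's AttrDict,
# mirroring the pattern in sample_factory/huggingface/push_to_hub.py (see git.diff below).
import json

from sample_factory.utils.attr_dict import AttrDict

with open("train_dir/ant_test/config.json", encoding="utf-8") as json_file:
    cfg = AttrDict(json.load(json_file))

# The recorded hyperparameters are then available as attributes:
print(cfg.algo, cfg.env, cfg.learning_rate, cfg.batch_size)
```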
git.diff
ADDED
@@ -0,0 +1,186 @@
diff --git a/docs/10-huggingface/huggingface.md b/docs/10-huggingface/huggingface.md
index 8846da73..1f1fae6f 100644
--- a/docs/10-huggingface/huggingface.md
+++ b/docs/10-huggingface/huggingface.md
@@ -77,10 +77,16 @@ You can also save a video of the model during evaluation to upload to the hub wi

- `--video_name`: The name of the video to save as. If `None`, will save to `replay.mp4` in your experiment directory

+Also, you can include information in the Hugging Face Hub model card for how to train and enjoy using this model. These parameters are optional:
+
+- `--train_script`: The module path for training this model
+
+- `--enjoy_script`: The module path for enjoying this model
+
For example:

```
-python -m sf_examples.mujoco.enjoy_mujoco --algo=APPO --env=mujoco_ant --experiment=<repo_name> --train_dir=./train_dir --max_num_episodes=10 --push_to_hub --hf_repository=<username>/<hf_repo_name> --save_video --no_render
+python -m sf_examples.mujoco.enjoy_mujoco --algo=APPO --env=mujoco_ant --experiment=<repo_name> --train_dir=./train_dir --max_num_episodes=10 --push_to_hub --hf_repository=<username>/<hf_repo_name> --save_video --no_render --enjoy_script=sf_examples.mujoco.enjoy_mujoco --train_script=sf_examples.mujoco.train_mujoco
```

#### Using the push_to_hub Script
@@ -95,4 +101,6 @@ The command line arguments are:

- `-r`: The repo_id to save on HF Hub. This is the same as `hf_repository` in the enjoy script and must be in the form `<hf_username>/<hf_repo_name>`

-- `-d`: The full path to your experiment directory to upload

+- `-d`: The full path to your experiment directory to upload
+
+The optional arguments of `--train_script` and `--enjoy_script` can also be used. See the above section for more details

diff --git a/sample_factory/cfg/arguments.py b/sample_factory/cfg/arguments.py
index 820efce6..f736342d 100644
--- a/sample_factory/cfg/arguments.py
+++ b/sample_factory/cfg/arguments.py
@@ -18,7 +18,7 @@ from sample_factory.cfg.cfg import (
)
from sample_factory.utils.attr_dict import AttrDict
from sample_factory.utils.typing import Config
-from sample_factory.utils.utils import cfg_file, cfg_file_old, get_git_commit_hash, get_top_level_script, log
+from sample_factory.utils.utils import cfg_file, cfg_file_old, get_git_commit_hash, log


def parse_sf_args(
@@ -91,7 +91,6 @@ def postprocess_args(args, argv, parser) -> argparse.Namespace:

args.cli_args = vars(cli_args)
args.git_hash, args.git_repo_name = get_git_commit_hash()
- args.train_script = get_top_level_script()
return args


diff --git a/sample_factory/cfg/cfg.py b/sample_factory/cfg/cfg.py
index 43393da1..360e6895 100644
--- a/sample_factory/cfg/cfg.py
+++ b/sample_factory/cfg/cfg.py
@@ -675,6 +675,19 @@ def add_eval_args(parser):
help="False to sample from action distributions at test time. True to just use the argmax.",
)

+ parser.add_argument(
+ "--train_script",
+ default=None,
+ type=str,
+ help="Module name used to run training script. Used to generate HF model card",
+ )
+ parser.add_argument(
+ "--enjoy_script",
+ default=None,
+ type=str,
+ help="Module name used to run training script. Used to generate HF model card",
+ )
+

def add_wandb_args(p: ArgumentParser):
"""Weights and Biases experiment monitoring."""
diff --git a/sample_factory/enjoy.py b/sample_factory/enjoy.py
index 341b537b..b620c532 100644
--- a/sample_factory/enjoy.py
+++ b/sample_factory/enjoy.py
@@ -21,7 +21,7 @@ from sample_factory.model.actor_critic import create_actor_critic
from sample_factory.model.model_utils import get_rnn_size
from sample_factory.utils.attr_dict import AttrDict
from sample_factory.utils.typing import Config, StatusCode
-from sample_factory.utils.utils import debug_log_every_n, experiment_dir, get_top_level_script, log
+from sample_factory.utils.utils import debug_log_every_n, experiment_dir, log


def visualize_policy_inputs(normalized_obs: Dict[str, Tensor]) -> None:
@@ -260,9 +260,8 @@ def enjoy(cfg: Config) -> Tuple[StatusCode, float]:
generate_replay_video(experiment_dir(cfg=cfg), video_frames, fps)

if cfg.push_to_hub:
- enjoy_name = get_top_level_script()
generate_model_card(
- experiment_dir(cfg=cfg), cfg.algo, cfg.env, cfg.hf_repository, reward_list, enjoy_name, cfg.train_script
+ experiment_dir(cfg=cfg), cfg.algo, cfg.env, cfg.hf_repository, reward_list, cfg.enjoy_script, cfg.train_script
)
push_to_hf(experiment_dir(cfg=cfg), cfg.hf_repository)

diff --git a/sample_factory/huggingface/huggingface_utils.py b/sample_factory/huggingface/huggingface_utils.py
index 90184da7..5b4a6b14 100644
--- a/sample_factory/huggingface/huggingface_utils.py
+++ b/sample_factory/huggingface/huggingface_utils.py
@@ -57,8 +57,10 @@ python -m sample_factory.huggingface.load_from_hub -r {repo_id}
```\n
"""

- if enjoy_name is not None:
- readme += f"""
+ if enjoy_name is None:
+ enjoy_name = "<path.to.enjoy.module>"
+
+ readme += f"""
## Using the model\n
To run the model after download, use the `enjoy` script corresponding to this environment:
```
@@ -67,17 +69,19 @@ python -m {enjoy_name} --algo={algo} --env={env} --train_dir=./train_dir --exper
\n
You can also upload models to the Hugging Face Hub using the same script with the `--push_to_hub` flag.
See https://www.samplefactory.dev/10-huggingface/huggingface/ for more details
- """
+ """

- if train_name is not None:
- readme += f"""
+ if train_name is None:
+ train_name = "<path.to.train.module>"
+
+ readme += f"""
## Training with this model\n
To continue training with this model, use the `train` script corresponding to this environment:
```
python -m {train_name} --algo={algo} --env={env} --train_dir=./train_dir --experiment={repo_name} --restart_behavior=resume --train_for_env_steps=10000000000
```\n
Note, you may have to adjust `--train_for_env_steps` to a suitably high number as the experiment will resume at the number of steps it concluded at.
- """
+ """

with open(readme_path, "w", encoding="utf-8") as f:
f.write(readme)
diff --git a/sample_factory/huggingface/push_to_hub.py b/sample_factory/huggingface/push_to_hub.py
index dbd5c382..d67806ad 100644
--- a/sample_factory/huggingface/push_to_hub.py
+++ b/sample_factory/huggingface/push_to_hub.py
@@ -16,6 +16,18 @@ def main():
type=str,
)
parser.add_argument("-d", "--experiment_dir", help="Path to your experiment directory", type=str)
+ parser.add_argument(
+ "--train_script",
+ default=None,
+ type=str,
+ help="Module name used to run training script. Used to generate HF model card",
+ )
+ parser.add_argument(
+ "--enjoy_script",
+ default=None,
+ type=str,
+ help="Module name used to run training script. Used to generate HF model card",
+ )
args = parser.parse_args()

cfg_file = os.path.join(args.experiment_dir, "config.json")
@@ -34,7 +46,7 @@ def main():
json_params = json.load(json_file)
cfg = AttrDict(json_params)

- generate_model_card(args.experiment_dir, cfg.algo, cfg.env, args.hf_repository)
+ generate_model_card(args.experiment_dir, cfg.algo, cfg.env, args.hf_repository, enjoy_name=args.enjoy_script, train_name=args.train_script)
push_to_hf(args.experiment_dir, args.hf_repository)


diff --git a/sample_factory/utils/utils.py b/sample_factory/utils/utils.py
index 99db3c10..fcd335c5 100644
--- a/sample_factory/utils/utils.py
+++ b/sample_factory/utils/utils.py
@@ -493,5 +493,5 @@ def debug_log_every_n(n, msg, *args, **kwargs):
log_every_n(n, logging.DEBUG, msg, *args, **kwargs)


-def get_top_level_script():
- return argv[0].split("sample-factory/")[-1][:-3].replace("/", ".")
+# def get_top_level_script():
+# return argv[0].split("sample-factory/")[-1][:-3].replace("/", ".")
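The diff above does two things: it makes the `<path.to.enjoy.module>` / `<path.to.train.module>` placeholders the default in generated model cards when no script is provided, and it exposes optional `--train_script` / `--enjoy_script` arguments on both the enjoy entry point and the standalone push script. Based only on the arguments shown in the diff, and assuming the push script is invoked as a module the same way `load_from_hub` is in the README above (repo and experiment names here are illustrative), a push with the new flags would look like:

```
python -m sample_factory.huggingface.push_to_hub -r <username>/<hf_repo_name> -d ./train_dir/<experiment> --enjoy_script=sf_examples.mujoco.enjoy_mujoco --train_script=sf_examples.mujoco.train_mujoco
```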
sf_log.txt
ADDED
@@ -0,0 +1,249 @@
[2023-01-11 06:14:39,279][18976] Saving configuration to /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/config.json...
[2023-01-11 06:14:39,292][18976] Rollout worker 0 uses device cpu
[2023-01-11 06:14:39,292][18976] Rollout worker 1 uses device cpu
[2023-01-11 06:14:39,293][18976] Rollout worker 2 uses device cpu
[2023-01-11 06:14:39,293][18976] Rollout worker 3 uses device cpu
[2023-01-11 06:14:39,293][18976] Rollout worker 4 uses device cpu
[2023-01-11 06:14:39,293][18976] Rollout worker 5 uses device cpu
[2023-01-11 06:14:39,293][18976] Rollout worker 6 uses device cpu
[2023-01-11 06:14:39,293][18976] Rollout worker 7 uses device cpu
[2023-01-11 06:14:39,293][18976] In synchronous mode, we only accumulate one batch. Setting num_batches_to_accumulate to 1
[2023-01-11 06:14:39,316][18976] Using GPUs [0] for process 0 (actually maps to GPUs [0])
[2023-01-11 06:14:39,316][18976] InferenceWorker_p0-w0: min num requests: 2
[2023-01-11 06:14:39,350][18976] Starting all processes...
[2023-01-11 06:14:39,350][18976] Starting process learner_proc0
[2023-01-11 06:14:39,356][18976] Starting all processes...
[2023-01-11 06:14:39,362][18976] Starting process inference_proc0-0
[2023-01-11 06:14:39,362][18976] Starting process rollout_proc0
[2023-01-11 06:14:39,363][18976] Starting process rollout_proc1
[2023-01-11 06:14:39,363][18976] Starting process rollout_proc2
[2023-01-11 06:14:39,363][18976] Starting process rollout_proc3
[2023-01-11 06:14:39,363][18976] Starting process rollout_proc4
[2023-01-11 06:14:39,364][18976] Starting process rollout_proc5
[2023-01-11 06:14:39,364][18976] Starting process rollout_proc6
[2023-01-11 06:14:39,364][18976] Starting process rollout_proc7
[2023-01-11 06:14:41,133][19078] Worker 3 uses CPU cores [3]
[2023-01-11 06:14:41,149][19080] Worker 4 uses CPU cores [4]
[2023-01-11 06:14:41,173][19081] Worker 6 uses CPU cores [6]
[2023-01-11 06:14:41,236][19062] Using GPUs [0] for process 0 (actually maps to GPUs [0])
[2023-01-11 06:14:41,236][19062] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for learning process 0
[2023-01-11 06:14:41,237][19079] Worker 2 uses CPU cores [2]
[2023-01-11 06:14:41,419][19077] Worker 0 uses CPU cores [0]
[2023-01-11 06:14:41,431][19076] Worker 1 uses CPU cores [1]
[2023-01-11 06:14:41,458][19075] Using GPUs [0] for process 0 (actually maps to GPUs [0])
[2023-01-11 06:14:41,458][19075] Set environment var CUDA_VISIBLE_DEVICES to '0' (GPU indices [0]) for inference process 0
[2023-01-11 06:14:41,495][19082] Worker 5 uses CPU cores [5]
[2023-01-11 06:14:41,525][19083] Worker 7 uses CPU cores [7]
[2023-01-11 06:14:41,971][19062] Num visible devices: 1
[2023-01-11 06:14:41,971][19075] Num visible devices: 1
[2023-01-11 06:14:42,022][19062] Starting seed is not provided
[2023-01-11 06:14:42,022][19062] Using GPUs [0] for process 0 (actually maps to GPUs [0])
[2023-01-11 06:14:42,022][19062] Initializing actor-critic model on device cuda:0
[2023-01-11 06:14:42,023][19062] RunningMeanStd input shape: (27,)
[2023-01-11 06:14:42,023][19062] RunningMeanStd input shape: (1,)
[2023-01-11 06:14:42,127][19062] Created Actor Critic model with architecture:
[2023-01-11 06:14:42,128][19062] ActorCriticSharedWeights(
(obs_normalizer): ObservationNormalizer(
(running_mean_std): RunningMeanStdDictInPlace(
(running_mean_std): ModuleDict(
(obs): RunningMeanStdInPlace()
)
)
)
(returns_normalizer): RecursiveScriptModule(original_name=RunningMeanStdInPlace)
(encoder): MultiInputEncoder(
(encoders): ModuleDict(
(obs): MlpEncoder(
(mlp_head): RecursiveScriptModule(
original_name=Sequential
(0): RecursiveScriptModule(original_name=Linear)
(1): RecursiveScriptModule(original_name=Tanh)
(2): RecursiveScriptModule(original_name=Linear)
(3): RecursiveScriptModule(original_name=Tanh)
)
)
)
)
(core): ModelCoreIdentity()
(decoder): MlpDecoder(
(mlp): Identity()
)
(critic_linear): Linear(in_features=64, out_features=1, bias=True)
(action_parameterization): ActionParameterizationContinuousNonAdaptiveStddev(
(distribution_linear): Linear(in_features=64, out_features=8, bias=True)
)
)
[2023-01-11 06:14:47,316][19062] Using optimizer <class 'torch.optim.adam.Adam'>
[2023-01-11 06:14:47,317][19062] No checkpoints found
[2023-01-11 06:14:47,317][19062] Did not load from checkpoint, starting from scratch!
[2023-01-11 06:14:47,318][19062] Initialized policy 0 weights for model version 0
[2023-01-11 06:14:47,321][19062] LearnerWorker_p0 finished initialization!
[2023-01-11 06:14:47,322][19062] Using GPUs [0] for process 0 (actually maps to GPUs [0])
[2023-01-11 06:14:47,429][19075] RunningMeanStd input shape: (27,)
[2023-01-11 06:14:47,429][19075] RunningMeanStd input shape: (1,)
[2023-01-11 06:14:50,677][18976] Inference worker 0-0 is ready!
[2023-01-11 06:14:50,678][18976] All inference workers are ready! Signal rollout workers to start!
[2023-01-11 06:14:50,880][19081] Decorrelating experience for 0 frames...
[2023-01-11 06:14:50,881][19076] Decorrelating experience for 0 frames...
[2023-01-11 06:14:50,882][19081] Decorrelating experience for 64 frames...
[2023-01-11 06:14:50,883][19076] Decorrelating experience for 64 frames...
[2023-01-11 06:14:50,883][19077] Decorrelating experience for 0 frames...
[2023-01-11 06:14:50,884][19080] Decorrelating experience for 0 frames...
[2023-01-11 06:14:50,884][19079] Decorrelating experience for 0 frames...
[2023-01-11 06:14:50,885][19082] Decorrelating experience for 0 frames...
[2023-01-11 06:14:50,885][19078] Decorrelating experience for 0 frames...
[2023-01-11 06:14:50,885][19083] Decorrelating experience for 0 frames...
[2023-01-11 06:14:50,885][19077] Decorrelating experience for 64 frames...
[2023-01-11 06:14:50,886][19080] Decorrelating experience for 64 frames...
[2023-01-11 06:14:50,886][19079] Decorrelating experience for 64 frames...
[2023-01-11 06:14:50,887][19082] Decorrelating experience for 64 frames...
[2023-01-11 06:14:50,887][19078] Decorrelating experience for 64 frames...
[2023-01-11 06:14:50,887][19083] Decorrelating experience for 64 frames...
[2023-01-11 06:14:50,938][19076] Decorrelating experience for 128 frames...
[2023-01-11 06:14:50,939][19081] Decorrelating experience for 128 frames...
[2023-01-11 06:14:50,940][19077] Decorrelating experience for 128 frames...
[2023-01-11 06:14:50,941][19080] Decorrelating experience for 128 frames...
[2023-01-11 06:14:50,943][19083] Decorrelating experience for 128 frames...
[2023-01-11 06:14:50,943][19078] Decorrelating experience for 128 frames...
[2023-01-11 06:14:50,943][19079] Decorrelating experience for 128 frames...
[2023-01-11 06:14:50,944][19082] Decorrelating experience for 128 frames...
[2023-01-11 06:14:51,044][19076] Decorrelating experience for 192 frames...
[2023-01-11 06:14:51,047][19081] Decorrelating experience for 192 frames...
[2023-01-11 06:14:51,047][19077] Decorrelating experience for 192 frames...
[2023-01-11 06:14:51,050][19083] Decorrelating experience for 192 frames...
[2023-01-11 06:14:51,050][19080] Decorrelating experience for 192 frames...
[2023-01-11 06:14:51,051][19078] Decorrelating experience for 192 frames...
[2023-01-11 06:14:51,051][19082] Decorrelating experience for 192 frames...
[2023-01-11 06:14:51,054][19079] Decorrelating experience for 192 frames...
[2023-01-11 06:14:51,235][19076] Decorrelating experience for 256 frames...
[2023-01-11 06:14:51,238][19083] Decorrelating experience for 256 frames...
[2023-01-11 06:14:51,239][19077] Decorrelating experience for 256 frames...
[2023-01-11 06:14:51,242][19078] Decorrelating experience for 256 frames...
[2023-01-11 06:14:51,243][19081] Decorrelating experience for 256 frames...
[2023-01-11 06:14:51,247][19080] Decorrelating experience for 256 frames...
[2023-01-11 06:14:51,248][19082] Decorrelating experience for 256 frames...
[2023-01-11 06:14:51,251][19079] Decorrelating experience for 256 frames...
[2023-01-11 06:14:51,333][18976] Fps is (10 sec: nan, 60 sec: nan, 300 sec: nan). Total num frames: 0. Throughput: 0: nan. Samples: 0. Policy #0 lag: (min: -1.0, avg: -1.0, max: -1.0)
[2023-01-11 06:14:51,335][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000000_0.pth...
[2023-01-11 06:14:51,445][19083] Decorrelating experience for 320 frames...
[2023-01-11 06:14:51,447][19077] Decorrelating experience for 320 frames...
[2023-01-11 06:14:51,450][19076] Decorrelating experience for 320 frames...
[2023-01-11 06:14:51,452][19078] Decorrelating experience for 320 frames...
[2023-01-11 06:14:51,453][19081] Decorrelating experience for 320 frames...
[2023-01-11 06:14:51,457][19082] Decorrelating experience for 320 frames...
[2023-01-11 06:14:51,458][19080] Decorrelating experience for 320 frames...
[2023-01-11 06:14:51,465][19079] Decorrelating experience for 320 frames...
[2023-01-11 06:14:51,706][19083] Decorrelating experience for 384 frames...
[2023-01-11 06:14:51,712][19076] Decorrelating experience for 384 frames...
[2023-01-11 06:14:51,716][19081] Decorrelating experience for 384 frames...
[2023-01-11 06:14:51,717][19078] Decorrelating experience for 384 frames...
[2023-01-11 06:14:51,717][19077] Decorrelating experience for 384 frames...
[2023-01-11 06:14:51,723][19082] Decorrelating experience for 384 frames...
[2023-01-11 06:14:51,724][19080] Decorrelating experience for 384 frames...
[2023-01-11 06:14:51,737][19079] Decorrelating experience for 384 frames...
[2023-01-11 06:14:52,017][19083] Decorrelating experience for 448 frames...
[2023-01-11 06:14:52,029][19076] Decorrelating experience for 448 frames...
[2023-01-11 06:14:52,036][19077] Decorrelating experience for 448 frames...
[2023-01-11 06:14:52,037][19082] Decorrelating experience for 448 frames...
[2023-01-11 06:14:52,038][19081] Decorrelating experience for 448 frames...
[2023-01-11 06:14:52,041][19078] Decorrelating experience for 448 frames...
[2023-01-11 06:14:52,053][19080] Decorrelating experience for 448 frames...
[2023-01-11 06:14:52,064][19079] Decorrelating experience for 448 frames...
[2023-01-11 06:14:56,333][18976] Fps is (10 sec: 819.2, 60 sec: 819.2, 300 sec: 819.2). Total num frames: 4096. Throughput: 0: 177.6. Samples: 888. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2023-01-11 06:14:56,333][18976] Avg episode reward: [(0, '-126.473')]
[2023-01-11 06:14:59,307][18976] Heartbeat connected on Batcher_0
[2023-01-11 06:14:59,311][18976] Heartbeat connected on LearnerWorker_p0
[2023-01-11 06:14:59,322][18976] Heartbeat connected on RolloutWorker_w0
[2023-01-11 06:14:59,326][18976] Heartbeat connected on InferenceWorker_p0-w0
[2023-01-11 06:14:59,331][18976] Heartbeat connected on RolloutWorker_w1
[2023-01-11 06:14:59,335][18976] Heartbeat connected on RolloutWorker_w2
[2023-01-11 06:14:59,340][18976] Heartbeat connected on RolloutWorker_w3
[2023-01-11 06:14:59,341][18976] Heartbeat connected on RolloutWorker_w5
[2023-01-11 06:14:59,341][18976] Heartbeat connected on RolloutWorker_w4
[2023-01-11 06:14:59,351][18976] Heartbeat connected on RolloutWorker_w6
[2023-01-11 06:14:59,351][18976] Heartbeat connected on RolloutWorker_w7
[2023-01-11 06:15:01,333][18976] Fps is (10 sec: 3276.8, 60 sec: 3276.8, 300 sec: 3276.8). Total num frames: 32768. Throughput: 0: 2408.4. Samples: 24084. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
[2023-01-11 06:15:01,333][18976] Avg episode reward: [(0, '-130.992')]
[2023-01-11 06:15:03,917][19075] Updated weights for policy 0, policy_version 80 (0.0008)
[2023-01-11 06:15:06,333][18976] Fps is (10 sec: 4096.0, 60 sec: 3003.7, 300 sec: 3003.7). Total num frames: 45056. Throughput: 0: 3008.0. Samples: 45120. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
[2023-01-11 06:15:06,334][18976] Avg episode reward: [(0, '-227.535')]
[2023-01-11 06:15:06,339][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000088_45056.pth...
[2023-01-11 06:15:11,333][18976] Fps is (10 sec: 4096.0, 60 sec: 3686.4, 300 sec: 3686.4). Total num frames: 73728. Throughput: 0: 3020.8. Samples: 60416. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2023-01-11 06:15:11,333][18976] Avg episode reward: [(0, '-297.337')]
[2023-01-11 06:15:12,628][19075] Updated weights for policy 0, policy_version 160 (0.0007)
[2023-01-11 06:15:16,332][18976] Fps is (10 sec: 5324.9, 60 sec: 3932.2, 300 sec: 3932.2). Total num frames: 98304. Throughput: 0: 3737.8. Samples: 93444. Policy #0 lag: (min: 2.0, avg: 2.0, max: 2.0)
[2023-01-11 06:15:16,333][18976] Avg episode reward: [(0, '-127.822')]
[2023-01-11 06:15:20,209][19075] Updated weights for policy 0, policy_version 240 (0.0007)
[2023-01-11 06:15:21,333][18976] Fps is (10 sec: 5324.8, 60 sec: 4232.5, 300 sec: 4232.5). Total num frames: 126976. Throughput: 0: 4195.5. Samples: 125864. Policy #0 lag: (min: 5.0, avg: 5.0, max: 5.0)
[2023-01-11 06:15:21,333][18976] Avg episode reward: [(0, '-105.405')]
[2023-01-11 06:15:21,340][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000248_126976.pth...
[2023-01-11 06:15:21,347][19062] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000000_0.pth
[2023-01-11 06:15:21,348][19062] Saving new best policy, reward=-105.405!
[2023-01-11 06:15:26,333][18976] Fps is (10 sec: 5734.3, 60 sec: 4447.1, 300 sec: 4447.1). Total num frames: 155648. Throughput: 0: 4065.3. Samples: 142284. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2023-01-11 06:15:26,333][18976] Avg episode reward: [(0, '-109.426')]
[2023-01-11 06:15:27,645][19075] Updated weights for policy 0, policy_version 320 (0.0007)
[2023-01-11 06:15:31,332][18976] Fps is (10 sec: 5324.9, 60 sec: 4505.6, 300 sec: 4505.6). Total num frames: 180224. Throughput: 0: 4384.3. Samples: 175372. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2023-01-11 06:15:31,333][18976] Avg episode reward: [(0, '-72.518')]
[2023-01-11 06:15:31,333][19062] Saving new best policy, reward=-72.518!
[2023-01-11 06:15:35,206][19075] Updated weights for policy 0, policy_version 400 (0.0007)
[2023-01-11 06:15:36,333][18976] Fps is (10 sec: 5324.8, 60 sec: 4642.1, 300 sec: 4642.1). Total num frames: 208896. Throughput: 0: 4618.4. Samples: 207828. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2023-01-11 06:15:36,333][18976] Avg episode reward: [(0, '-35.778')]
[2023-01-11 06:15:36,339][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000408_208896.pth...
[2023-01-11 06:15:36,347][19062] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000088_45056.pth
[2023-01-11 06:15:36,347][19062] Saving new best policy, reward=-35.778!
[2023-01-11 06:15:41,333][18976] Fps is (10 sec: 5734.3, 60 sec: 4751.4, 300 sec: 4751.4). Total num frames: 237568. Throughput: 0: 4961.7. Samples: 224164. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
[2023-01-11 06:15:41,333][18976] Avg episode reward: [(0, '-30.077')]
[2023-01-11 06:15:41,334][19062] Saving new best policy, reward=-30.077!
[2023-01-11 06:15:42,735][19075] Updated weights for policy 0, policy_version 480 (0.0007)
[2023-01-11 06:15:46,333][18976] Fps is (10 sec: 5324.9, 60 sec: 4766.3, 300 sec: 4766.3). Total num frames: 262144. Throughput: 0: 5175.8. Samples: 256996. Policy #0 lag: (min: 3.0, avg: 3.0, max: 3.0)
[2023-01-11 06:15:46,333][18976] Avg episode reward: [(0, '-40.853')]
[2023-01-11 06:15:50,214][19075] Updated weights for policy 0, policy_version 560 (0.0007)
[2023-01-11 06:15:51,333][18976] Fps is (10 sec: 5324.8, 60 sec: 4846.9, 300 sec: 4846.9). Total num frames: 290816. Throughput: 0: 5434.8. Samples: 289688. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2023-01-11 06:15:51,333][18976] Avg episode reward: [(0, '-38.381')]
[2023-01-11 06:15:51,340][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000568_290816.pth...
[2023-01-11 06:15:51,347][19062] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000248_126976.pth
[2023-01-11 06:15:56,333][18976] Fps is (10 sec: 5734.3, 60 sec: 5256.5, 300 sec: 4915.2). Total num frames: 319488. Throughput: 0: 5459.1. Samples: 306076. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
[2023-01-11 06:15:56,333][18976] Avg episode reward: [(0, '-20.183')]
[2023-01-11 06:15:56,334][19062] Saving new best policy, reward=-20.183!
[2023-01-11 06:15:57,590][19075] Updated weights for policy 0, policy_version 640 (0.0006)
[2023-01-11 06:16:01,333][18976] Fps is (10 sec: 5324.9, 60 sec: 5188.3, 300 sec: 4915.2). Total num frames: 344064. Throughput: 0: 5477.0. Samples: 339908. Policy #0 lag: (min: 1.0, avg: 1.0, max: 1.0)
[2023-01-11 06:16:01,333][18976] Avg episode reward: [(0, '7.282')]
[2023-01-11 06:16:01,335][19062] Saving new best policy, reward=7.282!
[2023-01-11 06:16:06,333][18976] Fps is (10 sec: 4096.0, 60 sec: 5256.5, 300 sec: 4806.0). Total num frames: 360448. Throughput: 0: 5188.2. Samples: 359332. Policy #0 lag: (min: 4.0, avg: 4.0, max: 4.0)
[2023-01-11 06:16:06,333][18976] Avg episode reward: [(0, '20.695')]
[2023-01-11 06:16:06,339][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000704_360448.pth...
[2023-01-11 06:16:06,346][19062] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000408_208896.pth
[2023-01-11 06:16:06,347][19062] Saving new best policy, reward=20.695!
[2023-01-11 06:16:07,412][19075] Updated weights for policy 0, policy_version 720 (0.0007)
[2023-01-11 06:16:11,332][18976] Fps is (10 sec: 4505.6, 60 sec: 5256.5, 300 sec: 4864.0). Total num frames: 389120. Throughput: 0: 5189.2. Samples: 375796. Policy #0 lag: (min: 7.0, avg: 7.0, max: 7.0)
[2023-01-11 06:16:11,333][18976] Avg episode reward: [(0, '42.463')]
[2023-01-11 06:16:11,334][19062] Saving new best policy, reward=42.463!
[2023-01-11 06:16:14,150][18976] Keyboard interrupt detected in the event loop EvtLoop [Runner_EvtLoop, process=main process 18976], exiting...
[2023-01-11 06:16:14,150][18976] Runner profile tree view:
main_loop: 94.8007
[2023-01-11 06:16:14,150][19080] Stopping RolloutWorker_w4...
[2023-01-11 06:16:14,151][19079] Stopping RolloutWorker_w2...
[2023-01-11 06:16:14,151][18976] Collected {0: 401408}, FPS: 4234.2
[2023-01-11 06:16:14,151][19082] Stopping RolloutWorker_w5...
[2023-01-11 06:16:14,151][19079] Loop rollout_proc2_evt_loop terminating...
[2023-01-11 06:16:14,151][19080] Loop rollout_proc4_evt_loop terminating...
[2023-01-11 06:16:14,152][19082] Loop rollout_proc5_evt_loop terminating...
[2023-01-11 06:16:14,151][19083] Stopping RolloutWorker_w7...
[2023-01-11 06:16:14,151][19076] Stopping RolloutWorker_w1...
[2023-01-11 06:16:14,151][19078] Stopping RolloutWorker_w3...
[2023-01-11 06:16:14,154][19083] Loop rollout_proc7_evt_loop terminating...
[2023-01-11 06:16:14,154][19078] Loop rollout_proc3_evt_loop terminating...
[2023-01-11 06:16:14,154][19076] Loop rollout_proc1_evt_loop terminating...
[2023-01-11 06:16:14,151][19062] Stopping Batcher_0...
[2023-01-11 06:16:14,151][19081] Stopping RolloutWorker_w6...
[2023-01-11 06:16:14,151][19077] Stopping RolloutWorker_w0...
[2023-01-11 06:16:14,157][19062] Loop batcher_evt_loop terminating...
[2023-01-11 06:16:14,157][19077] Loop rollout_proc0_evt_loop terminating...
[2023-01-11 06:16:14,157][19081] Loop rollout_proc6_evt_loop terminating...
[2023-01-11 06:16:14,211][19075] Weights refcount: 2 0
[2023-01-11 06:16:14,213][19075] Stopping InferenceWorker_p0-w0...
[2023-01-11 06:16:14,214][19075] Loop inference_proc0-0_evt_loop terminating...
[2023-01-11 06:16:14,224][19062] Saving /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000792_405504.pth...
[2023-01-11 06:16:14,232][19062] Removing /home/andrew_huggingface_co/sample-factory/train_dir/ant_test/checkpoint_p0/checkpoint_000000568_290816.pth
[2023-01-11 06:16:14,233][19062] Stopping LearnerWorker_p0...
[2023-01-11 06:16:14,233][19062] Loop learner_proc0_evt_loop terminating...