masterdezign
committed on
Commit
•
7beac7d
1
Parent(s):
6c123e0
10_000_000 iterations
Browse files
- README.md +6 -42
- config.json +0 -0
- dqn2-SpaceInvadersNoFrameskip-v4.zip +3 -0
- dqn2-SpaceInvadersNoFrameskip-v4/_stable_baselines3_version +1 -0
- dqn2-SpaceInvadersNoFrameskip-v4/data +0 -0
- dqn2-SpaceInvadersNoFrameskip-v4/policy.optimizer.pth +3 -0
- dqn2-SpaceInvadersNoFrameskip-v4/policy.pth +3 -0
- dqn2-SpaceInvadersNoFrameskip-v4/pytorch_variables.pth +3 -0
- dqn2-SpaceInvadersNoFrameskip-v4/system_info.txt +7 -0
- results.json +1 -1
README.md
CHANGED
@@ -10,7 +10,7 @@ model-index:
|
|
10 |
results:
|
11 |
- metrics:
|
12 |
- type: mean_reward
|
13 |
-
value:
|
14 |
name: mean_reward
|
15 |
task:
|
16 |
type: reinforcement-learning
|
@@ -25,48 +25,12 @@ This is a trained model of a **DQN** agent playing **SpaceInvadersNoFrameskip-v4
|
|
25 |
using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3).
|
26 |
|
27 |
## Usage (with Stable-baselines3)
|
|
|
28 |
|
29 |
-
```python
|
30 |
-
from stable_baselines3.common.env_util import make_atari_env
|
31 |
-
from stable_baselines3.common.vec_env import VecFrameStack
|
32 |
-
from stable_baselines3 import DQN
|
33 |
-
from stable_baselines3.common.evaluation import evaluate_policy
|
34 |
-
from huggingface_sb3 import load_from_hub, package_to_hub
|
35 |
-
from stable_baselines3.common.utils import set_random_seed
|
36 |
-
|
37 |
-
env_id = "SpaceInvadersNoFrameskip-v4"
|
38 |
-
|
39 |
-
env = make_atari_env(env_id,
|
40 |
-
n_envs=12,
|
41 |
-
# Improving reproducibility
|
42 |
-
seed=1)
|
43 |
-
env = VecFrameStack(env, n_stack=4) # Stack last four images
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
# Using these parameters as default: https://huggingface.co/micheljperez/dqn-SpaceInvadersNoFrameskip-v4
|
49 |
-
model = DQN(policy = "CnnPolicy",
|
50 |
-
env = env,
|
51 |
-
batch_size = 32,
|
52 |
-
buffer_size = 100_000,
|
53 |
-
exploration_final_eps = 0.01,
|
54 |
-
exploration_fraction = 0.025,
|
55 |
-
gradient_steps = 1,
|
56 |
-
learning_rate = 1e-4,
|
57 |
-
learning_starts = 100_000,
|
58 |
-
optimize_memory_usage = True,
|
59 |
-
replay_buffer_kwargs = {"handle_timeout_termination": False},
|
60 |
-
target_update_interval = 1000,
|
61 |
-
train_freq = 4,
|
62 |
-
# normalize = False,
|
63 |
-
tensorboard_log = "./tensorboard",
|
64 |
-
verbose=1
|
65 |
-
)
|
66 |
-
|
67 |
-
f = load_from_hub('masterdezign/dqn-SpaceInvadersNoFrameskip-v4', 'dqn-SpaceInvadersNoFrameskip-v4.zip')
|
68 |
-
model = model.load(f)
|
69 |
|
70 |
-
|
71 |
-
print(f"Mean reward = {mean_reward:.2f} +/- {std_reward:.2f}")
|
72 |
```
|
|
|
10 |
results:
|
11 |
- metrics:
|
12 |
- type: mean_reward
|
13 |
+
value: 614.50 +/- 224.56
|
14 |
name: mean_reward
|
15 |
task:
|
16 |
type: reinforcement-learning
|
|
|
25 |
using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3).
|
26 |
|
27 |
## Usage (with Stable-baselines3)
|
28 |
+
TODO: Add your code
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
+
```python
|
32 |
+
from stable_baselines3 import ...
|
33 |
+
from huggingface_sb3 import load_from_hub
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
+
...
|
|
|
36 |
```
|
config.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
dqn2-SpaceInvadersNoFrameskip-v4.zip
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a779055156bda55a8d349d0461b011ad28ec05cd8f8f681fac399a7e6e5b8b6
|
3 |
+
size 28089704
|
dqn2-SpaceInvadersNoFrameskip-v4/_stable_baselines3_version
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
1.6.0
|
dqn2-SpaceInvadersNoFrameskip-v4/data
ADDED
The diff for this file is too large to render.
See raw diff
|
|
dqn2-SpaceInvadersNoFrameskip-v4/policy.optimizer.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e480362a02e6ca77eb5c7c331634f8efe5293bf6152117417f309d33967791fc
|
3 |
+
size 13505611
|
dqn2-SpaceInvadersNoFrameskip-v4/policy.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ccc1d694d02b4889415a5a64a80c97dd4ce56964ad8c9b6bea5fccf3a71a0c49
|
3 |
+
size 13504937
|
dqn2-SpaceInvadersNoFrameskip-v4/pytorch_variables.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d030ad8db708280fcae77d87e973102039acd23a11bdecc3db8eb6c0ac940ee1
|
3 |
+
size 431
|
dqn2-SpaceInvadersNoFrameskip-v4/system_info.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
OS: Linux-5.4.0-113-generic-x86_64-with-debian-buster-sid #127-Ubuntu SMP Wed May 18 14:30:56 UTC 2022
|
2 |
+
Python: 3.7.13
|
3 |
+
Stable-Baselines3: 1.6.0
|
4 |
+
PyTorch: 1.12.0
|
5 |
+
GPU Enabled: True
|
6 |
+
Numpy: 1.21.5
|
7 |
+
Gym: 0.21.0
|
results.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"mean_reward":
|
|
|
1 |
+
{"mean_reward": 614.5, "std_reward": 224.56012557887476, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2022-07-21T08:36:39.345391"}
|