Update README.md

README.md CHANGED
@@ -7,3 +7,67 @@ tags:
- stable-baseline3
---

# Model Card for Street Fighter III DRL Agent

<!-- Provide a quick summary of what the model is/does. -->

A deep reinforcement learning (DRL) agent that plays Street Fighter III, trained with the DIAMBRA AI library.

## Model Details

<!-- Provide the basic links for the model. -->

- **My Code for this model:** https://github.com/hishamcse/Advanced-DRL-Renegades-Game-Bots/tree/main/VI%20-%20Diambra_AI_Street-Fighter-III
- **Tutorial:** https://github.com/alexpalms/deep-rl-class/blob/main/units/en/unitbonus3
- **Documentation:** https://docs.diambra.ai/
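
For a quick sense of how an agent like this is run, here is a minimal, hypothetical inference sketch. The checkpoint path is illustrative (not from this repo), and a real evaluation must recreate the same environment settings and wrappers used during training (see "Training Details" below); defaults are used here only for brevity.

```python
# Hypothetical inference sketch using DIAMBRA Arena's Gymnasium-style API.
import diambra.arena
from stable_baselines3 import PPO

# NOTE: defaults for brevity; real evaluation must reuse the training
# settings/wrappers so the observation space matches the policy.
env = diambra.arena.make("sfiii3n", render_mode="human")
agent = PPO.load("./results/model.zip")  # hypothetical checkpoint path

obs, info = env.reset()
while True:
    action, _ = agent.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        break
env.close()
```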

## Training Details

#### Training Hyperparameters

```yaml
folders:
  parent_dir: "./results/"
  model_name: "sr6_128x4_das_nc"

settings:
  game_id: "sfiii3n"
  step_ratio: 6
  frame_shape: !!python/tuple [128, 128, 1]
  continue_game: 0.0
  action_space: "discrete"
  characters: "Ken"
  difficulty: 6
  outfits: 2

wrappers_settings:
  normalize_reward: true
  no_attack_buttons_combinations: true
  stack_frames: 4
  dilation: 1
  add_last_action: true
  stack_actions: 12
  scale: true
  exclude_image_scaling: true
  role_relative: true
  flatten: true
  filter_keys: ["action", "own_health", "opp_health", "own_side", "opp_side", "opp_character", "stage", "timer"]

policy_kwargs:
  # net_arch: [{ pi: [64, 64], vf: [32, 32] }]
  net_arch: [64, 64]

ppo_settings:
  gamma: 0.94
  model_checkpoint: "0"  # "0": start from scratch; "100000": load a checkpoint previously trained for 100000 steps
  learning_rate: [2.5e-4, 2.5e-6]  # [initial, final] for the initial training run
  clip_range: [0.15, 0.025]  # [initial, final] for the initial training run
  # learning_rate: [5.0e-5, 2.5e-6]  # fine-tuning
  # clip_range: [0.075, 0.025]  # fine-tuning
  batch_size: 512  # the legacy nminibatches setting gave a batch size that depended on the number of environments: batch_size = (n_steps * n_envs) // nminibatches
  n_epochs: 4
  n_steps: 512
  autosave_freq: 10000
  time_steps: 100000
```
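
This config follows the layout of the DIAMBRA Arena stable-baselines3 training examples. As a rough sketch of how such a file is consumed, assuming the helper names from those examples (`make_sb3_env`, `load_settings_flat_dict`, `linear_schedule`; treat exact module paths and signatures as assumptions for your installed version):

```python
# Minimal training sketch, assuming the DIAMBRA Arena SB3 helpers used in the
# official examples; module paths/signatures may differ between versions.
import yaml
from diambra.arena import load_settings_flat_dict
from diambra.arena.stable_baselines3.make_sb3_env import (
    make_sb3_env, EnvironmentSettings, WrappersSettings)
from diambra.arena.stable_baselines3.sb3_utils import linear_schedule
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CheckpointCallback

# FullLoader is needed because of the !!python/tuple tag in frame_shape.
with open("sr6_128x4_das_nc.yaml") as f:  # illustrative path
    params = yaml.load(f, Loader=yaml.FullLoader)

settings = load_settings_flat_dict(EnvironmentSettings, params["settings"])
wrappers = load_settings_flat_dict(WrappersSettings, params["wrappers_settings"])

# Vectorized env; the number of parallel envs comes from the DIAMBRA CLI.
env, num_envs = make_sb3_env(settings.game_id, settings, wrappers)

ppo = params["ppo_settings"]
agent = PPO(
    "MultiInputPolicy",  # dict observations: stacked frames + RAM states
    env,
    gamma=ppo["gamma"],
    batch_size=ppo["batch_size"],
    n_epochs=ppo["n_epochs"],
    n_steps=ppo["n_steps"],
    # Two-element lists in the config are [initial, final] values,
    # annealed linearly over training.
    learning_rate=linear_schedule(ppo["learning_rate"][0], ppo["learning_rate"][1]),
    clip_range=linear_schedule(ppo["clip_range"][0], ppo["clip_range"][1]),
    policy_kwargs=params["policy_kwargs"],
)

# SB3's CheckpointCallback stands in here for the examples' autosave helper.
agent.learn(
    total_timesteps=ppo["time_steps"],
    callback=CheckpointCallback(save_freq=ppo["autosave_freq"],
                                save_path=params["folders"]["parent_dir"]),
)
env.close()
```

Training jobs are typically launched through the DIAMBRA CLI (e.g. `diambra run -s <num_envs> python train.py`), which provisions the emulator instances the environment connects to.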