Update README.md

README.md CHANGED
@@ -7,3 +7,67 @@ tags:
- stable-baseline3
---

# Model Card for Street Fighter III DRL Agent

<!-- Provide a quick summary of what the model is/does. -->

A deep reinforcement learning (DRL) agent that plays Street Fighter III, trained with the DIAMBRA AI library.

## Model Details

<!-- Provide the basic links for the model. -->

- **My Code for this model:** https://github.com/hishamcse/Advanced-DRL-Renegades-Game-Bots/tree/main/VI%20-%20Diambra_AI_Street-Fighter-III
- **Tutorial:** https://github.com/alexpalms/deep-rl-class/blob/main/units/en/unitbonus3
- **Documentation:** https://docs.diambra.ai/
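
For a quick sense of how an agent like this is run, here is a minimal, hypothetical inference sketch. The checkpoint path is illustrative (not from this repo), and a real evaluation must recreate the same environment settings and wrappers used during training (see "Training Details" below); defaults are used here only for brevity.

```python
# Hypothetical inference sketch using DIAMBRA Arena's Gymnasium-style API.
import diambra.arena
from stable_baselines3 import PPO

# NOTE: defaults for brevity; real evaluation must reuse the training
# settings/wrappers so the observation space matches the policy.
env = diambra.arena.make("sfiii3n", render_mode="human")
agent = PPO.load("./results/model.zip")  # hypothetical checkpoint path

obs, info = env.reset()
while True:
    action, _ = agent.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        break
env.close()
```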

## Training Details

#### Training Hyperparameters

```yaml
folders:
  parent_dir: "./results/"
  model_name: "sr6_128x4_das_nc"

settings:
  game_id: "sfiii3n"
  step_ratio: 6
  frame_shape: !!python/tuple [128, 128, 1]
  continue_game: 0.0
  action_space: "discrete"
  characters: "Ken"
  difficulty: 6
  outfits: 2

wrappers_settings:
  normalize_reward: true
  no_attack_buttons_combinations: true
  stack_frames: 4
  dilation: 1
  add_last_action: true
  stack_actions: 12
  scale: true
  exclude_image_scaling: true
  role_relative: true
  flatten: true
  filter_keys: ["action", "own_health", "opp_health", "own_side", "opp_side", "opp_character", "stage", "timer"]

policy_kwargs:
  # net_arch: [{ pi: [64, 64], vf: [32, 32] }]
  net_arch: [64, 64]

ppo_settings:
  gamma: 0.94
  model_checkpoint: "0"  # "0": start from scratch; "100000": load a checkpoint previously trained for 100000 steps
  learning_rate: [2.5e-4, 2.5e-6]  # [initial, final] for the initial training run
  clip_range: [0.15, 0.025]  # [initial, final] for the initial training run
  # learning_rate: [5.0e-5, 2.5e-6]  # fine-tuning
  # clip_range: [0.075, 0.025]  # fine-tuning
  batch_size: 512  # the legacy nminibatches setting gave a batch size that depended on the number of environments: batch_size = (n_steps * n_envs) // nminibatches
  n_epochs: 4
  n_steps: 512
  autosave_freq: 10000
  time_steps: 100000
```
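
This config follows the layout of the DIAMBRA Arena stable-baselines3 training examples. As a rough sketch of how such a file is consumed, assuming the helper names from those examples (`make_sb3_env`, `load_settings_flat_dict`, `linear_schedule`; treat exact module paths and signatures as assumptions for your installed version):

```python
# Minimal training sketch, assuming the DIAMBRA Arena SB3 helpers used in the
# official examples; module paths/signatures may differ between versions.
import yaml
from diambra.arena import load_settings_flat_dict
from diambra.arena.stable_baselines3.make_sb3_env import (
    make_sb3_env, EnvironmentSettings, WrappersSettings)
from diambra.arena.stable_baselines3.sb3_utils import linear_schedule
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import CheckpointCallback

# FullLoader is needed because of the !!python/tuple tag in frame_shape.
with open("sr6_128x4_das_nc.yaml") as f:  # illustrative path
    params = yaml.load(f, Loader=yaml.FullLoader)

settings = load_settings_flat_dict(EnvironmentSettings, params["settings"])
wrappers = load_settings_flat_dict(WrappersSettings, params["wrappers_settings"])

# Vectorized env; the number of parallel envs comes from the DIAMBRA CLI.
env, num_envs = make_sb3_env(settings.game_id, settings, wrappers)

ppo = params["ppo_settings"]
agent = PPO(
    "MultiInputPolicy",  # dict observations: stacked frames + RAM states
    env,
    gamma=ppo["gamma"],
    batch_size=ppo["batch_size"],
    n_epochs=ppo["n_epochs"],
    n_steps=ppo["n_steps"],
    # Two-element lists in the config are [initial, final] values,
    # annealed linearly over training.
    learning_rate=linear_schedule(ppo["learning_rate"][0], ppo["learning_rate"][1]),
    clip_range=linear_schedule(ppo["clip_range"][0], ppo["clip_range"][1]),
    policy_kwargs=params["policy_kwargs"],
)

# SB3's CheckpointCallback stands in here for the examples' autosave helper.
agent.learn(
    total_timesteps=ppo["time_steps"],
    callback=CheckpointCallback(save_freq=ppo["autosave_freq"],
                                save_path=params["folders"]["parent_dir"]),
)
env.close()
```

Training jobs are typically launched through the DIAMBRA CLI (e.g. `diambra run -s <num_envs> python train.py`), which provisions the emulator instances the environment connects to.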