folders:
  parent_dir: "./results/"
  model_name: "sr6_128x4_das_nc"

settings:
  game_id: "sfiii3n"
  step_ratio: 6
  frame_shape: !!python/tuple [128, 128, 1]
  continue_game: 0.0
  action_space: "discrete"
  characters: !!python/tuple ["Ken", "Ryu"]
  difficulty: 7
  outfits: 2

wrappers_settings:
  normalize_reward: true
  no_attack_buttons_combinations: true
  stack_frames: 4
  dilation: 1
  add_last_action: true
  stack_actions: 12
  scale: true
  exclude_image_scaling: true
  role_relative: true
  flatten: true
  filter_keys: ["action", "own_health", "opp_health", "own_side", "opp_side", "opp_character", "stage", "timer"]

policy_kwargs:
  #net_arch: [{ pi: [64, 64], vf: [32, 32] }]
  net_arch: [64, 64]

ppo_settings:
  gamma: 0.94
  model_checkpoint: "600000"     # 0: No checkpoint, else: Load checkpoint (if previously trained)
  learning_rate: [2.5e-4, 2.5e-6] # To start
  clip_range: [0.15, 0.025] # To start
  #learning_rate: [5.0e-5, 2.5e-6] # Fine Tuning
  #clip_range: [0.075, 0.025] # Fine Tuning
  batch_size: 512 #8 #nminibatches gave different batch size depending on the number of environments: batch_size = (n_steps * n_envs) // nminibatches
  n_epochs: 4
  n_steps: 512
  autosave_freq: 50000
  time_steps: 750000