Graphcore
thorinf committed on
Commit
4416d6b
1 Parent(s): 774002b

4-IPU base config

Browse files

Uses fewer IPUs than the current model-hub config file. Note that this uses a different assignment method in parallelisation, which will be submitted as a separate PR.

Files changed (1) hide show
  1. ipu_config.json +6 -6
ipu_config.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "optimizer_state_offchip": true,
3
  "replicated_tensor_sharding": true,
4
  "enable_half_first_order_momentum": true,
5
  "enable_half_partials": true,
6
  "recompute_checkpoint_every_layer": false,
7
- "device_iterations": 2,
8
  "inference_device_iterations": 4,
9
  "replication_factor": 2,
10
  "inference_replication_factor": 2,
11
- "gradient_accumulation_steps": 16,
12
  "executable_cache_dir": "./exe_cache",
13
- "ipus_per_replica": 8,
14
- "layers_per_ipu": [2, 1, 4, 3, 3, 3, 3, 0],
15
- "matmul_proportion": [0.1, 0.1, 0.1, 0.3, 0.3, 0.3, 0.3, 0.1]
16
  }
 
1
  {
2
+ "optimizer_state_offchip": false,
3
  "replicated_tensor_sharding": true,
4
  "enable_half_first_order_momentum": true,
5
  "enable_half_partials": true,
6
  "recompute_checkpoint_every_layer": false,
7
+ "device_iterations": 10,
8
  "inference_device_iterations": 4,
9
  "replication_factor": 2,
10
  "inference_replication_factor": 2,
11
+ "gradient_accumulation_steps": 32,
12
  "executable_cache_dir": "./exe_cache",
13
+ "ipus_per_replica": 4,
14
+ "layers_per_ipu": [6, 5, 5, 7],
15
+ "matmul_proportion": [0.2, 1.0, 1.0, 1.0]
16
  }