katielink committed on
Commit
0d32764
1 Parent(s): 613cc39

fix the wrong GPU index issue of multi-node

Browse files
configs/metadata.json CHANGED
@@ -1,7 +1,8 @@
1
  {
2
  "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20220324.json",
3
- "version": "0.1.8",
4
  "changelog": {
 
5
  "0.1.8": "Update evalaute doc, GPU usage details, and dataset preparation instructions",
6
  "0.1.7": "remove error dollar symbol in readme",
7
  "0.1.6": "add RAM usage with CacheDataset and GPU consumtion warning",
@@ -13,7 +14,7 @@
13
  "0.1.0": "complete the model package",
14
  "0.0.1": "initialize the model package structure"
15
  },
16
- "monai_version": "1.2.0rc6",
17
  "pytorch_version": "1.13.1",
18
  "numpy_version": "1.22.2",
19
  "optional_packages_version": {
 
1
  {
2
  "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20220324.json",
3
+ "version": "0.1.9",
4
  "changelog": {
5
+ "0.1.9": "fix the wrong GPU index issue of multi-node",
6
  "0.1.8": "Update evalaute doc, GPU usage details, and dataset preparation instructions",
7
  "0.1.7": "remove error dollar symbol in readme",
8
  "0.1.6": "add RAM usage with CacheDataset and GPU consumtion warning",
 
14
  "0.1.0": "complete the model package",
15
  "0.0.1": "initialize the model package structure"
16
  },
17
+ "monai_version": "1.2.0",
18
  "pytorch_version": "1.13.1",
19
  "numpy_version": "1.22.2",
20
  "optional_packages_version": {
configs/multi_gpu_evaluate.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "device": "$torch.device(f'cuda:{dist.get_rank()}')",
3
  "network": {
4
  "_target_": "torch.nn.parallel.DistributedDataParallel",
5
  "module": "$@network_def.to(@device)",
 
1
  {
2
+ "device": "$torch.device('cuda:' + os.environ['LOCAL_RANK'])",
3
  "network": {
4
  "_target_": "torch.nn.parallel.DistributedDataParallel",
5
  "module": "$@network_def.to(@device)",
configs/multi_gpu_train.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "device": "$torch.device(f'cuda:{dist.get_rank()}')",
3
  "network": {
4
  "_target_": "torch.nn.parallel.DistributedDataParallel",
5
  "module": "$@network_def.to(@device)",
 
1
  {
2
+ "device": "$torch.device('cuda:' + os.environ['LOCAL_RANK'])",
3
  "network": {
4
  "_target_": "torch.nn.parallel.DistributedDataParallel",
5
  "module": "$@network_def.to(@device)",