fix the wrong GPU index issue of multi-node

Files changed (3) hide show

configs/metadata.json CHANGED Viewed

@@ -1,7 +1,8 @@
 {
     "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20220324.json",
-    "version": "0.1.8",
     "changelog": {
         "0.1.8": "Update evalaute doc, GPU usage details, and dataset preparation instructions",
         "0.1.7": "remove error dollar symbol in readme",
         "0.1.6": "add RAM usage with CacheDataset and GPU consumtion warning",
@@ -13,7 +14,7 @@
         "0.1.0": "complete the model package",
         "0.0.1": "initialize the model package structure"
     },
-    "monai_version": "1.2.0rc6",
     "pytorch_version": "1.13.1",
     "numpy_version": "1.22.2",
     "optional_packages_version": {

 {
     "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20220324.json",
+    "version": "0.1.9",
     "changelog": {
+        "0.1.9": "fix the wrong GPU index issue of multi-node",
         "0.1.8": "Update evalaute doc, GPU usage details, and dataset preparation instructions",
         "0.1.7": "remove error dollar symbol in readme",
         "0.1.6": "add RAM usage with CacheDataset and GPU consumtion warning",
         "0.1.0": "complete the model package",
         "0.0.1": "initialize the model package structure"
     },
+    "monai_version": "1.2.0",
     "pytorch_version": "1.13.1",
     "numpy_version": "1.22.2",
     "optional_packages_version": {

configs/multi_gpu_evaluate.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-    "device": "$torch.device(f'cuda:{dist.get_rank()}')",
     "network": {
         "_target_": "torch.nn.parallel.DistributedDataParallel",
         "module": "$@network_def.to(@device)",

 {
+    "device": "$torch.device('cuda:' + os.environ['LOCAL_RANK'])",
     "network": {
         "_target_": "torch.nn.parallel.DistributedDataParallel",
         "module": "$@network_def.to(@device)",

configs/multi_gpu_train.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-    "device": "$torch.device(f'cuda:{dist.get_rank()}')",
     "network": {
         "_target_": "torch.nn.parallel.DistributedDataParallel",
         "module": "$@network_def.to(@device)",

 {
+    "device": "$torch.device('cuda:' + os.environ['LOCAL_RANK'])",
     "network": {
         "_target_": "torch.nn.parallel.DistributedDataParallel",
         "module": "$@network_def.to(@device)",