Fix the wrong GPU index issue in multi-node runs

Files changed (3)

configs/metadata.json CHANGED Viewed

@@ -1,7 +1,8 @@
 {
     "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20220324.json",
-    "version": "0.1.3",
     "changelog": {
         "0.1.3": "remove error dollar symbol in readme",
         "0.1.2": "add RAM warning",
         "0.1.1": "enable deterministic eval and inference",
@@ -16,7 +17,7 @@
         "0.0.2": "Update The Torch Vision Transform",
         "0.0.1": "initialize the model package structure"
     },
-    "monai_version": "1.2.0rc6",
     "pytorch_version": "1.13.1",
     "numpy_version": "1.22.2",
     "optional_packages_version": {

 {
     "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20220324.json",
+    "version": "0.1.4",
     "changelog": {
+        "0.1.4": "fix the wrong GPU index issue of multi-node",
         "0.1.3": "remove error dollar symbol in readme",
         "0.1.2": "add RAM warning",
         "0.1.1": "enable deterministic eval and inference",
         "0.0.2": "Update The Torch Vision Transform",
         "0.0.1": "initialize the model package structure"
     },
+    "monai_version": "1.2.0",
     "pytorch_version": "1.13.1",
     "numpy_version": "1.22.2",
     "optional_packages_version": {

configs/multi_gpu_evaluate.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-    "device": "$torch.device(f'cuda:{dist.get_rank()}')",
     "network": {
         "_target_": "torch.nn.parallel.DistributedDataParallel",
         "module": "$@network_def.to(@device)",

 {
+    "device": "$torch.device('cuda:' + os.environ['LOCAL_RANK'])",
     "network": {
         "_target_": "torch.nn.parallel.DistributedDataParallel",
         "module": "$@network_def.to(@device)",

configs/multi_gpu_train.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-    "device": "$torch.device(f'cuda:{dist.get_rank()}')",
     "network": {
         "_target_": "torch.nn.parallel.DistributedDataParallel",
         "module": "$@network_def.to(@device)",

 {
+    "device": "$torch.device('cuda:' + os.environ['LOCAL_RANK'])",
     "network": {
         "_target_": "torch.nn.parallel.DistributedDataParallel",
         "module": "$@network_def.to(@device)",