glenn-jocher
committed on
Commit
•
4bad914
1
Parent(s):
2c073cd
Update resume.py (#4115)
Browse files
- utils/aws/resume.py +1 -1
utils/aws/resume.py
CHANGED
@@ -28,7 +28,7 @@ for last in path.rglob('*/**/last.pt'):
|
|
28 |
|
29 |
if ddp: # multi-GPU
|
30 |
port += 1
|
31 |
-
cmd = f'python -m torch.distributed.launch --nproc_per_node {nd} --master_port {port} train.py --resume {last}'
|
32 |
else: # single-GPU
|
33 |
cmd = f'python train.py --resume {last}'
|
34 |
|
|
|
28 |
|
29 |
if ddp: # multi-GPU
|
30 |
port += 1
|
31 |
+
cmd = f'python -m torch.distributed.run --nproc_per_node {nd} --master_port {port} train.py --resume {last}'
|
32 |
else: # single-GPU
|
33 |
cmd = f'python train.py --resume {last}'
|
34 |
|