Spaces:
Running
on
Zero
Running
on
Zero
File size: 3,342 Bytes
c968fc3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
# Copyright (c) 2024 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import argparse
import multiprocessing
import os
import subprocess
import time
from utils.logger import Logger
from utils.tool import get_gpu_nums
def run_script(args, gpu_id, self_id):
    """
    Run main.py inside a conda environment with a single GPU made visible.

    The child is started through /bin/bash: the conda profile script is
    sourced, the environment is activated, and `python main.py` is executed
    with any extra options from `args.main_command_args` appended.

    Args:
        args (argparse.Namespace): Parsed options. Reads `conda_path`,
            `conda_env_name` and, when present, `main_command_args`.
        gpu_id (int): ID of the GPU, exported to the child as
            CUDA_VISIBLE_DEVICES.
        self_id (str): ID of the process, exported to the child as SELF_ID.

    Returns:
        None
    """
    env = os.environ.copy()
    env["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
    env["SELF_ID"] = str(self_id)

    # Forward the user-supplied main.py options. They are inserted verbatim
    # because the value is meant to hold several shell-separated arguments.
    extra_args = (getattr(args, "main_command_args", "") or "").strip()
    python_command = "python main.py"
    if extra_args:
        python_command = f"{python_command} {extra_args}"

    command = (
        f"source {args.conda_path} &&"
        'eval "$(conda shell.bash hook)" && '
        f"conda activate {args.conda_env_name} && "
        f"{python_command}"
    )
    try:
        process = subprocess.Popen(command, shell=True, env=env, executable="/bin/bash")
        return_code = process.wait()
        # Only report success when the shell pipeline actually exited cleanly.
        if return_code == 0:
            logger.info(f"Process for GPU {gpu_id} completed successfully.")
        else:
            logger.error(
                f"Process for GPU {gpu_id} exited with return code {return_code}."
            )
    except KeyboardInterrupt:
        logger.warning(f"Multi - GPU {gpu_id}: Interrupted by keyboard, exiting...")
    except Exception as e:
        logger.error(f"Error occurred for GPU {gpu_id}: {e}")
def main(args, self_id):
    """
    Start one run_script worker process per usable GPU and wait for them all.

    GPU IDs range from 0 up to the count reported by get_gpu_nums(); any ID
    listed in `args.disabled_gpu_ids` is skipped.

    Args:
        args (argparse.Namespace): Parsed options. Reads `disabled_gpu_ids`
            (comma-separated GPU IDs) and is passed on to run_script.
        self_id (str): Identifier for the current process, passed on to each
            worker.

    Returns:
        None

    Raises:
        ValueError: If `args.disabled_gpu_ids` contains a non-integer token.
    """
    disabled_ids = []
    if args.disabled_gpu_ids:
        # Ignore empty tokens so input such as "0,1," or "0,,1" does not raise.
        disabled_ids = [
            int(token) for token in args.disabled_gpu_ids.split(",") if token.strip()
        ]
        logger.info(f"CUDA_DISABLE_ID is set, not using: {disabled_ids}")

    gpus_count = get_gpu_nums()
    available_gpus = [i for i in range(gpus_count) if i not in disabled_ids]
    if not available_gpus:
        # Make the "nothing was launched" case visible instead of exiting silently.
        logger.warning("No available GPUs to use, no process started.")
        return

    processes = []
    for gpu_id in available_gpus:
        process = multiprocessing.Process(
            target=run_script, args=(args, gpu_id, self_id)
        )
        process.start()
        logger.info(f"GPU {gpu_id}: started...")
        # Pause one second before launching the next worker
        # (presumably to stagger start-up; confirm before changing).
        time.sleep(1)
        processes.append(process)

    for process in processes:
        process.join()
if __name__ == "__main__":
    # Command-line options as (flag, default, help text); every option is a string.
    option_specs = (
        ("--self_id", "main_multi", "Log ID"),
        (
            "--disabled_gpu_ids",
            "",
            "Comma-separated list of disabled GPU IDs, default uses all available GPUs",
        ),
        ("--conda_path", "/opt/conda/etc/profile.d/conda.sh", "Conda path"),
        ("--conda_env_name", "AudioPipeline", "Conda environment name"),
        (
            "--main_command_args",
            "",
            "Main command args, check available options by `python main.py --help`",
        ),
    )
    cli = argparse.ArgumentParser()
    for flag, default_value, help_text in option_specs:
        cli.add_argument(flag, type=str, default=default_value, help=help_text)
    args = cli.parse_args()

    # If SELF_ID is present in the environment, append it to the log ID.
    self_id = args.self_id
    inherited_id = os.environ.get("SELF_ID")
    if inherited_id is not None:
        self_id = f"{self_id}_#{inherited_id}"

    # `logger` is deliberately a module-level name: run_script and main use it.
    logger = Logger.get_logger(self_id)

    logger.info(f"Starting main_multi.py with self_id: {self_id}, args: {vars(args)}.")
    main(args, self_id)
    logger.info("Exiting main_multi.py...")
|