|
|
|
|
|
|
|
|
|
|
|
import argparse |
|
import multiprocessing |
|
import os |
|
import subprocess |
|
import time |
|
|
|
from utils.logger import Logger |
|
from utils.tool import get_gpu_nums |
|
|
|
|
|
def run_script(args, gpu_id, self_id):
    """
    Run main.py inside the configured conda environment, pinned to one GPU.

    The child process receives a copy of the current environment with
    ``CUDA_VISIBLE_DEVICES`` set to ``gpu_id`` and ``SELF_ID`` set to
    ``self_id``. This call blocks until the child exits and logs the outcome.

    Args:
        args: Parsed command-line namespace. Reads ``conda_path`` and
            ``conda_env_name``, plus ``main_command_args`` when present.
        gpu_id (int): ID of the GPU.
        self_id: Identifier exported to the child as ``SELF_ID``.

    Returns:
        None
    """
    env = os.environ.copy()
    env["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
    env["SELF_ID"] = str(self_id)

    # Extra options for main.py; tolerate namespaces built without the attribute.
    main_command_args = (getattr(args, "main_command_args", "") or "").strip()

    # NOTE: the values below are interpolated unquoted into a bash command
    # line, so they must come from a trusted operator.
    command = (
        f"source {args.conda_path} &&"
        'eval "$(conda shell.bash hook)" && '
        f"conda activate {args.conda_env_name} && "
        "python main.py"
    )
    if main_command_args:
        # Previously the option was parsed but never forwarded to main.py.
        command = f"{command} {main_command_args}"

    try:
        process = subprocess.Popen(command, shell=True, env=env, executable="/bin/bash")
        return_code = process.wait()
    except KeyboardInterrupt:
        logger.warning(f"Multi - GPU {gpu_id}: Interrupted by keyboard, exiting...")
    except Exception as e:
        logger.error(f"Error occurred for GPU {gpu_id}: {e}")
    else:
        # Only claim success when the shell pipeline actually exited with 0.
        if return_code == 0:
            logger.info(f"Process for GPU {gpu_id} completed successfully.")
        else:
            logger.error(
                f"Process for GPU {gpu_id} exited with return code {return_code}."
            )
|
|
|
|
|
def main(args, self_id):
    """
    Start one worker process per usable GPU and wait for all of them.

    Every GPU index in ``range(get_gpu_nums())`` that is not listed in
    ``args.disabled_gpu_ids`` gets its own ``multiprocessing.Process``
    running ``run_script``.

    Args:
        args: Parsed command-line namespace. Reads ``disabled_gpu_ids`` and is
            passed through unchanged to ``run_script``.
        self_id (str): Identifier for the current process.

    Returns:
        None

    Raises:
        ValueError: If ``disabled_gpu_ids`` contains a non-integer token.
    """
    disabled_ids = []
    if args.disabled_gpu_ids:
        # Skip empty tokens so inputs such as "0,1," or "0,,1" do not make
        # int() raise on an empty string.
        disabled_ids = [
            int(token)
            for token in args.disabled_gpu_ids.split(",")
            if token.strip()
        ]
        logger.info(f"CUDA_DISABLE_ID is set, not using: {disabled_ids}")

    gpus_count = get_gpu_nums()

    available_gpus = [i for i in range(gpus_count) if i not in disabled_ids]
    if not available_gpus:
        logger.warning("No available GPUs after filtering, nothing to start.")
        return

    workers = []
    for gpu_id in available_gpus:
        process = multiprocessing.Process(
            target=run_script, args=(args, gpu_id, self_id)
        )
        process.start()
        logger.info(f"GPU {gpu_id}: started...")
        workers.append((gpu_id, process))
        # One-second pause between launches (presumably to stagger start-up).
        time.sleep(1)

    for gpu_id, process in workers:
        process.join()
        # Surface workers that died abnormally instead of ignoring them.
        if process.exitcode != 0:
            logger.error(
                f"GPU {gpu_id}: worker process exited with code {process.exitcode}."
            )
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: parse the CLI, set up the module-level logger that
    # run_script() and main() use, then launch the per-GPU workers.
    parser = argparse.ArgumentParser()

    # (flag, default, help) for each option; all options are plain strings.
    # Order is kept so the generated --help output is unchanged.
    _OPTION_SPECS = (
        ("--self_id", "main_multi", "Log ID"),
        (
            "--disabled_gpu_ids",
            "",
            "Comma-separated list of disabled GPU IDs, default uses all available GPUs",
        ),
        ("--conda_path", "/opt/conda/etc/profile.d/conda.sh", "Conda path"),
        ("--conda_env_name", "AudioPipeline", "Conda environment name"),
        (
            "--main_command_args",
            "",
            "Main command args, check available options by `python main.py --help`",
        ),
    )
    for _flag, _default, _help in _OPTION_SPECS:
        parser.add_argument(_flag, type=str, default=_default, help=_help)

    args = parser.parse_args()

    # When a SELF_ID is present in the environment, append it to the log ID.
    _inherited_id = os.environ.get("SELF_ID")
    if _inherited_id is None:
        self_id = args.self_id
    else:
        self_id = f"{args.self_id}_#{_inherited_id}"

    logger = Logger.get_logger(self_id)

    logger.info(f"Starting main_multi.py with self_id: {self_id}, args: {vars(args)}.")
    main(args, self_id)
    logger.info("Exiting main_multi.py...")
|
|