File size: 3,342 Bytes
c968fc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Copyright (c) 2024 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import argparse
import multiprocessing
import os
import subprocess
import time

from utils.logger import Logger
from utils.tool import get_gpu_nums


def run_script(args, gpu_id, self_id):
    """
    Run main.py in a bash subprocess with a single GPU made visible to it.

    The child environment is a copy of the current environment with
    ``CUDA_VISIBLE_DEVICES`` set to ``gpu_id`` and ``SELF_ID`` set to
    ``self_id``. The shell command sources the conda script, activates the
    requested conda environment and runs ``python main.py``, followed by
    ``args.main_command_args`` when that option is set.

    Args:
        args (argparse.Namespace): Parsed command-line options. Reads
            ``conda_path``, ``conda_env_name`` and, if present,
            ``main_command_args``.
        gpu_id (int): ID of the GPU.
        self_id (int | str): Identifier exported to the child as ``SELF_ID``.

    Returns:
        None
    """
    env = os.environ.copy()
    env["CUDA_VISIBLE_DEVICES"] = str(gpu_id)
    env["SELF_ID"] = str(self_id)

    # Forward the user-supplied arguments for main.py. They are appended
    # verbatim, so the shell splits them into individual arguments.
    # getattr keeps this function usable with a namespace lacking the option.
    extra_args = (getattr(args, "main_command_args", "") or "").strip()

    steps = [
        f"source {args.conda_path}",
        'eval "$(conda shell.bash hook)"',
        f"conda activate {args.conda_env_name}",
        f"python main.py {extra_args}".rstrip(),
    ]
    command = " && ".join(steps)

    # NOTE: `logger` is the module-level logger created in the __main__ block.
    try:
        # `source` is a bash builtin, hence the explicit bash executable.
        process = subprocess.Popen(command, shell=True, env=env, executable="/bin/bash")
        return_code = process.wait()
    except KeyboardInterrupt:
        logger.warning(f"Multi - GPU {gpu_id}: Interrupted by keyboard, exiting...")
    except Exception as e:
        logger.error(f"Error occurred for GPU {gpu_id}: {e}")
    else:
        # Only report success when the child actually exited cleanly.
        if return_code == 0:
            logger.info(f"Process for GPU {gpu_id} completed successfully.")
        else:
            logger.error(
                f"Process for GPU {gpu_id} exited with code {return_code}."
            )


def main(args, self_id):
    """
    Start multiple script tasks using multiple processes, each process using one GPU.

    One ``multiprocessing.Process`` running ``run_script`` is started for every
    GPU index in ``range(get_gpu_nums())`` that is not listed in
    ``args.disabled_gpu_ids``; the function then blocks until all of them
    have finished.

    Args:
        args (argparse.Namespace): Parsed command-line options. Reads
            ``disabled_gpu_ids`` here and is passed on to ``run_script``.
        self_id (str): Identifier for the current process.

    Returns:
        None

    Raises:
        ValueError: If ``args.disabled_gpu_ids`` contains a non-integer entry.
    """
    disabled_ids = []
    if args.disabled_gpu_ids:
        # Skip empty tokens so inputs like "0,1," or "0,,1" do not crash int().
        disabled_ids = [
            int(token)
            for token in args.disabled_gpu_ids.split(",")
            if token.strip()
        ]
        logger.info(f"CUDA_DISABLE_ID is set, not using: {disabled_ids}")

    gpus_count = get_gpu_nums()

    excluded = set(disabled_ids)
    available_gpus = [i for i in range(gpus_count) if i not in excluded]
    if not available_gpus:
        # Make the "nothing to do" case visible instead of exiting silently.
        logger.warning(
            f"No available GPUs (detected: {gpus_count}, disabled: {disabled_ids}), "
            "nothing to run."
        )
        return

    processes = []
    for gpu_id in available_gpus:
        process = multiprocessing.Process(
            target=run_script, args=(args, gpu_id, self_id)
        )
        process.start()
        logger.info(f"GPU {gpu_id}: started...")
        # Stagger the launches by one second.
        time.sleep(1)
        processes.append(process)

    # Block until every worker process has exited.
    for process in processes:
        process.join()


if __name__ == "__main__":
    # Every option is a plain string, so describe them as
    # (flag, default, help) rows and register them in one loop.
    cli_options = (
        ("--self_id", "main_multi", "Log ID"),
        (
            "--disabled_gpu_ids",
            "",
            "Comma-separated list of disabled GPU IDs, default uses all available GPUs",
        ),
        ("--conda_path", "/opt/conda/etc/profile.d/conda.sh", "Conda path"),
        ("--conda_env_name", "AudioPipeline", "Conda environment name"),
        (
            "--main_command_args",
            "",
            "Main command args, check available options by `python main.py --help`",
        ),
    )

    arg_parser = argparse.ArgumentParser()
    for flag, default_value, help_text in cli_options:
        arg_parser.add_argument(flag, type=str, default=default_value, help=help_text)
    args = arg_parser.parse_args()

    # If SELF_ID is already present in the environment, append it to the log ID.
    inherited_id = os.environ.get("SELF_ID")
    if inherited_id is None:
        self_id = args.self_id
    else:
        self_id = f"{args.self_id}_#{inherited_id}"

    # Bound at module scope: run_script() and main() use this name directly.
    logger = Logger.get_logger(self_id)

    logger.info(f"Starting main_multi.py with self_id: {self_id}, args: {vars(args)}.")
    main(args, self_id)
    logger.info("Exiting main_multi.py...")