"""Gradio demo that runs uploaded audio through a fine-tuned RVC voice model."""

import os

import gradio as gr
import librosa
import soundfile as sf
import torch
from fairseq import checkpoint_utils

# Path configuration.
MODEL_PATH = "ayumi.pth"  # fine-tuned RVC model checkpoint
# RVC index file. NOTE(review): not referenced anywhere in this file — confirm
# whether retrieval with this index was meant to be wired in.
INDEX_PATH = "added_IVF738_Flat_nprobe_1_ayumi_v2.index"
TARGET_SAMPLE_RATE = 16000  # sample rate the input audio is resampled to
OUTPUT_AUDIO_PATH = "converted_audio.wav"  # where the converted audio is saved

# Use the GPU when one is present, otherwise fall back to the CPU so the demo
# can still start on a machine without CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"


def load_rvc_model(model_path):
    """Load the checkpoint at ``model_path`` and return it in eval mode.

    The checkpoint is read with fairseq's ``load_model_ensemble_and_task``;
    only the first model of the returned ensemble is kept, and it is moved
    to ``DEVICE``.

    NOTE(review): this loader expects a fairseq-format checkpoint — confirm
    that the file at ``model_path`` is one.
    """
    print("加载 RVC 模型中...")
    models, _cfg, _task = checkpoint_utils.load_model_ensemble_and_task(
        [model_path]
    )
    model = models[0].eval().to(DEVICE)
    print("模型加载成功")
    return model


def preprocess_audio(file_path, target_sr=16000):
    """Load ``file_path`` with librosa, resampled to ``target_sr``.

    Returns:
        An ``(audio, sample_rate)`` tuple as returned by ``librosa.load``.
    """
    audio, sr = librosa.load(file_path, sr=target_sr)
    return audio, sr


def convert_audio(model, input_audio, sr):
    """Run ``input_audio`` through ``model`` and return the result as NumPy.

    A leading batch axis is added to the input before the forward pass.
    ``sr`` is accepted for interface compatibility but is not used here.

    NOTE(review): assumes ``model(...)`` returns a single tensor — confirm
    against the actual model's forward signature.
    """
    with torch.no_grad():
        input_tensor = torch.tensor(input_audio).unsqueeze(0).float().to(DEVICE)
        output_audio = model(input_tensor).cpu().numpy()
    return output_audio


# Load the model once at import time so every request reuses it.
rvc_model = load_rvc_model(MODEL_PATH)


def process_audio(file):
    """Gradio handler: convert the uploaded audio and return the output path.

    Args:
        file: Path of the uploaded audio (what ``gr.Audio(type="filepath")``
            passes), or a file-like object exposing the path as ``.name``.

    Returns:
        ``OUTPUT_AUDIO_PATH``, the location of the written WAV file.

    Raises:
        gr.Error: If no audio was uploaded.
    """
    if file is None:
        raise gr.Error("请先上传音频文件")

    # Accept both a plain path string and a legacy temp-file object.
    input_path = getattr(file, "name", file)

    # Load the audio uploaded by the user.
    input_audio, sr = preprocess_audio(input_path, TARGET_SAMPLE_RATE)
    print(f"加载音频完成,采样率:{sr}")

    # Run the RVC model on the audio.
    converted_audio = convert_audio(rvc_model, input_audio, sr)
    print("音频转换完成")

    # soundfile interprets 2-D data as (frames, channels); drop the batch axis
    # added in convert_audio so a (1, N) result is written as N mono frames.
    if converted_audio.ndim > 1 and converted_audio.shape[0] == 1:
        converted_audio = converted_audio[0]

    # Save the output audio.
    # NOTE(review): written at the input sample rate — confirm the model's
    # output rate matches. The fixed path is shared by all requests.
    sf.write(OUTPUT_AUDIO_PATH, converted_audio, sr)
    return OUTPUT_AUDIO_PATH


# Build the Gradio interface.
interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(label="上传音频", type="filepath"),
    outputs=gr.Audio(label="转换后的音频"),
    title="RVC 音色转换",
    description="上传任意音频,使用微调的 RVC 模型将其转换为目标音色。",
)

# Launch the app.
if __name__ == "__main__":
    interface.launch()