File size: 2,247 Bytes
5f84dff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import io
import logging

import librosa
import soundfile
from flask import Flask, request, send_file
from flask_cors import CORS

#from infer_tools.infer_tool import Svc
from inference_vst import SvcFish
#from utils.hparams import hparams

# Only let WARNING and above through from the 'numba' logger.
logging.getLogger('numba').setLevel(logging.WARNING)

# Flask application that hosts the voice-conversion endpoint, with CORS
# applied to it via flask_cors.
app = Flask(__name__)
CORS(app)


@app.route("/voiceChangeModel", methods=["POST"])
def voice_change_model():
    """Run the uploaded audio through the SVC model and return it as a WAV file.

    Expected form fields of the POST request:
        sample: uploaded audio file to convert (required).
        fPitchChange: pitch-change value forwarded to the model (default 0).
        sSpeakId: speaker id forwarded to the model (default 0).
        sampleRate: sample rate the DAW expects for the returned audio
            (required, must be positive).

    Returns:
        The converted audio, resampled to ``sampleRate``, as an attachment
        named ``temp.wav``; or a plain-text message with HTTP status 400 when
        the upload is missing or a form field is invalid.
    """
    request_form = request.form
    wave_file = request.files.get("sample", None)
    if wave_file is None:
        # Without this guard the handler would fail on ``None.read()``.
        return "missing 'sample' file in request", 400

    try:
        # Pitch-change information.
        f_pitch_change = float(request_form.get("fPitchChange", 0))
        # Speaker id.
        int_speak_id = int(request_form.get("sSpeakId", 0))
        # Sample rate required by the DAW (may arrive as e.g. "44100.0").
        daw_sample = int(float(request_form.get("sampleRate", 0)))
    except (TypeError, ValueError):
        return "fPitchChange, sSpeakId and sampleRate must be numeric", 400
    if daw_sample <= 0:
        # A zero/negative target rate cannot be resampled to or written.
        return "sampleRate must be a positive number", 400

    # Wrap the uploaded wav bytes in an in-memory file for the model.
    input_wav_path = io.BytesIO(wave_file.read())
    # Model inference; svc_model is the module-level instance created in the
    # ``__main__`` block of this script.
    _audio, _model_sr = svc_model.infer(input_wav_path, f_pitch_change, int_speak_id, daw_sample)
    # Sample rates are passed by keyword: librosa >= 0.10 rejects them as
    # positional arguments, and older releases accept the same keywords.
    tar_audio = librosa.resample(_audio, orig_sr=_model_sr, target_sr=daw_sample)
    # Return the audio from an in-memory buffer.
    out_wav_path = io.BytesIO()
    soundfile.write(out_wav_path, tar_audio, daw_sample, format="wav")
    out_wav_path.seek(0)
    return send_file(out_wav_path, download_name="temp.wav", as_attachment=True)


if __name__ == '__main__':
    # With the fish backend only the parameters below need to be supplied.
    config_path = 'configs/svc_cn_hubert_soft_ms.py'
    checkpoint_path = 'logs/DiffSVC/version_0/checkpoints/epoch=123-step=300000-valid_loss=0.17.ckpt'

    # Speed-up factor; None means the value from the config file is used.
    sampler_interval = None

    # Whether to extract vocals, whether to merge the non-vocal part back in,
    # and the loudness gain applied to the vocals.
    extract_vocals = True
    merge_non_vocals = False
    vocals_loudness_gain = 0.0

    # Silence threshold.
    silence_threshold = 60
    # Maximum slice duration.
    max_slice_duration = 30.0

    # Module-level model instance used by the /voiceChangeModel handler.
    svc_model = SvcFish(
        checkpoint_path,
        config_path,
        sampler_interval=sampler_interval,
        extract_vocals=extract_vocals,
        merge_non_vocals=merge_non_vocals,
        vocals_loudness_gain=vocals_loudness_gain,
        silence_threshold=silence_threshold,
        max_slice_duration=max_slice_duration,
    )

    # These settings correspond to the VST plugin; changing them is not
    # recommended.
    app.run(host="0.0.0.0", port=6842, threaded=False, debug=False)