# Voice-conversion demo: synthesize a short utterance with a TTS model, then
# run it through the OpenVoice ToneColorConverter so that it takes on the tone
# colour extracted from a reference recording. Both the raw TTS audio and the
# converted audio are displayed for comparison.

import os
import time

import numpy as np
import scipy.io.wavfile
import torch
from IPython.display import Audio, display

from openvoice import se_extractor
from openvoice.api import ToneColorConverter

# NOTE(review): `tts` is used below but is not defined in this file. It is
# presumably a TTS model object created earlier in the session (e.g. a previous
# notebook cell) -- confirm it exists before running this script.

# Sample rate used both when writing the intermediate WAV and when playing the
# raw TTS audio. Presumably the TTS model's output rate -- TODO confirm.
SAMPLE_RATE = 24000

ckpt_converter = '/kaggle/working/converter'
device = "cuda:0" if torch.cuda.is_available() else "cpu"
output_dir = 'outputs_v2'

# Build the converter from its config and load the trained weights.
tone_color_converter = ToneColorConverter(f'{ckpt_converter}/config.json', device=device)
tone_color_converter.load_ckpt(f'{ckpt_converter}/checkpoint.pth')

os.makedirs(output_dir, exist_ok=True)

# Target embedding: extracted from the reference recording.
reference_speaker = '/kaggle/working/industrialRevolution.wav'
target_se, audio_name = se_extractor.get_se(reference_speaker, tone_color_converter, vad=False)

# Source embedding: loaded from disk onto the selected device.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
source_se = torch.load('/kaggle/working/model.pth', map_location=device)

out = '/kaggle/working/output.wav'
cloned_voice = "RPReplay_Final1716272820.mp4.wav"

# Synthesize the base utterance with the externally provided `tts` object.
wav = tts.tts(text="no, yes, nah, yeah, damn!", speaker_wav=cloned_voice, language="en", emotion="happy")

# Write as float32: a plain np.array() over Python floats would be float64 and
# produce a 64-bit float WAV, which is far less widely supported by readers.
scipy.io.wavfile.write(out, SAMPLE_RATE, np.asarray(wav, dtype=np.float32))

# Time only the conversion step; perf_counter is monotonic, so the measured
# interval cannot be skewed by system clock adjustments.
time1 = time.perf_counter()
tone_color_converter.convert(
    audio_src_path=out,
    src_se=source_se,
    tgt_se=target_se,
    output_path="/kaggle/working/tmp.wav")
print(time.perf_counter() - time1)

# Converted audio first, then the raw TTS output for comparison.
display(Audio("/kaggle/working/tmp.wav"))
display(Audio(wav, rate=SAMPLE_RATE))