|
import random |
|
import uuid |
|
import numpy |
|
import os |
|
import random |
|
import fnmatch |
|
|
|
from tqdm.auto import tqdm |
|
from scipy.io import wavfile |
|
|
|
from bark.generation import load_model, SAMPLE_RATE |
|
from bark.api import semantic_to_waveform |
|
|
|
from bark import text_to_semantic |
|
from bark.generation import load_model |
|
|
|
from training.data import load_books, random_split_chunk |
|
|
|
# Directory where generated semantic token arrays (*.npy) are stored.
output = "training/data/output"

# Directory where the waveforms rendered from those semantics (*.wav) are stored.
output_wav = "training/data/output_wav"
|
|
|
|
|
def prepare_semantics_from_text(num_generations):
    """Generate semantic token files from randomly chosen text chunks.

    Loads the data returned by ``load_books(True)`` and the Bark ``text``
    model, then repeatedly draws a non-empty chunk via
    ``random_split_chunk``, converts it with ``text_to_semantic`` using a
    random temperature in [0.6, 0.8] (rounded to two decimals), and saves the
    result as ``<uuid4 hex>.npy`` inside the ``output`` directory.

    Args:
        num_generations: Number of semantic files to produce. Any value that
            cannot be interpreted as a positive integer (``None``, ``0``,
            negatives, non-numeric values) keeps the previous behaviour of
            generating until the process is interrupted.
    """
    # Accept ints as well as floats/strings (e.g. values coming from a UI);
    # anything unusable falls back to the unbounded mode.
    try:
        limit = int(num_generations)
    except (TypeError, ValueError, OverflowError):
        limit = 0

    loaded_data = load_books(True)

    print('Loading semantics model')
    load_model(use_gpu=True, use_small=False, force_reload=False, model_type='text')

    # makedirs also creates missing parents and tolerates an existing folder.
    os.makedirs(output, exist_ok=True)

    loop = 1
    while limit <= 0 or loop <= limit:
        # Keep drawing until the chunk contains something besides whitespace.
        text = ''
        while not text:
            text = random_split_chunk(loaded_data).strip()

        print(f'{loop} Generating semantics for text:', text)
        loop += 1

        temperature = round(random.uniform(0.6, 0.8), ndigits=2)
        semantics = text_to_semantic(text, temp=temperature)

        # A random file name avoids collisions between separate runs.
        numpy.save(os.path.join(output, uuid.uuid4().hex + '.npy'), semantics)
|
|
|
|
|
def prepare_wavs_from_semantics():
    """Render a wav file for every saved semantics file that lacks one.

    Reads each ``*.npy`` file in the ``output`` directory, converts it with
    ``semantic_to_waveform`` using a random temperature in [0.6, 0.8]
    (rounded to two decimals), and writes ``<name>.wav`` at ``SAMPLE_RATE``
    into the ``output_wav`` directory. Files whose wav already exists are
    skipped, so an interrupted run can be resumed.

    Raises:
        FileNotFoundError: If the ``output`` directory does not exist.
    """
    if not os.path.isdir(output):
        raise FileNotFoundError('No \'output\' folder, make sure you run create_data.py first!')

    # makedirs also creates missing parents and tolerates an existing folder.
    os.makedirs(output_wav, exist_ok=True)

    print('Loading coarse model')
    load_model(use_gpu=True, use_small=False, force_reload=False, model_type='coarse')
    print('Loading fine model')
    load_model(use_gpu=True, use_small=False, force_reload=False, model_type='fine')

    files = fnmatch.filter(os.listdir(output), '*.npy')
    total = len(files)

    for index, name in tqdm(enumerate(files, start=1), total=total):
        stem = os.path.splitext(name)[0]
        file_name = os.path.join(output, name)
        out_file = os.path.join(output_wav, f'{stem}.wav')

        # Skip inputs that were already rendered or have vanished since the
        # directory listing was taken.
        if os.path.isfile(out_file) or not os.path.isfile(file_name):
            continue

        print(f'Processing ({index}/{total}) -> {name}')
        temperature = round(random.uniform(0.6, 0.8), ndigits=2)
        wav = semantic_to_waveform(numpy.load(file_name), temp=temperature)
        wavfile.write(out_file, SAMPLE_RATE, wav)

    print('Done!')
|
|
|
|