from pathlib import Path |
import shutil |
import csv |
import io |
import os |
import typing |
import wave |
import sys |
from mimic3_tts.__main__ import (CommandLineInterfaceState, |
get_args, |
initialize_args, |
initialize_tts, |
shutdown_tts, |
OutputNaming, |
process_line) |
import time |
# Root of the local mimic3 voices checkout; voices live under
# '<ROOT_DIR>voices/<lang>/<voice>/'.
ROOT_DIR = '/data/dkounadis/mimic3-voices/'

# Collect one key per non-English voice. A voice directory that has a
# 'speakers.txt' contributes one '<lang>/<voice>#<speaker>' entry per line of
# that file; a voice without it contributes a single '<lang>/<voice>' entry.
foreign_voices = []
for lang in os.listdir(os.path.join(ROOT_DIR, 'voices')):
    if 'en_' in lang:
        # English voices are deliberately left out.
        continue
    for voice in os.listdir(os.path.join(ROOT_DIR, 'voices', lang)):
        try:
            with open(os.path.join(ROOT_DIR, 'voices', lang, voice, 'speakers.txt'), 'r') as f:
                foreign_voices.extend(
                    [f'{lang}/{voice}#{spk.rstrip()}' for spk in f]
                )
        except FileNotFoundError:
            # No speakers.txt: register the voice itself, without a speaker.
            foreign_voices.append(f'{lang}/{voice}')
def process_lines(state: CommandLineInterfaceState, wav_path=None):
    """Synthesize every text in ``state.texts`` and save the audio as one WAV.

    Each line is stripped and handed to mimic3's ``process_line``. When
    ``state.args.output_naming`` is ``OutputNaming.ID`` the line is first
    parsed as a delimited row (delimiter ``args.csv_delimiter``): the first
    column is the id, the last column is the text and, if ``args.csv_voice``
    is set, the second column is the voice.

    After all lines were submitted the function sleeps for 4 seconds. If
    ``state.all_audio`` is then non-empty, stdout is a terminal and
    ``state.args.stdout`` is falsy, the audio is wrapped in a WAV container
    (rate / sample width / channels taken from ``state``) and written to
    ``wav_path``.

    Args:
        state: mimic3 CLI state, already initialized by the caller.
        wav_path: Destination file for the combined WAV. Must be a real path
            whenever audio is written; ``open()`` raises ``TypeError`` for the
            default ``None``.
    """
    args = state.args
    print(f'why waitings in the for loop LIN {state.texts=}\n')

    for line in state.texts:
        line_voice: typing.Optional[str] = None
        line_id = ""
        line = line.strip()

        if args.output_naming == OutputNaming.ID:
            with io.StringIO(line) as line_io:
                reader = csv.reader(line_io, delimiter=args.csv_delimiter)
                # A blank line produces no CSV row at all; a bare next()
                # would raise StopIteration and abort the whole run.
                row = next(reader, None)
            if row:
                line_id, line = row[0], row[-1]
                if args.csv_voice:
                    line_voice = row[1]

        process_line(line, state, line_id=line_id, line_voice=line_voice)

    # Presumably gives the TTS backend time to deliver its audio into
    # state.all_audio before it is inspected -- TODO confirm.
    time.sleep(4)

    if not state.all_audio:
        print('\n\nDOES NOT TTSING --> ADD SOME time.sleep(4)', wav_path)
        return

    # NOTE(review): when audio exists but stdout is not a terminal (or
    # args.stdout is set) nothing is written and nothing is printed --
    # confirm this is the intended behavior.
    if not (sys.stdout.isatty() and (not state.args.stdout)):
        return

    # Build the complete WAV in memory first so that a failure while encoding
    # never leaves a truncated file behind at wav_path.
    with io.BytesIO() as wav_io:
        wav_writer: wave.Wave_write = wave.open(wav_io, "wb")
        with wav_writer:
            wav_writer.setframerate(state.sample_rate_hz)
            wav_writer.setsampwidth(state.sample_width_bytes)
            wav_writer.setnchannels(state.num_channels)
            wav_writer.writeframes(state.all_audio)
        # Read the buffer only after the writer is closed (header finalized).
        wav_bytes = wav_io.getvalue()

    with open(wav_path, 'wb') as wav_file:
        wav_file.write(wav_bytes)
    print('\n\nTTSING', wav_path)
# Directory that receives one reference WAV per foreign voice / speaker.
reference_wav_directory = 'style_vectors_speed1_ICASSP/'
Path(reference_wav_directory).mkdir(parents=True, exist_ok=True)
# Created here; not otherwise used in this part of the script.
wav_dir = 'assets/wavs/'
Path(wav_dir).mkdir(parents=True, exist_ok=True)

for _id, _voice in enumerate(foreign_voices):
    # Voice key without the optional '#<speaker>' suffix. split() returns the
    # whole string when there is no '#', so no separate check is needed.
    speaker_free_voice_name = _voice.split("#")[0]

    # Make sure the voice model is available in the per-user mimic3 directory.
    home_voice_dir = f'/home/audeering.local/dkounadis/.local/share/mycroft/mimic3/voices/{speaker_free_voice_name}/'
    Path(home_voice_dir).mkdir(parents=True, exist_ok=True)
    home_generator = home_voice_dir + 'generator.onnx'
    # A missing or tiny (< 500 bytes) model is replaced by the copy under
    # ROOT_DIR; tiny files are presumably placeholders rather than real
    # models -- TODO confirm.
    if (
        (not os.path.isfile(home_generator)) or
        (os.path.getsize(home_generator) < 500)
    ):
        shutil.copyfile(
            f'{ROOT_DIR}voices/{speaker_free_voice_name}/generator.onnx',
            home_generator)

    # Flatten the voice key into a file name: '/' and '#' become '_', the
    # '_low' marker is dropped and 'cmu-arctic' is spelled 'cmu_arctic'
    # (replace() is a no-op when the substring is absent).
    prepare_file = _voice.replace('/', '_').replace('#', '_').replace('_low', '')
    prepare_file = prepare_file.replace('cmu-arctic', 'cmu_arctic') + '.wav'
    reference_wav = reference_wav_directory + prepare_file

    if os.path.isfile(reference_wav):
        # Already synthesized on an earlier run.
        continue

    rate = 1
    _ssml = (
        '<speak>'
        '<prosody volume=\'64\'>'
        f'<prosody rate=\'{rate}\'>'
        f'<voice name=\'{_voice}\'>'
        '<s>'
        'Sweet dreams are made of this, .. !!! # I travel the world and the seven seas.'
        '</s>'
        '</voice>'
        '</prosody>'
        '</prosody>'
        '</speak>'
    )
    # Copy of the last SSML request on disk; nothing in this block reads it
    # back.
    with open('_tmp_ssml.txt', 'w') as f:
        f.write(_ssml)

    # Drive mimic3 in-process: take its argument namespace and override the
    # fields needed for a single non-interactive SSML request.
    args = get_args()
    args.ssml = True
    args.text = [_ssml]
    args.interactive = False
    state = CommandLineInterfaceState(args=args)
    initialize_args(state)
    initialize_tts(state)
    try:
        process_lines(state, wav_path=reference_wav)
    finally:
        # Always release the TTS backend, even if synthesis raised.
        shutdown_tts(state)

    # process_lines writes nothing when no audio was produced; report that
    # instead of crashing in getsize() and aborting the remaining voices.
    if os.path.isfile(reference_wav):
        print(os.path.getsize(reference_wav), 'SZ')
    else:
        print('NO WAV WRITTEN FOR', _voice, '->', reference_wav)