visuals for 3mimic & 1human - draft
Browse files- mimic3_make_harvard_sentences.py +127 -231
mimic3_make_harvard_sentences.py
CHANGED
@@ -1,10 +1,3 @@
|
|
1 |
-
# 1. Synthesize Harvard Sentences via Mimic-3 - 1 voice
|
2 |
-
# 1. Synthesize via StyleTTS2 --> use same or sweetdreams
|
3 |
-
# 2. Run audinterface on this 767
|
4 |
-
# 3. .mimic3_pkl .styletts2_pkl -> different durations
|
5 |
-
|
6 |
-
# It may crash because shutil.copyfile() is not truly blocking: onnx then reports an incomplete protobuf file
|
7 |
-
# You have to rerun the script - it will copy all voices from hf:mimic3-voices to ~/.local/mimic3
|
8 |
import shutil
|
9 |
import csv
|
10 |
import io
|
@@ -12,6 +5,7 @@ import os
|
|
12 |
import typing
|
13 |
import wave
|
14 |
import sys
|
|
|
15 |
from mimic3_tts.__main__ import (CommandLineInterfaceState,
|
16 |
get_args,
|
17 |
initialize_args,
|
@@ -21,7 +15,7 @@ from mimic3_tts.__main__ import (CommandLineInterfaceState,
|
|
21 |
shutdown_tts,
|
22 |
OutputNaming,
|
23 |
process_line)
|
24 |
-
|
25 |
import time
|
26 |
import json
|
27 |
import pandas as pd
|
@@ -39,31 +33,44 @@ import audiofile
|
|
39 |
|
40 |
|
41 |
# ================================================ LIST OF VOICES
|
42 |
-
ROOT_DIR = '/data/dkounadis/mimic3-voices/'
|
43 |
-
foreign_voices = []
|
44 |
-
english_voices = []
|
45 |
-
for lang in os.listdir(ROOT_DIR + 'voices'):
|
46 |
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
|
65 |
-
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
# ================================================== INTERFACE MODELS
|
68 |
LABELS = [
|
69 |
'arousal', 'dominance', 'valence',
|
@@ -156,8 +163,8 @@ interface = audinterface.Feature(
|
|
156 |
process_func=process_function,
|
157 |
# process_func_args={'outputs': 'logits_scene'},
|
158 |
process_func_applies_sliding_window=False,
|
159 |
-
win_dur=
|
160 |
-
hop_dur=
|
161 |
sampling_rate=16000,
|
162 |
resample=True,
|
163 |
verbose=True,
|
@@ -168,38 +175,6 @@ interface = audinterface.Feature(
|
|
168 |
|
169 |
|
170 |
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
# Filter insufficient durations - prompt
|
188 |
-
foreign_voices = [i for i in foreign_voices if i not in ['bn/multi_low#02194',
|
189 |
-
'uk_UK/m-ailabs_low#obruchov',
|
190 |
-
'uk_UK/m-ailabs_low#shepel',
|
191 |
-
'uk_UK/m-ailabs_low#loboda',
|
192 |
-
'uk_UK/m-ailabs_low#miskun',
|
193 |
-
'uk_UK/m-ailabs_low#sumska',
|
194 |
-
'uk_UK/m-ailabs_low#pysariev',
|
195 |
-
]]
|
196 |
-
|
197 |
-
# print(english_voices, '\n_________________________\n', foreign_voices)
|
198 |
-
# ----------------------
|
199 |
-
# print(foreign_voices.keys(), len(foreign_voices))
|
200 |
-
# raise SystemExit
|
201 |
-
|
202 |
-
|
203 |
def process_lines(state: CommandLineInterfaceState, wav_path=None):
|
204 |
'''MIMIC3 INTERNAL CALL that yields the sigh sound'''
|
205 |
|
@@ -264,114 +239,36 @@ def process_lines(state: CommandLineInterfaceState, wav_path=None):
|
|
264 |
# https://huggingface.co/dkounadis/artificial-styletts2/tree/main/mimic3_foreign
|
265 |
|
266 |
# STYLES Already Made - HF
|
267 |
-
|
268 |
-
|
269 |
|
270 |
-
Path(english_dir).mkdir(parents=True, exist_ok=True)
|
271 |
-
Path(foreign_dir).mkdir(parents=True, exist_ok=True)
|
272 |
|
273 |
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
-
|
279 |
-
# # state.stdout = True
|
280 |
-
# # state.tts = True
|
281 |
-
# process_lines(state, wav_path='tmp1.wav')
|
282 |
-
# shutdown_tts(state)
|
283 |
-
# x, fs = audiofile.read('tmp1.wav')
|
284 |
-
# total_audio_mimic3.append(x)
|
285 |
-
# print(fs, text, 'mimic3')
|
286 |
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
#
|
295 |
-
#
|
296 |
-
|
297 |
-
#
|
298 |
-
#
|
299 |
-
|
300 |
-
#
|
301 |
-
#
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
|
339 |
-
|
340 |
-
|
341 |
-
|
342 |
-
|
343 |
-
# load all harvard and for every voice -> load-its-style -> synth-mimic3 -> synth-stylett2 -> run-both-pkl
|
344 |
-
# FOREIGN
|
345 |
-
for folder, list_voices in [
|
346 |
-
['foreign', foreign_voices],
|
347 |
-
['english', english_voices],
|
348 |
-
]:
|
349 |
-
print(folder, list_voices[:4], '\n\nEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE')
|
350 |
-
for _id, _voice in enumerate(list_voices[:4]):
|
351 |
-
_str = _voice.replace('/', '_').replace('#', '_').replace('_low', '')
|
352 |
-
_dir = folder + '_pkl/'
|
353 |
-
if 'cmu-arctic' in _str:
|
354 |
-
_str = _str.replace('cmu-arctic', 'cmu_arctic') #+ '.wav'
|
355 |
-
|
356 |
-
print('\n\n\n\nExecuting', _voice,'\n\n\n\n\n')
|
357 |
-
|
358 |
-
if (
|
359 |
-
not os.path.isfile(_dir + 'mimic3__' + _str + '.wav') or
|
360 |
-
not os.path.isfile(_dir + 'styletts2__' + _str + '.wav')
|
361 |
-
):
|
362 |
-
|
363 |
-
# Mimic3 GitHub Quota exceeded:
|
364 |
-
# https://github.com/MycroftAI/mimic3-voices
|
365 |
-
# Above repo can exceed download quota of LFS
|
366 |
-
# Copy mimic-voices from local copies
|
367 |
-
# clone https://huggingface.co/mukowaty/mimic3-voices/tree/main/voices
|
368 |
-
# copy to ~/
|
369 |
-
#
|
370 |
-
#
|
371 |
home_voice_dir = f'/home/audeering.local/dkounadis/.local/share/mycroft/mimic3/voices/{_voice.split("#")[0]}/'
|
372 |
Path(home_voice_dir).mkdir(parents=True, exist_ok=True)
|
373 |
-
|
374 |
-
|
375 |
speaker_free_voice_name = _voice.split("#")[0] if '#' in _voice else _voice
|
376 |
|
377 |
|
@@ -386,53 +283,28 @@ for folder, list_voices in [
|
|
386 |
f'/data/dkounadis/mimic3-voices/voices/{speaker_free_voice_name}/generator.onnx',
|
387 |
home_voice_dir + 'generator.onnx')
|
388 |
|
389 |
-
|
390 |
-
|
391 |
-
# pre made
|
392 |
-
prompt_path = f'mimic3_{folder}_4x/' + _str + '.wav'
|
393 |
-
|
394 |
-
|
395 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
396 |
|
|
|
397 |
|
|
|
|
|
398 |
|
399 |
|
|
|
400 |
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
# ACTUAL TTS
|
406 |
-
|
407 |
-
|
408 |
-
with open('harvard.json', 'r') as f:
|
409 |
-
harvard_individual_sentences = json.load(f)['sentences']
|
410 |
-
total_audio_mimic3 = []
|
411 |
-
total_audio_stts2 = []
|
412 |
-
ix = 0
|
413 |
-
for list_of_10 in harvard_individual_sentences[:1]: # 77
|
414 |
-
text = ' '.join(list_of_10['sentences'])
|
415 |
-
# harvard.append(long_sentence.replace('.', ' '))
|
416 |
-
# for text in list_of_10['sentences']:
|
417 |
-
style_vec = msinference.compute_style(prompt_path)
|
418 |
-
print(ix, text)
|
419 |
-
ix += 1
|
420 |
-
|
421 |
-
|
422 |
-
x = msinference.inference(text,
|
423 |
-
style_vec,
|
424 |
-
alpha=0.3,
|
425 |
-
beta=0.7,
|
426 |
-
diffusion_steps=7,
|
427 |
-
embedding_scale=1)
|
428 |
-
|
429 |
-
total_audio_stts2.append(x)
|
430 |
-
|
431 |
-
# also synthesize mimic with the same sentence and voice
|
432 |
-
|
433 |
-
# MIMIC-3 = = = = = = = = = = = = = = BEGIN
|
434 |
-
|
435 |
-
rate = 1 # high speed sounds nice if used as speaker-reference audio for StyleTTS2
|
436 |
_ssml = (
|
437 |
'<speak>'
|
438 |
'<prosody volume=\'64\'>'
|
@@ -472,51 +344,75 @@ for folder, list_voices in [
|
|
472 |
process_lines(state, wav_path='tmp1.wav')
|
473 |
shutdown_tts(state)
|
474 |
x, fs = audiofile.read('tmp1.wav')
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
479 |
|
480 |
|
481 |
|
|
|
|
|
|
|
482 |
|
483 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
484 |
|
485 |
-
total_audio_stts2 = np.concatenate(total_audio_stts2) # -- concat 77x lists
|
486 |
-
audiofile.write(_dir + 'styletts2__' + _str + '.wav', total_audio_stts2, 24000)
|
487 |
|
488 |
-
total_audio_mimic3 = np.concatenate(total_audio_mimic3) # -- concat 77x lists
|
489 |
-
audiofile.write(_dir + 'mimic3__' + _str + '.wav', total_audio_mimic3, 22050)
|
490 |
|
491 |
-
print('Saving:', _dir + 'mimic3__' + _str + '.wav')
|
492 |
-
else:
|
493 |
-
print('Skip:', _dir + 'styletts2__' + _str + '.wav')
|
494 |
|
495 |
-
|
496 |
-
# AUD I N T E R F A C E
|
497 |
-
# file_interface = f'timeseries_{long_audio.replace("/", "")}.pkl'
|
498 |
|
499 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
500 |
|
501 |
-
for engine in ['mimic3', 'styletts2']:
|
502 |
-
harvard_of_voice = f'{_dir}{engine}__{_str}'
|
503 |
-
if not os.path.exists(harvard_of_voice + '.pkl'):
|
504 |
-
df = interface.process_file(harvard_of_voice + '.wav')
|
505 |
-
df.to_pickle(harvard_of_voice + '.pkl')
|
506 |
-
else:
|
507 |
-
# df = pd.read_pickle(harvard_of_voice + '.pkl')
|
508 |
-
print(harvard_of_voice + '.pkl', 'FOUND')
|
509 |
|
510 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
511 |
|
512 |
|
513 |
|
514 |
-
|
|
|
515 |
|
516 |
|
517 |
|
518 |
|
519 |
-
|
520 |
|
521 |
# ===============================================================================
|
522 |
# V I S U A L S
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import shutil
|
2 |
import csv
|
3 |
import io
|
|
|
5 |
import typing
|
6 |
import wave
|
7 |
import sys
|
8 |
+
import audresample
|
9 |
from mimic3_tts.__main__ import (CommandLineInterfaceState,
|
10 |
get_args,
|
11 |
initialize_args,
|
|
|
15 |
shutdown_tts,
|
16 |
OutputNaming,
|
17 |
process_line)
|
18 |
+
import msinference
|
19 |
import time
|
20 |
import json
|
21 |
import pandas as pd
|
|
|
33 |
|
34 |
|
35 |
# ================================================ LIST OF VOICES
|
36 |
+
# ROOT_DIR = '/data/dkounadis/mimic3-voices/'
|
37 |
+
# foreign_voices = []
|
38 |
+
# english_voices = []
|
39 |
+
# for lang in os.listdir(ROOT_DIR + 'voices'):
|
40 |
|
41 |
+
# for voice in os.listdir(ROOT_DIR + 'voices/' + lang):
|
42 |
+
# if 'en_' in lang:
|
43 |
+
|
44 |
+
# try:
|
45 |
+
# with open(ROOT_DIR + 'voices/' + lang + '/' + voice + '/speakers.txt', 'r') as f:
|
46 |
+
# for spk in f:
|
47 |
+
# english_voices.append(lang + '/' + voice + '#' + spk.rstrip())
|
48 |
+
# # voice_spk_string = lang + '/' + voice + '#' + spk.rstrip() for spk in f
|
49 |
+
# except FileNotFoundError:
|
50 |
+
# english_voices.append(lang + '/' + voice)
|
51 |
+
|
52 |
+
# else:
|
53 |
|
54 |
+
# try:
|
55 |
+
# with open(ROOT_DIR + 'voices/' + lang + '/' + voice + '/speakers.txt', 'r') as f:
|
56 |
+
# for spk in f:
|
57 |
+
# foreign_voices.append(lang + '/' + voice + '#' + spk.rstrip())
|
58 |
|
59 |
+
# except FileNotFoundError:
|
60 |
+
# foreign_voices.append(lang + '/' + voice)
|
61 |
+
# #
|
62 |
+
# [print(i) for i in foreign_voices]
|
63 |
+
# print('\n_______________________________\n')
|
64 |
+
# [print(i) for i in english_voices]
|
65 |
+
# ====================================================== END PRINT LIST OF VOICES
|
66 |
+
list_voices = [
|
67 |
+
'en_US/m-ailabs_low#mary_ann',
|
68 |
+
'en_UK/apope_low',
|
69 |
+
'de_DE/thorsten-emotion_low#neutral', # is the 4x really interesting we can just write it in Section
|
70 |
+
'human'
|
71 |
+
] # special - for human we load specific style file - no Mimic3 is run
|
72 |
+
|
73 |
+
|
74 |
# ================================================== INTERFACE MODELS
|
75 |
LABELS = [
|
76 |
'arousal', 'dominance', 'valence',
|
|
|
163 |
process_func=process_function,
|
164 |
# process_func_args={'outputs': 'logits_scene'},
|
165 |
process_func_applies_sliding_window=False,
|
166 |
+
win_dur=7.0,
|
167 |
+
hop_dur=4.0,
|
168 |
sampling_rate=16000,
|
169 |
resample=True,
|
170 |
verbose=True,
|
|
|
175 |
|
176 |
|
177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
def process_lines(state: CommandLineInterfaceState, wav_path=None):
|
179 |
'''MIMIC3 INTERNAL CALL that yields the sigh sound'''
|
180 |
|
|
|
239 |
# https://huggingface.co/dkounadis/artificial-styletts2/tree/main/mimic3_foreign
|
240 |
|
241 |
# STYLES Already Made - HF
|
242 |
+
out_dir = 'out_dir/'
|
243 |
+
Path(out_dir).mkdir(parents=True, exist_ok=True)
|
244 |
|
|
|
|
|
245 |
|
246 |
|
247 |
+
for _id, _voice in enumerate(list_voices):
|
248 |
+
_str = _voice.replace('/', '_').replace('#', '_').replace('_low', '')
|
249 |
+
|
250 |
+
if 'cmu-arctic' in _str:
|
251 |
+
_str = _str.replace('cmu-arctic', 'cmu_arctic') #+ '.wav'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
252 |
|
253 |
+
print('\n\n\n\nExecuting', _voice,'\n\n\n\n\n')
|
254 |
+
|
255 |
+
if (
|
256 |
+
not os.path.isfile(out_dir + 'mimic3__' + _str + '.wav') or
|
257 |
+
not os.path.isfile(out_dir + 'styletts2__' + _str + '.wav')
|
258 |
+
):
|
259 |
+
|
260 |
+
# Mimic3 GitHub Quota exceeded:
|
261 |
+
# https://github.com/MycroftAI/mimic3-voices
|
262 |
+
# Above repo can exceed download quota of LFS
|
263 |
+
# Copy mimic-voices from local copies
|
264 |
+
# clone https://huggingface.co/mukowaty/mimic3-voices/tree/main/voices
|
265 |
+
# copy to ~/
|
266 |
+
#
|
267 |
+
#
|
268 |
+
if 'human' not in _voice:
|
269 |
+
# assure mimic-3 generator .onnx exists
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
270 |
home_voice_dir = f'/home/audeering.local/dkounadis/.local/share/mycroft/mimic3/voices/{_voice.split("#")[0]}/'
|
271 |
Path(home_voice_dir).mkdir(parents=True, exist_ok=True)
|
|
|
|
|
272 |
speaker_free_voice_name = _voice.split("#")[0] if '#' in _voice else _voice
|
273 |
|
274 |
|
|
|
283 |
f'/data/dkounadis/mimic3-voices/voices/{speaker_free_voice_name}/generator.onnx',
|
284 |
home_voice_dir + 'generator.onnx')
|
285 |
|
286 |
+
|
|
|
|
|
|
|
|
|
|
|
287 |
|
288 |
+
|
289 |
+
# prompt_path = f'mimic3_{folder}_4x/' + _str + '.wav'
|
290 |
+
with open('harvard.json', 'r') as f:
|
291 |
+
harvard_individual_sentences = json.load(f)['sentences']
|
292 |
+
total_audio_mimic3 = []
|
293 |
+
total_audio_stts2 = []
|
294 |
+
ix = 0
|
295 |
+
for list_of_10 in harvard_individual_sentences[:1]: # 77
|
296 |
|
297 |
+
text = ' '.join(list_of_10['sentences'])
|
298 |
|
299 |
+
print(ix, text)
|
300 |
+
ix += 1
|
301 |
|
302 |
|
303 |
+
# Synthesize with Mimic-3, then use it as the prompt for StyleTTS2
|
304 |
|
305 |
+
# MIMIC-3 if _voice is not HUMAN
|
306 |
+
if 'human' not in _voice:
|
307 |
+
rate = 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
308 |
_ssml = (
|
309 |
'<speak>'
|
310 |
'<prosody volume=\'64\'>'
|
|
|
344 |
process_lines(state, wav_path='tmp1.wav')
|
345 |
shutdown_tts(state)
|
346 |
x, fs = audiofile.read('tmp1.wav')
|
347 |
+
print(x.shape)
|
348 |
+
else:
|
349 |
+
# MSP['valence.train.votes'].get().sort_values('7').index[-1]
|
350 |
+
human_style = '/cache/audb/msppodcast/2.4.0/fe182b91/Audios/MSP-PODCAST_0235_0053.wav'
|
351 |
+
x, fs = audiofile.read(human_style)
|
352 |
+
print(x.shape,' human') # crop human to almost mimic-3 duration
|
353 |
+
total_audio_mimic3.append(x)
|
354 |
+
print(fs, text, 'mimic3')
|
355 |
+
|
356 |
+
# MIMIC3 = = = = = = = = = = = = = = END
|
357 |
|
358 |
|
359 |
|
360 |
+
|
361 |
+
style_vec = msinference.compute_style('tmp1.wav') # use mimic-3 as prompt
|
362 |
+
|
363 |
|
364 |
|
365 |
+
x = msinference.inference(text,
|
366 |
+
style_vec,
|
367 |
+
alpha=0.3,
|
368 |
+
beta=0.7,
|
369 |
+
diffusion_steps=7,
|
370 |
+
embedding_scale=1)
|
371 |
+
|
372 |
+
total_audio_stts2.append(x)
|
373 |
|
|
|
|
|
374 |
|
|
|
|
|
375 |
|
|
|
|
|
|
|
376 |
|
|
|
|
|
|
|
377 |
|
378 |
|
379 |
+
total_audio_stts2 = np.concatenate(total_audio_stts2) # -- concat 77x lists
|
380 |
+
total_audio_stts2 = audresample.resample(total_audio_stts2, original_rate=24000, target_rate=16000)[0] # for audinterface
|
381 |
+
audiofile.write(out_dir + 'styletts2__' + _str + '.wav', total_audio_stts2, 16000)
|
382 |
+
|
383 |
+
total_audio_mimic3 = np.concatenate(total_audio_mimic3) # -- concat 77x lists
|
384 |
+
total_audio_mimic3 = audresample.resample(total_audio_mimic3, original_rate=24000, target_rate=16000)[0]
|
385 |
+
audiofile.write(out_dir + 'mimic3__' + _str + '.wav', total_audio_mimic3, 16000)
|
386 |
+
|
387 |
+
print('Saving:', out_dir + 'mimic3__' + _str + '.wav')
|
388 |
+
else:
|
389 |
+
print('Skip:', out_dir + 'styletts2__' + _str + '.wav')
|
390 |
+
|
391 |
+
|
392 |
+
# AUD I N T E R F A C E
|
393 |
+
# file_interface = f'timeseries_{long_audio.replace("/", "")}.pkl'
|
394 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
395 |
|
396 |
|
397 |
+
for engine in ['mimic3', 'styletts2']:
|
398 |
+
harvard_of_voice = f'{out_dir}{engine}__{_str}'
|
399 |
+
if not os.path.exists(harvard_of_voice + '.pkl'):
|
400 |
+
df = interface.process_file(harvard_of_voice + '.wav')
|
401 |
+
df.to_pickle(harvard_of_voice + '.pkl')
|
402 |
+
else:
|
403 |
+
# df = pd.read_pickle(harvard_of_voice + '.pkl')
|
404 |
+
print(harvard_of_voice + '.pkl', 'FOUND')
|
405 |
+
|
406 |
|
407 |
|
408 |
|
409 |
+
|
410 |
+
|
411 |
|
412 |
|
413 |
|
414 |
|
415 |
+
print('\nVisuals\n')
|
416 |
|
417 |
# ===============================================================================
|
418 |
# V I S U A L S
|