fx
Browse files- tts_harvard.py +8 -8
tts_harvard.py
CHANGED
@@ -17,9 +17,9 @@ LABELS = ['arousal', 'dominance', 'valence']
|
|
17 |
def load_speech(split=None):
|
18 |
DB = [
|
19 |
# [dataset, version, table, has_timedeltas_or_is_full_wavfile]
|
20 |
-
|
21 |
-
['librispeech', '3.1.0', 'test-clean', False],
|
22 |
-
|
23 |
# ['entertain-playtestcloud', '1.1.0', 'emotion.categories.train.gold_standard', True],
|
24 |
# ['erik', '2.2.0', 'emotion.categories.train.gold_standard', True],
|
25 |
# ['meld', '1.3.1', 'emotion.categories.train.gold_standard', False],
|
@@ -116,19 +116,19 @@ for audio_prompt in ['english',
|
|
116 |
# harvard.append(long_sentence.replace('.', ' '))
|
117 |
for text in list_of_10['sentences']:
|
118 |
if audio_prompt == 'english':
|
119 |
-
_p = synthetic_wav_paths[ix % 134]
|
120 |
style_vec = msinference.compute_style(_p)
|
121 |
elif audio_prompt == 'english_4x':
|
122 |
-
_p = synthetic_wav_paths_4x[ix % 134]
|
123 |
style_vec = msinference.compute_style(_p)
|
124 |
elif audio_prompt == 'human':
|
125 |
_p = natural_wav_paths[ix % len(natural_wav_paths)]
|
126 |
style_vec = msinference.compute_style(_p)
|
127 |
elif audio_prompt == 'foreign':
|
128 |
-
_p = synthetic_wav_paths_foreign[ix % 204]
|
129 |
style_vec = msinference.compute_style(_p)
|
130 |
elif audio_prompt == 'foreign_4x':
|
131 |
-
_p = synthetic_wav_paths_foreign_4x[ix %
|
132 |
style_vec = msinference.compute_style(_p)
|
133 |
else:
|
134 |
print('unknonw list of style vector')
|
@@ -154,4 +154,4 @@ for audio_prompt in ['english',
|
|
154 |
soundfile.write('_st_' + OUT_FILE, total_style, fsr) # take this fs from the loading
|
155 |
|
156 |
else:
|
157 |
-
print('\nALREADY EXISTS\n')
|
|
|
17 |
def load_speech(split=None):
|
18 |
DB = [
|
19 |
# [dataset, version, table, has_timedeltas_or_is_full_wavfile]
|
20 |
+
# ['crema-d', '1.1.1', 'emotion.voice.test', False],
|
21 |
+
#['librispeech', '3.1.0', 'test-clean', False],
|
22 |
+
['emodb', '1.2.0', 'emotion.categories.train.gold_standard', False],
|
23 |
# ['entertain-playtestcloud', '1.1.0', 'emotion.categories.train.gold_standard', True],
|
24 |
# ['erik', '2.2.0', 'emotion.categories.train.gold_standard', True],
|
25 |
# ['meld', '1.3.1', 'emotion.categories.train.gold_standard', False],
|
|
|
116 |
# harvard.append(long_sentence.replace('.', ' '))
|
117 |
for text in list_of_10['sentences']:
|
118 |
if audio_prompt == 'english':
|
119 |
+
_p = synthetic_wav_paths[ix % len(synthetic_wav_paths)] #134]
|
120 |
style_vec = msinference.compute_style(_p)
|
121 |
elif audio_prompt == 'english_4x':
|
122 |
+
_p = synthetic_wav_paths_4x[ix % len(synthetic_wav_paths_4x)] # 134]
|
123 |
style_vec = msinference.compute_style(_p)
|
124 |
elif audio_prompt == 'human':
|
125 |
_p = natural_wav_paths[ix % len(natural_wav_paths)]
|
126 |
style_vec = msinference.compute_style(_p)
|
127 |
elif audio_prompt == 'foreign':
|
128 |
+
_p = synthetic_wav_paths_foreign[ix % len(synthetic_wav_paths_foreign)] #179] # 204 some short styles are discarded
|
129 |
style_vec = msinference.compute_style(_p)
|
130 |
elif audio_prompt == 'foreign_4x':
|
131 |
+
_p = synthetic_wav_paths_foreign_4x[ix % len(synthetic_wav_paths_foreign_4x)] #179] # 204
|
132 |
style_vec = msinference.compute_style(_p)
|
133 |
else:
|
134 |
print('unknonw list of style vector')
|
|
|
154 |
soundfile.write('_st_' + OUT_FILE, total_style, fsr) # take this fs from the loading
|
155 |
|
156 |
else:
|
157 |
+
print('\nALREADY EXISTS\n')
|