Dionyssos committed on
Commit
ccb2aa8
1 Parent(s): 3844e6c
Files changed (1) hide show
  1. tts_harvard.py +8 -8
tts_harvard.py CHANGED
@@ -17,9 +17,9 @@ LABELS = ['arousal', 'dominance', 'valence']
17
  def load_speech(split=None):
18
  DB = [
19
  # [dataset, version, table, has_timedeltas_or_is_full_wavfile]
20
- # ['crema-d', '1.1.1', 'emotion.voice.test', False],
21
- ['librispeech', '3.1.0', 'test-clean', False],
22
- # ['emodb', '1.2.0', 'emotion.categories.train.gold_standard', False],
23
  # ['entertain-playtestcloud', '1.1.0', 'emotion.categories.train.gold_standard', True],
24
  # ['erik', '2.2.0', 'emotion.categories.train.gold_standard', True],
25
  # ['meld', '1.3.1', 'emotion.categories.train.gold_standard', False],
@@ -116,19 +116,19 @@ for audio_prompt in ['english',
116
  # harvard.append(long_sentence.replace('.', ' '))
117
  for text in list_of_10['sentences']:
118
  if audio_prompt == 'english':
119
- _p = synthetic_wav_paths[ix % 134]
120
  style_vec = msinference.compute_style(_p)
121
  elif audio_prompt == 'english_4x':
122
- _p = synthetic_wav_paths_4x[ix % 134]
123
  style_vec = msinference.compute_style(_p)
124
  elif audio_prompt == 'human':
125
  _p = natural_wav_paths[ix % len(natural_wav_paths)]
126
  style_vec = msinference.compute_style(_p)
127
  elif audio_prompt == 'foreign':
128
- _p = synthetic_wav_paths_foreign[ix % 204]
129
  style_vec = msinference.compute_style(_p)
130
  elif audio_prompt == 'foreign_4x':
131
- _p = synthetic_wav_paths_foreign_4x[ix % 204]
132
  style_vec = msinference.compute_style(_p)
133
  else:
134
  print('unknonw list of style vector')
@@ -154,4 +154,4 @@ for audio_prompt in ['english',
154
  soundfile.write('_st_' + OUT_FILE, total_style, fsr) # take this fs from the loading
155
 
156
  else:
157
- print('\nALREADY EXISTS\n')
 
17
  def load_speech(split=None):
18
  DB = [
19
  # [dataset, version, table, has_timedeltas_or_is_full_wavfile]
20
+ # ['crema-d', '1.1.1', 'emotion.voice.test', False],
21
+ #['librispeech', '3.1.0', 'test-clean', False],
22
+ ['emodb', '1.2.0', 'emotion.categories.train.gold_standard', False],
23
  # ['entertain-playtestcloud', '1.1.0', 'emotion.categories.train.gold_standard', True],
24
  # ['erik', '2.2.0', 'emotion.categories.train.gold_standard', True],
25
  # ['meld', '1.3.1', 'emotion.categories.train.gold_standard', False],
 
116
  # harvard.append(long_sentence.replace('.', ' '))
117
  for text in list_of_10['sentences']:
118
  if audio_prompt == 'english':
119
+ _p = synthetic_wav_paths[ix % len(synthetic_wav_paths)] #134]
120
  style_vec = msinference.compute_style(_p)
121
  elif audio_prompt == 'english_4x':
122
+ _p = synthetic_wav_paths_4x[ix % len(synthetic_wav_paths_4x)] # 134]
123
  style_vec = msinference.compute_style(_p)
124
  elif audio_prompt == 'human':
125
  _p = natural_wav_paths[ix % len(natural_wav_paths)]
126
  style_vec = msinference.compute_style(_p)
127
  elif audio_prompt == 'foreign':
128
+ _p = synthetic_wav_paths_foreign[ix % len(synthetic_wav_paths_foreign)] #179] # 204 some short styles are discarded
129
  style_vec = msinference.compute_style(_p)
130
  elif audio_prompt == 'foreign_4x':
131
+ _p = synthetic_wav_paths_foreign_4x[ix % len(synthetic_wav_paths_foreign_4x)] #179] # 204
132
  style_vec = msinference.compute_style(_p)
133
  else:
134
  print('unknonw list of style vector')
 
154
  soundfile.write('_st_' + OUT_FILE, total_style, fsr) # take this fs from the loading
155
 
156
  else:
157
+ print('\nALREADY EXISTS\n')