Pendrokar committed on
Commit
328b0e0
•
1 Parent(s): 61cef98

fastpitch diff return response

Browse files
Files changed (2) hide show
  1. app.py +47 -31
  2. gr_client.py +22 -9
app.py CHANGED
@@ -26,7 +26,7 @@ current_voice_type = None
26
  base_speaker_emb = ''
27
 
28
  def load_model(voice_model_name):
29
- global current_voice_model, current_voice_type
30
 
31
  if voice_model_name == 'x_selpahi':
32
  # Lojban
@@ -47,8 +47,6 @@ def load_model(voice_model_name):
47
  'pluginsContext': '{}',
48
  }
49
 
50
- embs = base_speaker_emb
51
-
52
  print('Loading voice model...')
53
  try:
54
  json_data = xvaserver.loadModel(data)
@@ -59,13 +57,13 @@ def load_model(voice_model_name):
59
  voice_model_json = json.load(f)
60
 
61
  if model_type == 'xVAPitch':
62
- embs = voice_model_json['games'][0]['base_speaker_emb']
63
  elif model_type == 'FastPitch1.1':
64
- embs = voice_model_json['games'][0]['resemblyzer']
65
  except requests.exceptions.RequestException as err:
66
  print(f'FAILED to load voice model: {err}')
67
 
68
- return embs
69
 
70
 
71
  class LocalBlocksDemo(BlocksDemo):
@@ -83,12 +81,14 @@ class LocalBlocksDemo(BlocksDemo):
83
  surprise,
84
  use_deepmoji
85
  ):
 
 
86
  # grab only the first 1000 characters
87
  input_text = input_text[:1000]
88
 
89
  # load voice model if not the current model
90
  if (current_voice_model != voice):
91
- base_speaker_emb = load_model(voice)
92
 
93
  model_type = current_voice_type
94
  pace = pacing if pacing else 1.0
@@ -144,34 +144,50 @@ class LocalBlocksDemo(BlocksDemo):
144
  # with open('resources/app/server.log', 'r') as f:
145
  # print(f.read())
146
 
147
- arpabet_html = '<h6>ARPAbet & Phoneme lengths</h6>'
148
- arpabet_symbols = json_data['arpabet'].split('|')
149
- utter_time = 0
150
- for symb_i in range(len(json_data['durations'])):
151
- # skip PAD symbol
152
- if (arpabet_symbols[symb_i] == '<PAD>'):
153
- continue
154
-
155
- length = float(json_data['durations'][symb_i])
156
- arpa_length = str(round(length/2, 1))
157
- arpabet_html += '<strong\
158
- class="arpabet"\
159
- style="padding: 0 '\
160
- + str(arpa_length)\
161
- +'em"'\
162
- +f" title=\"{utter_time} + {length}\""\
163
- +'>'\
164
- + arpabet_symbols[symb_i]\
165
- + '</strong> '
166
- utter_time += round(length, 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
 
168
  return [
169
  save_path,
170
  arpabet_html,
171
- round(json_data['em_angry'][0], 2),
172
- round(json_data['em_happy'][0], 2),
173
- round(json_data['em_sad'][0], 2),
174
- round(json_data['em_surprise'][0], 2),
175
  json_data
176
  ]
177
 
 
26
  base_speaker_emb = ''
27
 
28
  def load_model(voice_model_name):
29
+ global current_voice_model, current_voice_type, base_speaker_emb
30
 
31
  if voice_model_name == 'x_selpahi':
32
  # Lojban
 
47
  'pluginsContext': '{}',
48
  }
49
 
 
 
50
  print('Loading voice model...')
51
  try:
52
  json_data = xvaserver.loadModel(data)
 
57
  voice_model_json = json.load(f)
58
 
59
  if model_type == 'xVAPitch':
60
+ base_speaker_emb = voice_model_json['games'][0]['base_speaker_emb']
61
  elif model_type == 'FastPitch1.1':
62
+ base_speaker_emb = voice_model_json['games'][0]['resemblyzer']
63
  except requests.exceptions.RequestException as err:
64
  print(f'FAILED to load voice model: {err}')
65
 
66
+ return base_speaker_emb
67
 
68
 
69
  class LocalBlocksDemo(BlocksDemo):
 
81
  surprise,
82
  use_deepmoji
83
  ):
84
+ global current_voice_model, current_voice_type, base_speaker_emb
85
+
86
  # grab only the first 1000 characters
87
  input_text = input_text[:1000]
88
 
89
  # load voice model if not the current model
90
  if (current_voice_model != voice):
91
+ load_model(voice)
92
 
93
  model_type = current_voice_type
94
  pace = pacing if pacing else 1.0
 
144
  # with open('resources/app/server.log', 'r') as f:
145
  # print(f.read())
146
 
147
+ arpabet_html = ''
148
+ if voice == 'x_selpahi':
149
+ angry = 0
150
+ happy = 0
151
+ sad = 0
152
+ surprise = 0
153
+ else:
154
+ arpabet_html = '<h6>ARPAbet & Durations</h6>'
155
+ arpabet_html += '<table style="margin: 0 var(--size-2)"><tbody><tr>'
156
+ arpabet_nopad = json_data['arpabet'].split('|PAD|')
157
+ arpabet_symbols = json_data['arpabet'].split('|')
158
+ wpad_len = len(arpabet_symbols)
159
+ nopad_len = len(arpabet_nopad)
160
+ total_dur_length = 0
161
+ for symb_i in range(wpad_len):
162
+ if (arpabet_symbols[symb_i] == '<PAD>'):
163
+ continue
164
+ total_dur_length += float(json_data['durations'][symb_i])
165
+
166
+ for symb_i in range(wpad_len):
167
+ if (arpabet_symbols[symb_i] == '<PAD>'):
168
+ continue
169
+
170
+ arpabet_length = float(json_data['durations'][symb_i])
171
+ cell_width = round(arpabet_length / total_dur_length * 100, 2)
172
+ arpabet_html += '<td class="arpabet" style="width: '\
173
+ + str(cell_width)\
174
+ +'%">'\
175
+ + arpabet_symbols[symb_i]\
176
+ + '</td> '
177
+ arpabet_html += '<tr></tbody></table>'
178
+
179
+ angry = round(json_data['em_angry'][0], 2),
180
+ happy = round(json_data['em_happy'][0], 2),
181
+ sad = round(json_data['em_sad'][0], 2),
182
+ surprise = round(json_data['em_surprise'][0], 2),
183
 
184
  return [
185
  save_path,
186
  arpabet_html,
187
+ angry,
188
+ happy,
189
+ sad,
190
+ surprise,
191
  json_data
192
  ]
193
 
gr_client.py CHANGED
@@ -502,6 +502,7 @@ class BlocksDemo:
502
  inputs=voice_radio,
503
  outputs=output_wav,
504
  queue=True,
 
505
  )
506
 
507
  # Switched to Lojban voice
@@ -546,9 +547,16 @@ class BlocksDemo:
546
 
547
  json_data = json.loads(response.replace("'", '"'))
548
 
549
- arpabet_html = '<h6>ARPAbet & Durations</h6>'
550
- arpabet_html += '<table style="margin: 0 var(--size-2)"><tbody><tr>'
551
- arpabet_nopad = json_data['arpabet'].split('|PAD|')
 
 
 
 
 
 
 
552
  arpabet_symbols = json_data['arpabet'].split('|')
553
  wpad_len = len(arpabet_symbols)
554
  nopad_len = len(arpabet_nopad)
@@ -568,16 +576,21 @@ class BlocksDemo:
568
  + str(cell_width)\
569
  +'%">'\
570
  + arpabet_symbols[symb_i]\
571
- + '</td> '
572
- arpabet_html += '<tr></tbody></table>'
 
 
 
 
 
573
 
574
  return [
575
  wav_path,
576
  arpabet_html,
577
- round(json_data['em_angry'][0], 2),
578
- round(json_data['em_happy'][0], 2),
579
- round(json_data['em_sad'][0], 2),
580
- round(json_data['em_surprise'][0], 2),
581
  response
582
  ]
583
 
 
502
  inputs=voice_radio,
503
  outputs=output_wav,
504
  queue=True,
505
+ trigger_mode='once',
506
  )
507
 
508
  # Switched to Lojban voice
 
547
 
548
  json_data = json.loads(response.replace("'", '"'))
549
 
550
+ arpabet_html = ''
551
+ if voice == 'x_selpahi':
552
+ angry = 0
553
+ happy = 0
554
+ sad = 0
555
+ surprise = 0
556
+ else:
557
+ arpabet_html = '<h6>ARPAbet & Durations</h6>'
558
+ arpabet_html += '<table style="margin: 0 var(--size-2)"><tbody><tr>'
559
+ arpabet_nopad = json_data['arpabet'].split('|PAD|')
560
  arpabet_symbols = json_data['arpabet'].split('|')
561
  wpad_len = len(arpabet_symbols)
562
  nopad_len = len(arpabet_nopad)
 
576
  + str(cell_width)\
577
  +'%">'\
578
  + arpabet_symbols[symb_i]\
579
+ + '</td> '
580
+ arpabet_html += '<tr></tbody></table>'
581
+
582
+ angry = round(json_data['em_angry'][0], 2),
583
+ happy = round(json_data['em_happy'][0], 2),
584
+ sad = round(json_data['em_sad'][0], 2),
585
+ surprise = round(json_data['em_surprise'][0], 2),
586
 
587
  return [
588
  wav_path,
589
  arpabet_html,
590
+ angry,
591
+ happy,
592
+ sad,
593
+ surprise,
594
  response
595
  ]
596