AdalAbilbekov committed on
Commit
9e9a056
1 Parent(s): 08f5766
app.py CHANGED
@@ -65,35 +65,42 @@ vocoder.remove_weight_norm()
65
  emotions = sorted(["angry", "surprise", "fear", "happy", "neutral", "sad"])
66
  spekears = ['Madi', 'Marzhan', 'Akzhol']
67
 
68
def generate_audio(text, quantity, speaker, emotion_1, emotion_2):
    """Synthesize speech for ``text`` as a weighted mixture of two emotions.

    Args:
        text: Text to synthesize; it is passed to ``convert_text``.
        quantity: Weight of ``emotion_1`` in percent. It is divided by 100
            and handed to the decoder as ``emo1_weight``.
        speaker: Narrator name; must be an element of ``spekears``.
        emotion_1: First emotion name; must be an element of ``emotions``.
        emotion_2: Second emotion name; must be an element of ``emotions``.

    Returns:
        A ``(sample_rate, audio)`` tuple where ``sample_rate`` is 22050 and
        ``audio`` is a NumPy ``int16`` array.

    Raises:
        ValueError: If ``speaker`` or an emotion name is not in its list
            (raised by ``list.index``).
    """
    x, x_lengths = convert_text(text)
    emo1 = torch.LongTensor([emotions.index(emotion_1)]).to(device)
    emo2 = torch.LongTensor([emotions.index(emotion_2)]).to(device)
    sid = torch.LongTensor([spekears.index(speaker)]).to(device)
    intensity = quantity / 100

    _, y_dec, _ = gradtts_uncond_model.classifier_guidance_decode_two_mixture(
        x, x_lengths,
        n_timesteps=10,
        temperature=2.0,
        stoc=args.stoc,
        spk=sid,
        emo1=emo1,
        emo2=emo2,
        emo1_weight=intensity,
        length_scale=1.,
        classifier_func=model.forward,
        guidance=300,
        classifier_type=model.model_type
    )

    # Keep the decoder output as a tensor: the previous
    # ``.to(device).numpy()`` round-trip raised for any non-CPU device.
    mel = y_dec.detach().squeeze().unsqueeze(0)
    # Assumes the vocoder is a torch.nn.Module with parameters (it exposes
    # remove_weight_norm()); feed it on whatever device its weights live on.
    vocoder_device = next(vocoder.parameters()).device
    with torch.no_grad():
        audio = vocoder(mel.to(vocoder_device)).squeeze()

    # Scale to the int16 range and clamp so that full-scale samples do not
    # wrap around when cast.
    audio = (audio * 32768.0).clamp(-32768.0, 32767.0)
    audio = audio.detach().cpu().numpy().astype('int16')
    sr = 22050
    return (sr, audio)
99
 
@@ -102,6 +109,7 @@ demo = gr.Interface(
102
  [
103
  gr.Textbox(value='Батпақ соры шабындыққа және жыл бойғы жайылымға пайдаланылады.', label="Text you want to synthesize"),
104
  gr.Slider(0, 100, value=50, step=10, label="Count", info="Choose between 0 and 100"),
 
105
  gr.Dropdown(spekears, value=spekears[1], label="Narrator", info="Select a narrator."
106
  ),
107
  gr.Dropdown(emotions, value=emotions[0], label="Emotion 1", info="Select first emotion"),
 
65
  emotions = sorted(["angry", "surprise", "fear", "happy", "neutral", "sad"])
66
  spekears = ['Madi', 'Marzhan', 'Akzhol']
67
 
68
def generate_audio(text, quantity, guid, speaker, emotion_1, emotion_2):
    """Synthesize speech for ``text`` as a weighted mixture of two emotions.

    Decoding is retried with a lower guidance value whenever the decoder
    output contains NaNs.

    Args:
        text: Text to synthesize; it is passed to ``convert_text``.
        quantity: Weight of ``emotion_1`` in percent. It is divided by 100
            and handed to the decoder as ``emo1_weight``.
        guid: Initial classifier-guidance value. After a NaN result the
            value is lowered by 50 and decoding is retried, for as long as
            the lowered value stays positive.
        speaker: Narrator name; must be an element of ``spekears``.
        emotion_1: First emotion name; must be an element of ``emotions``.
        emotion_2: Second emotion name; must be an element of ``emotions``.

    Returns:
        A ``(sample_rate, audio)`` tuple where ``sample_rate`` is 22050 and
        ``audio`` is a NumPy ``int16`` array. If every attempt produced
        NaNs, the NaN samples are returned as zeros.

    Raises:
        ValueError: If ``speaker`` or an emotion name is not in its list
            (raised by ``list.index``).
    """
    # None of these depend on the guidance value, so build them once rather
    # than once per retry.
    tokens, token_lengths = convert_text(text)
    emo1 = torch.LongTensor([emotions.index(emotion_1)]).to(device)
    emo2 = torch.LongTensor([emotions.index(emotion_2)]).to(device)
    sid = torch.LongTensor([spekears.index(speaker)]).to(device)
    intensity = quantity / 100

    guidance = guid
    while True:
        _, y_dec, _ = gradtts_uncond_model.classifier_guidance_decode_two_mixture(
            tokens, token_lengths,
            n_timesteps=100,
            temperature=2.0,
            stoc=args.stoc,
            spk=sid,
            emo1=emo1,
            emo2=emo2,
            emo1_weight=intensity,
            length_scale=1.,
            classifier_func=model.forward,
            guidance=guidance,
            classifier_type=model.model_type
        )
        y_dec = y_dec.detach()
        if not torch.isnan(y_dec).any():
            break
        # NaN output: back off the guidance and try again while it is
        # still positive.
        guidance -= 50
        if guidance <= 0:
            print('generate_audio: decoder output still contains NaN after '
                  'lowering guidance; NaN samples are replaced with silence')
            break

    # Run the vocoder once, on the final decoder output only. Keeping the
    # data as a tensor avoids the previous ``.to(device).numpy()``
    # round-trip, which raised for any non-CPU device.
    mel = y_dec.squeeze().unsqueeze(0)
    # Assumes the vocoder is a torch.nn.Module with parameters (it exposes
    # remove_weight_norm()); feed it on whatever device its weights live on.
    vocoder_device = next(vocoder.parameters()).device
    with torch.no_grad():
        audio = vocoder(mel.to(vocoder_device)).squeeze()

    # Scale to the int16 range; zero out NaNs and clamp so the cast below
    # is well defined and full-scale samples do not wrap around.
    audio = torch.nan_to_num(audio * 32768.0, nan=0.0)
    audio = audio.clamp(-32768.0, 32767.0)
    audio = audio.detach().cpu().numpy().astype('int16')
    sr = 22050
    return (sr, audio)
106
 
 
109
  [
110
  gr.Textbox(value='Батпақ соры шабындыққа және жыл бойғы жайылымға пайдаланылады.', label="Text you want to synthesize"),
111
  gr.Slider(0, 100, value=50, step=10, label="Count", info="Choose between 0 and 100"),
112
+ gr.Slider(0, 1000, value=100, step=10, label="Guidance", info="Choose between 0 and 1000"),
113
  gr.Dropdown(spekears, value=spekears[1], label="Narrator", info="Select a narrator."
114
  ),
115
  gr.Dropdown(emotions, value=emotions[0], label="Emotion 1", info="Select first emotion"),
flagged/log.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Text you want to synthesize,Count,Guidance,Narrator,Emotion 1,Emotion 2,output,flag,username,timestamp
2
+ Батпақ соры шабындыққа және жыл бойғы жайылымға пайдаланылады.,50,20,Marzhan,angry,neutral,flagged/output/2e9a3b60dc40f07d4db8/audio.wav,,,2024-03-26 16:38:18.508507
3
+ Батпақ соры шабындыққа және жыл бойғы жайылымға пайдаланылады.,50,300,Marzhan,happy,neutral,flagged/output/85c2e39535a1879bccc5/audio.wav,,,2024-03-29 22:08:19.838874
flagged/output/2e9a3b60dc40f07d4db8/audio.wav ADDED
Binary file (171 kB). View file
 
flagged/output/85c2e39535a1879bccc5/audio.wav ADDED
Binary file (173 kB). View file