Spaces:
Running
on
T4
Running
on
T4
georg-suno
committed on
Commit
•
1892f86
1
Parent(s):
bbc001b
add new prompts
Browse files
app.py
CHANGED
@@ -1,19 +1,27 @@
|
|
1 |
import numpy as np
|
2 |
import gradio as gr
|
3 |
from bark import SAMPLE_RATE, generate_audio, preload_models
|
|
|
4 |
|
5 |
DEBUG_MODE = False
|
6 |
|
7 |
if not DEBUG_MODE:
|
8 |
_ = preload_models()
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
default_text = "Hello, my name is Suno. And, uh — and I like pizza. [laughs]\nBut I also have other interests such as playing tic tac toe."
|
11 |
|
12 |
def gen_tts(text, history_prompt, temp_semantic, temp_waveform):
|
13 |
-
|
14 |
-
history_prompt = None
|
15 |
-
else:
|
16 |
-
history_prompt = history_prompt.lower().replace(" ", "_").replace("speaker", "speech")
|
17 |
if DEBUG_MODE:
|
18 |
audio_arr = np.zeros(SAMPLE_RATE)
|
19 |
else:
|
@@ -26,10 +34,7 @@ iface = gr.Interface(
|
|
26 |
fn=gen_tts,
|
27 |
inputs=[
|
28 |
gr.Textbox(label="Input Text", lines=3, value=default_text),
|
29 |
-
gr.Dropdown(
|
30 |
-
["Unconditional"] + [f"Speaker {n}" for n in range(8)] + [f"Music {n}" for n in range(6)],
|
31 |
-
value="None", label="Acoustic Prompt", info="This choice primes the model on how to condition the generated audio."
|
32 |
-
),
|
33 |
gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label="Temp 1", info="Gen. temperature of semantic tokens. (lower is more conservative, higher is more diverse)"),
|
34 |
gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label="Temp 2", info="Gen. temperature of waveform tokens. (lower is more conservative, higher is more diverse)"),
|
35 |
],
|
|
|
import numpy as np
import gradio as gr
from bark import SAMPLE_RATE, generate_audio, preload_models
from bark.generation import SUPPORTED_LANGS

# When True the app skips model loading and serves silent audio —
# handy for iterating on the UI without a GPU.
DEBUG_MODE = False

if not DEBUG_MODE:
    _ = preload_models()
|
10 |
|
11 |
# Dropdown choices shown to the user, and a map from each human-readable
# label to the bark history-prompt identifier it selects.
AVAILABLE_PROMPTS = ["Unconditional", "Announcer"]
PROMPT_LOOKUP = {}
for _, lang_code in SUPPORTED_LANGS:
    for speaker_idx in range(10):
        choice = f"Speaker {speaker_idx} ({lang_code})"
        AVAILABLE_PROMPTS.append(choice)
        PROMPT_LOOKUP[choice] = f"{lang_code}_speaker_{speaker_idx}"
PROMPT_LOOKUP["Unconditional"] = None  # no conditioning prompt at all
PROMPT_LOOKUP["Announcer"] = "announcer"
|
20 |
+
|
21 |
default_text = "Hello, my name is Suno. And, uh — and I like pizza. [laughs]\nBut I also have other interests such as playing tic tac toe."
|
22 |
|
23 |
def gen_tts(text, history_prompt, temp_semantic, temp_waveform):
|
24 |
+
history_prompt = PROMPT_LOOKUP[history_prompt]
|
|
|
|
|
|
|
25 |
if DEBUG_MODE:
|
26 |
audio_arr = np.zeros(SAMPLE_RATE)
|
27 |
else:
|
|
|
fn=gen_tts,
inputs=[
    gr.Textbox(label="Input Text", lines=3, value=default_text),
    # BUG FIX: the default was value="None" — a literal string that is not in
    # AVAILABLE_PROMPTS and not a key of PROMPT_LOOKUP, so gen_tts's
    # PROMPT_LOOKUP[history_prompt] raised KeyError on the default selection.
    # "Unconditional" (which maps to a None history prompt) is the intended default.
    gr.Dropdown(AVAILABLE_PROMPTS, value="Unconditional", label="Acoustic Prompt", info="This choice primes the model on how to condition the generated audio."),
    gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label="Temp 1", info="Gen. temperature of semantic tokens. (lower is more conservative, higher is more diverse)"),
    gr.Slider(minimum=0, maximum=1, step=0.01, value=0.7, label="Temp 2", info="Gen. temperature of waveform tokens. (lower is more conservative, higher is more diverse)"),
],
|