update
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- app.py +12 -6
- audiotools/__pycache__/__init__.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/__init__.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/audio_signal.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/display.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/dsp.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/effects.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/ffmpeg.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/loudness.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/playback.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/util.cpython-310.pyc +0 -0
- audiotools/core/__pycache__/whisper.cpython-310.pyc +0 -0
- audiotools/core/templates/__pycache__/__init__.cpython-310.pyc +0 -0
- audiotools/data/__pycache__/__init__.cpython-310.pyc +0 -0
- audiotools/data/__pycache__/datasets.cpython-310.pyc +0 -0
- audiotools/data/__pycache__/preprocess.cpython-310.pyc +0 -0
- audiotools/data/__pycache__/transforms.cpython-310.pyc +0 -0
- audiotools/metrics/__pycache__/__init__.cpython-310.pyc +0 -0
- audiotools/metrics/__pycache__/distance.cpython-310.pyc +0 -0
- audiotools/metrics/__pycache__/quality.cpython-310.pyc +0 -0
- audiotools/metrics/__pycache__/spectral.cpython-310.pyc +0 -0
- audiotools/ml/__pycache__/__init__.cpython-310.pyc +0 -0
- audiotools/ml/__pycache__/accelerator.cpython-310.pyc +0 -0
- audiotools/ml/__pycache__/decorators.cpython-310.pyc +0 -0
- audiotools/ml/__pycache__/experiment.cpython-310.pyc +0 -0
- audiotools/ml/layers/__pycache__/__init__.cpython-310.pyc +0 -0
- audiotools/ml/layers/__pycache__/base.cpython-310.pyc +0 -0
- audiotools/ml/layers/__pycache__/spectral_gate.cpython-310.pyc +0 -0
- src/__pycache__/inference.cpython-310.pyc +0 -0
- src/models/__pycache__/blocks.cpython-310.pyc +0 -0
- src/models/__pycache__/conditioners.cpython-310.pyc +0 -0
- src/models/__pycache__/udit.cpython-310.pyc +0 -0
- src/models/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- src/models/utils/__pycache__/attention.cpython-310.pyc +0 -0
- src/models/utils/__pycache__/modules.cpython-310.pyc +0 -0
- src/models/utils/__pycache__/rotary.cpython-310.pyc +0 -0
- src/models/utils/__pycache__/span_mask.cpython-310.pyc +0 -0
- src/models/utils/__pycache__/timm.cpython-310.pyc +0 -0
- src/modules/__pycache__/autoencoder_wrapper.cpython-310.pyc +0 -0
- src/modules/dac/__pycache__/__init__.cpython-310.pyc +0 -0
- src/modules/dac/model/__pycache__/__init__.cpython-310.pyc +0 -0
- src/modules/dac/model/__pycache__/base.cpython-310.pyc +0 -0
- src/modules/dac/model/__pycache__/dac.cpython-310.pyc +0 -0
- src/modules/dac/model/__pycache__/discriminator.cpython-310.pyc +0 -0
- src/modules/dac/nn/__pycache__/__init__.cpython-310.pyc +0 -0
- src/modules/dac/nn/__pycache__/layers.cpython-310.pyc +0 -0
- src/modules/dac/nn/__pycache__/loss.cpython-310.pyc +0 -0
- src/modules/dac/nn/__pycache__/quantize.cpython-310.pyc +0 -0
- src/modules/dac/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- src/modules/stable_vae/__pycache__/__init__.cpython-310.pyc +0 -0
app.py
CHANGED
@@ -1,10 +1,11 @@
|
|
1 |
import os
|
2 |
import torch
|
3 |
import random
|
4 |
-
import spaces
|
5 |
import numpy as np
|
6 |
import gradio as gr
|
7 |
import soundfile as sf
|
|
|
8 |
from transformers import T5Tokenizer, T5EncoderModel
|
9 |
from diffusers import DDIMScheduler
|
10 |
from src.models.conditioners import MaskDiT
|
@@ -33,9 +34,12 @@ def load_models(config_name, ckpt_path, vae_path, device):
|
|
33 |
unet.load_state_dict(torch.load(ckpt_path)['model'])
|
34 |
unet.eval()
|
35 |
|
|
|
|
|
|
|
36 |
# Load noise scheduler
|
37 |
noise_scheduler = DDIMScheduler(**params['diff'])
|
38 |
-
|
39 |
latents = torch.randn((1, 128, 128), device=device)
|
40 |
noise = torch.randn_like(latents)
|
41 |
timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (1,), device=device)
|
@@ -43,6 +47,7 @@ def load_models(config_name, ckpt_path, vae_path, device):
|
|
43 |
|
44 |
return autoencoder, unet, tokenizer, text_encoder, noise_scheduler, params
|
45 |
|
|
|
46 |
MAX_SEED = np.iinfo(np.int32).max
|
47 |
|
48 |
# Model and config paths
|
@@ -57,6 +62,7 @@ device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
|
57 |
autoencoder, unet, tokenizer, text_encoder, noise_scheduler, params = load_models(config_name, ckpt_path, vae_path,
|
58 |
device)
|
59 |
|
|
|
60 |
@spaces.GPU
|
61 |
def generate_audio(text, length,
|
62 |
guidance_scale, guidance_rescale, ddim_steps, eta,
|
@@ -102,7 +108,7 @@ css = """
|
|
102 |
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
|
103 |
with gr.Column(elem_id="col-container"):
|
104 |
gr.Markdown("""
|
105 |
-
# EzAudio Text-to-Audio Generator
|
106 |
Generate audio from text using a diffusion transformer. Adjust advanced settings for more control.
|
107 |
""")
|
108 |
|
@@ -125,10 +131,10 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
|
|
125 |
with gr.Accordion("Advanced Settings", open=False):
|
126 |
guidance_scale = gr.Slider(minimum=1.0, maximum=10, step=0.1, value=5.0, label="Guidance Scale")
|
127 |
guidance_rescale = gr.Slider(minimum=0.0, maximum=1, step=0.05, value=0.75, label="Guidance Rescale")
|
128 |
-
ddim_steps = gr.Slider(minimum=25, maximum=200, step=5, value=
|
129 |
eta = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1.0, label="Eta")
|
130 |
seed = gr.Slider(minimum=0, maximum=MAX_SEED, step=1, value=0, label="Seed")
|
131 |
-
randomize_seed = gr.Checkbox(label="Randomize Seed", value=
|
132 |
|
133 |
# Examples block
|
134 |
gr.Examples(
|
@@ -147,4 +153,4 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
|
|
147 |
)
|
148 |
|
149 |
# Launch the Gradio demo
|
150 |
-
demo.launch()
|
|
|
1 |
import os
|
2 |
import torch
|
3 |
import random
|
4 |
+
# import spaces
|
5 |
import numpy as np
|
6 |
import gradio as gr
|
7 |
import soundfile as sf
|
8 |
+
from accelerate import Accelerator
|
9 |
from transformers import T5Tokenizer, T5EncoderModel
|
10 |
from diffusers import DDIMScheduler
|
11 |
from src.models.conditioners import MaskDiT
|
|
|
34 |
unet.load_state_dict(torch.load(ckpt_path)['model'])
|
35 |
unet.eval()
|
36 |
|
37 |
+
accelerator = Accelerator(mixed_precision="fp16")
|
38 |
+
unet = accelerator.prepare(unet)
|
39 |
+
|
40 |
# Load noise scheduler
|
41 |
noise_scheduler = DDIMScheduler(**params['diff'])
|
42 |
+
|
43 |
latents = torch.randn((1, 128, 128), device=device)
|
44 |
noise = torch.randn_like(latents)
|
45 |
timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (1,), device=device)
|
|
|
47 |
|
48 |
return autoencoder, unet, tokenizer, text_encoder, noise_scheduler, params
|
49 |
|
50 |
+
|
51 |
MAX_SEED = np.iinfo(np.int32).max
|
52 |
|
53 |
# Model and config paths
|
|
|
62 |
autoencoder, unet, tokenizer, text_encoder, noise_scheduler, params = load_models(config_name, ckpt_path, vae_path,
|
63 |
device)
|
64 |
|
65 |
+
|
66 |
@spaces.GPU
|
67 |
def generate_audio(text, length,
|
68 |
guidance_scale, guidance_rescale, ddim_steps, eta,
|
|
|
108 |
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
|
109 |
with gr.Column(elem_id="col-container"):
|
110 |
gr.Markdown("""
|
111 |
+
# EzAudio: High-quality Text-to-Audio Generator
|
112 |
Generate audio from text using a diffusion transformer. Adjust advanced settings for more control.
|
113 |
""")
|
114 |
|
|
|
131 |
with gr.Accordion("Advanced Settings", open=False):
|
132 |
guidance_scale = gr.Slider(minimum=1.0, maximum=10, step=0.1, value=5.0, label="Guidance Scale")
|
133 |
guidance_rescale = gr.Slider(minimum=0.0, maximum=1, step=0.05, value=0.75, label="Guidance Rescale")
|
134 |
+
ddim_steps = gr.Slider(minimum=25, maximum=200, step=5, value=50, label="DDIM Steps")
|
135 |
eta = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1.0, label="Eta")
|
136 |
seed = gr.Slider(minimum=0, maximum=MAX_SEED, step=1, value=0, label="Seed")
|
137 |
+
randomize_seed = gr.Checkbox(label="Randomize Seed (Disable Seed)", value=True)
|
138 |
|
139 |
# Examples block
|
140 |
gr.Examples(
|
|
|
153 |
)
|
154 |
|
155 |
# Launch the Gradio demo
|
156 |
+
demo.launch()
|
audiotools/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (432 Bytes). View file
|
|
audiotools/core/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (283 Bytes). View file
|
|
audiotools/core/__pycache__/audio_signal.cpython-310.pyc
ADDED
Binary file (45.4 kB). View file
|
|
audiotools/core/__pycache__/display.cpython-310.pyc
ADDED
Binary file (6.36 kB). View file
|
|
audiotools/core/__pycache__/dsp.cpython-310.pyc
ADDED
Binary file (11.6 kB). View file
|
|
audiotools/core/__pycache__/effects.cpython-310.pyc
ADDED
Binary file (17.5 kB). View file
|
|
audiotools/core/__pycache__/ffmpeg.cpython-310.pyc
ADDED
Binary file (5.59 kB). View file
|
|
audiotools/core/__pycache__/loudness.cpython-310.pyc
ADDED
Binary file (8.44 kB). View file
|
|
audiotools/core/__pycache__/playback.cpython-310.pyc
ADDED
Binary file (6.87 kB). View file
|
|
audiotools/core/__pycache__/util.cpython-310.pyc
ADDED
Binary file (18.6 kB). View file
|
|
audiotools/core/__pycache__/whisper.cpython-310.pyc
ADDED
Binary file (2.93 kB). View file
|
|
audiotools/core/templates/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (145 Bytes). View file
|
|
audiotools/data/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (234 Bytes). View file
|
|
audiotools/data/__pycache__/datasets.cpython-310.pyc
ADDED
Binary file (17 kB). View file
|
|
audiotools/data/__pycache__/preprocess.cpython-310.pyc
ADDED
Binary file (2.83 kB). View file
|
|
audiotools/data/__pycache__/transforms.cpython-310.pyc
ADDED
Binary file (55.5 kB). View file
|
|
audiotools/metrics/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (310 Bytes). View file
|
|
audiotools/metrics/__pycache__/distance.cpython-310.pyc
ADDED
Binary file (3.82 kB). View file
|
|
audiotools/metrics/__pycache__/quality.cpython-310.pyc
ADDED
Binary file (4.45 kB). View file
|
|
audiotools/metrics/__pycache__/spectral.cpython-310.pyc
ADDED
Binary file (7.43 kB). View file
|
|
audiotools/ml/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (321 Bytes). View file
|
|
audiotools/ml/__pycache__/accelerator.cpython-310.pyc
ADDED
Binary file (6.65 kB). View file
|
|
audiotools/ml/__pycache__/decorators.cpython-310.pyc
ADDED
Binary file (14.2 kB). View file
|
|
audiotools/ml/__pycache__/experiment.cpython-310.pyc
ADDED
Binary file (3.32 kB). View file
|
|
audiotools/ml/layers/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (228 Bytes). View file
|
|
audiotools/ml/layers/__pycache__/base.cpython-310.pyc
ADDED
Binary file (9.27 kB). View file
|
|
audiotools/ml/layers/__pycache__/spectral_gate.cpython-310.pyc
ADDED
Binary file (3.87 kB). View file
|
|
src/__pycache__/inference.cpython-310.pyc
ADDED
Binary file (4.24 kB). View file
|
|
src/models/__pycache__/blocks.cpython-310.pyc
ADDED
Binary file (7.27 kB). View file
|
|
src/models/__pycache__/conditioners.cpython-310.pyc
ADDED
Binary file (5.59 kB). View file
|
|
src/models/__pycache__/udit.cpython-310.pyc
ADDED
Binary file (7.86 kB). View file
|
|
src/models/utils/__pycache__/__init__.cpython-310.pyc
CHANGED
Binary files a/src/models/utils/__pycache__/__init__.cpython-310.pyc and b/src/models/utils/__pycache__/__init__.cpython-310.pyc differ
|
|
src/models/utils/__pycache__/attention.cpython-310.pyc
CHANGED
Binary files a/src/models/utils/__pycache__/attention.cpython-310.pyc and b/src/models/utils/__pycache__/attention.cpython-310.pyc differ
|
|
src/models/utils/__pycache__/modules.cpython-310.pyc
CHANGED
Binary files a/src/models/utils/__pycache__/modules.cpython-310.pyc and b/src/models/utils/__pycache__/modules.cpython-310.pyc differ
|
|
src/models/utils/__pycache__/rotary.cpython-310.pyc
CHANGED
Binary files a/src/models/utils/__pycache__/rotary.cpython-310.pyc and b/src/models/utils/__pycache__/rotary.cpython-310.pyc differ
|
|
src/models/utils/__pycache__/span_mask.cpython-310.pyc
CHANGED
Binary files a/src/models/utils/__pycache__/span_mask.cpython-310.pyc and b/src/models/utils/__pycache__/span_mask.cpython-310.pyc differ
|
|
src/models/utils/__pycache__/timm.cpython-310.pyc
CHANGED
Binary files a/src/models/utils/__pycache__/timm.cpython-310.pyc and b/src/models/utils/__pycache__/timm.cpython-310.pyc differ
|
|
src/modules/__pycache__/autoencoder_wrapper.cpython-310.pyc
ADDED
Binary file (2.34 kB). View file
|
|
src/modules/dac/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (451 Bytes). View file
|
|
src/modules/dac/model/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (292 Bytes). View file
|
|
src/modules/dac/model/__pycache__/base.cpython-310.pyc
ADDED
Binary file (7.19 kB). View file
|
|
src/modules/dac/model/__pycache__/dac.cpython-310.pyc
ADDED
Binary file (10.6 kB). View file
|
|
src/modules/dac/model/__pycache__/discriminator.cpython-310.pyc
ADDED
Binary file (7.99 kB). View file
|
|
src/modules/dac/nn/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (227 Bytes). View file
|
|
src/modules/dac/nn/__pycache__/layers.cpython-310.pyc
ADDED
Binary file (1.45 kB). View file
|
|
src/modules/dac/nn/__pycache__/loss.cpython-310.pyc
ADDED
Binary file (11.6 kB). View file
|
|
src/modules/dac/nn/__pycache__/quantize.cpython-310.pyc
ADDED
Binary file (8.66 kB). View file
|
|
src/modules/dac/utils/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (2.84 kB). View file
|
|
src/modules/stable_vae/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (1.2 kB). View file
|
|