EzAudio

Paused

App Files Community

OpenSound committed on Sep 16

Commit

eb39bd3

•

1 Parent(s): 5d104f9

update

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

app.py +12 -6
audiotools/__pycache__/__init__.cpython-310.pyc +0 -0
audiotools/core/__pycache__/__init__.cpython-310.pyc +0 -0
audiotools/core/__pycache__/audio_signal.cpython-310.pyc +0 -0
audiotools/core/__pycache__/display.cpython-310.pyc +0 -0
audiotools/core/__pycache__/dsp.cpython-310.pyc +0 -0
audiotools/core/__pycache__/effects.cpython-310.pyc +0 -0
audiotools/core/__pycache__/ffmpeg.cpython-310.pyc +0 -0
audiotools/core/__pycache__/loudness.cpython-310.pyc +0 -0
audiotools/core/__pycache__/playback.cpython-310.pyc +0 -0
audiotools/core/__pycache__/util.cpython-310.pyc +0 -0
audiotools/core/__pycache__/whisper.cpython-310.pyc +0 -0
audiotools/core/templates/__pycache__/__init__.cpython-310.pyc +0 -0
audiotools/data/__pycache__/__init__.cpython-310.pyc +0 -0
audiotools/data/__pycache__/datasets.cpython-310.pyc +0 -0
audiotools/data/__pycache__/preprocess.cpython-310.pyc +0 -0
audiotools/data/__pycache__/transforms.cpython-310.pyc +0 -0
audiotools/metrics/__pycache__/__init__.cpython-310.pyc +0 -0
audiotools/metrics/__pycache__/distance.cpython-310.pyc +0 -0
audiotools/metrics/__pycache__/quality.cpython-310.pyc +0 -0
audiotools/metrics/__pycache__/spectral.cpython-310.pyc +0 -0
audiotools/ml/__pycache__/__init__.cpython-310.pyc +0 -0
audiotools/ml/__pycache__/accelerator.cpython-310.pyc +0 -0
audiotools/ml/__pycache__/decorators.cpython-310.pyc +0 -0
audiotools/ml/__pycache__/experiment.cpython-310.pyc +0 -0
audiotools/ml/layers/__pycache__/__init__.cpython-310.pyc +0 -0
audiotools/ml/layers/__pycache__/base.cpython-310.pyc +0 -0
audiotools/ml/layers/__pycache__/spectral_gate.cpython-310.pyc +0 -0
src/__pycache__/inference.cpython-310.pyc +0 -0
src/models/__pycache__/blocks.cpython-310.pyc +0 -0
src/models/__pycache__/conditioners.cpython-310.pyc +0 -0
src/models/__pycache__/udit.cpython-310.pyc +0 -0
src/models/utils/__pycache__/__init__.cpython-310.pyc +0 -0
src/models/utils/__pycache__/attention.cpython-310.pyc +0 -0
src/models/utils/__pycache__/modules.cpython-310.pyc +0 -0
src/models/utils/__pycache__/rotary.cpython-310.pyc +0 -0
src/models/utils/__pycache__/span_mask.cpython-310.pyc +0 -0
src/models/utils/__pycache__/timm.cpython-310.pyc +0 -0
src/modules/__pycache__/autoencoder_wrapper.cpython-310.pyc +0 -0
src/modules/dac/__pycache__/__init__.cpython-310.pyc +0 -0
src/modules/dac/model/__pycache__/__init__.cpython-310.pyc +0 -0
src/modules/dac/model/__pycache__/base.cpython-310.pyc +0 -0
src/modules/dac/model/__pycache__/dac.cpython-310.pyc +0 -0
src/modules/dac/model/__pycache__/discriminator.cpython-310.pyc +0 -0
src/modules/dac/nn/__pycache__/__init__.cpython-310.pyc +0 -0
src/modules/dac/nn/__pycache__/layers.cpython-310.pyc +0 -0
src/modules/dac/nn/__pycache__/loss.cpython-310.pyc +0 -0
src/modules/dac/nn/__pycache__/quantize.cpython-310.pyc +0 -0
src/modules/dac/utils/__pycache__/__init__.cpython-310.pyc +0 -0
src/modules/stable_vae/__pycache__/__init__.cpython-310.pyc +0 -0

app.py CHANGED Viewed

@@ -1,10 +1,11 @@
 import os
 import torch
 import random
-import spaces
 import numpy as np
 import gradio as gr
 import soundfile as sf
 from transformers import T5Tokenizer, T5EncoderModel
 from diffusers import DDIMScheduler
 from src.models.conditioners import MaskDiT
@@ -33,9 +34,12 @@ def load_models(config_name, ckpt_path, vae_path, device):
     unet.load_state_dict(torch.load(ckpt_path)['model'])
     unet.eval()
     # Load noise scheduler
     noise_scheduler = DDIMScheduler(**params['diff'])
     latents = torch.randn((1, 128, 128), device=device)
     noise = torch.randn_like(latents)
     timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (1,), device=device)
@@ -43,6 +47,7 @@ def load_models(config_name, ckpt_path, vae_path, device):
     return autoencoder, unet, tokenizer, text_encoder, noise_scheduler, params
 MAX_SEED = np.iinfo(np.int32).max
 # Model and config paths
@@ -57,6 +62,7 @@ device = 'cuda' if torch.cuda.is_available() else 'cpu'
 autoencoder, unet, tokenizer, text_encoder, noise_scheduler, params = load_models(config_name, ckpt_path, vae_path,
                                                                                   device)
 @spaces.GPU
 def generate_audio(text, length,
                    guidance_scale, guidance_rescale, ddim_steps, eta,
@@ -102,7 +108,7 @@ css = """
 with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown("""
-        # EzAudio Text-to-Audio Generator
         Generate audio from text using a diffusion transformer. Adjust advanced settings for more control.
         """)
@@ -125,10 +131,10 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
         with gr.Accordion("Advanced Settings", open=False):
             guidance_scale = gr.Slider(minimum=1.0, maximum=10, step=0.1, value=5.0, label="Guidance Scale")
             guidance_rescale = gr.Slider(minimum=0.0, maximum=1, step=0.05, value=0.75, label="Guidance Rescale")
-            ddim_steps = gr.Slider(minimum=25, maximum=200, step=5, value=100, label="DDIM Steps")
             eta = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1.0, label="Eta")
             seed = gr.Slider(minimum=0, maximum=MAX_SEED, step=1, value=0, label="Seed")
-            randomize_seed = gr.Checkbox(label="Randomize Seed", value=False)
         # Examples block
         gr.Examples(
@@ -147,4 +153,4 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
         )
 # Launch the Gradio demo
-demo.launch()

 import os
 import torch
 import random
+# import spaces
 import numpy as np
 import gradio as gr
 import soundfile as sf
+from accelerate import Accelerator
 from transformers import T5Tokenizer, T5EncoderModel
 from diffusers import DDIMScheduler
 from src.models.conditioners import MaskDiT
     unet.load_state_dict(torch.load(ckpt_path)['model'])
     unet.eval()
+    accelerator = Accelerator(mixed_precision="fp16")
+    unet = accelerator.prepare(unet)
     # Load noise scheduler
     noise_scheduler = DDIMScheduler(**params['diff'])
     latents = torch.randn((1, 128, 128), device=device)
     noise = torch.randn_like(latents)
     timesteps = torch.randint(0, noise_scheduler.config.num_train_timesteps, (1,), device=device)
     return autoencoder, unet, tokenizer, text_encoder, noise_scheduler, params
 MAX_SEED = np.iinfo(np.int32).max
 # Model and config paths
 autoencoder, unet, tokenizer, text_encoder, noise_scheduler, params = load_models(config_name, ckpt_path, vae_path,
                                                                                   device)
 @spaces.GPU
 def generate_audio(text, length,
                    guidance_scale, guidance_rescale, ddim_steps, eta,
 with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
     with gr.Column(elem_id="col-container"):
         gr.Markdown("""
+        # EzAudio: High-quality Text-to-Audio Generator
         Generate audio from text using a diffusion transformer. Adjust advanced settings for more control.
         """)
         with gr.Accordion("Advanced Settings", open=False):
             guidance_scale = gr.Slider(minimum=1.0, maximum=10, step=0.1, value=5.0, label="Guidance Scale")
             guidance_rescale = gr.Slider(minimum=0.0, maximum=1, step=0.05, value=0.75, label="Guidance Rescale")
+            ddim_steps = gr.Slider(minimum=25, maximum=200, step=5, value=50, label="DDIM Steps")
             eta = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1.0, label="Eta")
             seed = gr.Slider(minimum=0, maximum=MAX_SEED, step=1, value=0, label="Seed")
+            randomize_seed = gr.Checkbox(label="Randomize Seed (Disable Seed)", value=True)
         # Examples block
         gr.Examples(
         )
 # Launch the Gradio demo
+demo.launch()

audiotools/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (432 Bytes). View file

audiotools/core/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (283 Bytes). View file

audiotools/core/__pycache__/audio_signal.cpython-310.pyc ADDED Viewed

Binary file (45.4 kB). View file

audiotools/core/__pycache__/display.cpython-310.pyc ADDED Viewed

Binary file (6.36 kB). View file

audiotools/core/__pycache__/dsp.cpython-310.pyc ADDED Viewed

Binary file (11.6 kB). View file

audiotools/core/__pycache__/effects.cpython-310.pyc ADDED Viewed

Binary file (17.5 kB). View file

audiotools/core/__pycache__/ffmpeg.cpython-310.pyc ADDED Viewed

Binary file (5.59 kB). View file

audiotools/core/__pycache__/loudness.cpython-310.pyc ADDED Viewed

Binary file (8.44 kB). View file

audiotools/core/__pycache__/playback.cpython-310.pyc ADDED Viewed

Binary file (6.87 kB). View file

audiotools/core/__pycache__/util.cpython-310.pyc ADDED Viewed

Binary file (18.6 kB). View file

audiotools/core/__pycache__/whisper.cpython-310.pyc ADDED Viewed

Binary file (2.93 kB). View file

audiotools/core/templates/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (145 Bytes). View file

audiotools/data/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (234 Bytes). View file

audiotools/data/__pycache__/datasets.cpython-310.pyc ADDED Viewed

Binary file (17 kB). View file

audiotools/data/__pycache__/preprocess.cpython-310.pyc ADDED Viewed

Binary file (2.83 kB). View file

audiotools/data/__pycache__/transforms.cpython-310.pyc ADDED Viewed

Binary file (55.5 kB). View file

audiotools/metrics/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (310 Bytes). View file

audiotools/metrics/__pycache__/distance.cpython-310.pyc ADDED Viewed

Binary file (3.82 kB). View file

audiotools/metrics/__pycache__/quality.cpython-310.pyc ADDED Viewed

Binary file (4.45 kB). View file

audiotools/metrics/__pycache__/spectral.cpython-310.pyc ADDED Viewed

Binary file (7.43 kB). View file

audiotools/ml/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (321 Bytes). View file

audiotools/ml/__pycache__/accelerator.cpython-310.pyc ADDED Viewed

Binary file (6.65 kB). View file

audiotools/ml/__pycache__/decorators.cpython-310.pyc ADDED Viewed

Binary file (14.2 kB). View file

audiotools/ml/__pycache__/experiment.cpython-310.pyc ADDED Viewed

Binary file (3.32 kB). View file

audiotools/ml/layers/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (228 Bytes). View file

audiotools/ml/layers/__pycache__/base.cpython-310.pyc ADDED Viewed

Binary file (9.27 kB). View file

audiotools/ml/layers/__pycache__/spectral_gate.cpython-310.pyc ADDED Viewed

Binary file (3.87 kB). View file

src/__pycache__/inference.cpython-310.pyc ADDED Viewed

Binary file (4.24 kB). View file

src/models/__pycache__/blocks.cpython-310.pyc ADDED Viewed

Binary file (7.27 kB). View file

src/models/__pycache__/conditioners.cpython-310.pyc ADDED Viewed

Binary file (5.59 kB). View file

src/models/__pycache__/udit.cpython-310.pyc ADDED Viewed

Binary file (7.86 kB). View file

src/models/utils/__pycache__/__init__.cpython-310.pyc CHANGED Viewed

Binary files a/src/models/utils/__pycache__/__init__.cpython-310.pyc and b/src/models/utils/__pycache__/__init__.cpython-310.pyc differ

src/models/utils/__pycache__/attention.cpython-310.pyc CHANGED Viewed

Binary files a/src/models/utils/__pycache__/attention.cpython-310.pyc and b/src/models/utils/__pycache__/attention.cpython-310.pyc differ

src/models/utils/__pycache__/modules.cpython-310.pyc CHANGED Viewed

Binary files a/src/models/utils/__pycache__/modules.cpython-310.pyc and b/src/models/utils/__pycache__/modules.cpython-310.pyc differ

src/models/utils/__pycache__/rotary.cpython-310.pyc CHANGED Viewed

Binary files a/src/models/utils/__pycache__/rotary.cpython-310.pyc and b/src/models/utils/__pycache__/rotary.cpython-310.pyc differ

src/models/utils/__pycache__/span_mask.cpython-310.pyc CHANGED Viewed

Binary files a/src/models/utils/__pycache__/span_mask.cpython-310.pyc and b/src/models/utils/__pycache__/span_mask.cpython-310.pyc differ

src/models/utils/__pycache__/timm.cpython-310.pyc CHANGED Viewed

Binary files a/src/models/utils/__pycache__/timm.cpython-310.pyc and b/src/models/utils/__pycache__/timm.cpython-310.pyc differ

src/modules/__pycache__/autoencoder_wrapper.cpython-310.pyc ADDED Viewed

Binary file (2.34 kB). View file

src/modules/dac/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (451 Bytes). View file

src/modules/dac/model/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (292 Bytes). View file

src/modules/dac/model/__pycache__/base.cpython-310.pyc ADDED Viewed

Binary file (7.19 kB). View file

src/modules/dac/model/__pycache__/dac.cpython-310.pyc ADDED Viewed

Binary file (10.6 kB). View file

src/modules/dac/model/__pycache__/discriminator.cpython-310.pyc ADDED Viewed

Binary file (7.99 kB). View file

src/modules/dac/nn/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (227 Bytes). View file

src/modules/dac/nn/__pycache__/layers.cpython-310.pyc ADDED Viewed

Binary file (1.45 kB). View file

src/modules/dac/nn/__pycache__/loss.cpython-310.pyc ADDED Viewed

Binary file (11.6 kB). View file

src/modules/dac/nn/__pycache__/quantize.cpython-310.pyc ADDED Viewed

Binary file (8.66 kB). View file

src/modules/dac/utils/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (2.84 kB). View file

src/modules/stable_vae/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (1.2 kB). View file