Spaces:
Build error
Build error
Update audio_foundation_models.py
Browse files
audio_foundation_models.py
CHANGED
@@ -113,7 +113,7 @@ class T2A:
|
|
113 |
def __init__(self, device):
|
114 |
print("Initializing Make-An-Audio to %s" % device)
|
115 |
self.device = device
|
116 |
-
self.sampler = initialize_model('configs/text-to-audio/txt2audio_args.yaml', 'useful_ckpts/ta40multi_epoch=000085.ckpt', device=device)
|
117 |
self.vocoder = VocoderBigVGAN('text_to_audio/Make_An_Audio/vocoder/logs/bigv16k53w',device=device)
|
118 |
|
119 |
def txt2audio(self, text, seed = 55, scale = 1.5, ddim_steps = 100, n_samples = 3, W = 624, H = 80):
|
@@ -160,8 +160,8 @@ class I2A:
|
|
160 |
def __init__(self, device):
|
161 |
print("Initializing Make-An-Audio-Image to %s" % device)
|
162 |
self.device = device
|
163 |
-
self.sampler = initialize_model('text_to_audio/
|
164 |
-
self.vocoder = VocoderBigVGAN('text_to_audio/
|
165 |
def img2audio(self, image, seed = 55, scale = 3, ddim_steps = 100, W = 624, H = 80):
|
166 |
SAMPLE_RATE = 16000
|
167 |
n_samples = 1 # only support 1 sample
|
@@ -224,7 +224,7 @@ class T2S:
|
|
224 |
print("Initializing DiffSinger to %s" % device)
|
225 |
self.device = device
|
226 |
self.exp_name = 'checkpoints/0831_opencpop_ds1000'
|
227 |
-
self.config= '
|
228 |
self.set_model_hparams()
|
229 |
self.pipe = DiffSingerE2EInfer(self.hp, device)
|
230 |
self.default_inp = {
|
@@ -259,7 +259,7 @@ class TTS_OOD:
|
|
259 |
print("Initializing GenerSpeech to %s" % device)
|
260 |
self.device = device
|
261 |
self.exp_name = 'checkpoints/GenerSpeech'
|
262 |
-
self.config = '
|
263 |
self.set_model_hparams()
|
264 |
self.pipe = GenerSpeechInfer(self.hp, device)
|
265 |
|
@@ -291,7 +291,7 @@ class Inpaint:
|
|
291 |
def __init__(self, device):
|
292 |
print("Initializing Make-An-Audio-inpaint to %s" % device)
|
293 |
self.device = device
|
294 |
-
self.sampler = initialize_model_inpaint('text_to_audio/
|
295 |
self.vocoder = VocoderBigVGAN('./vocoder/logs/bigv16k53w',device=device)
|
296 |
self.cmap_transform = matplotlib.cm.viridis
|
297 |
def make_batch_sd(self, mel, mask, num_samples=1):
|
|
|
113 |
def __init__(self, device):
|
114 |
print("Initializing Make-An-Audio to %s" % device)
|
115 |
self.device = device
|
116 |
+
self.sampler = initialize_model('text_to_audio/Make_An_Audio/configs/text-to-audio/txt2audio_args.yaml', 'text_to_audio/Make_An_Audio/useful_ckpts/ta40multi_epoch=000085.ckpt', device=device)
|
117 |
self.vocoder = VocoderBigVGAN('text_to_audio/Make_An_Audio/vocoder/logs/bigv16k53w',device=device)
|
118 |
|
119 |
def txt2audio(self, text, seed = 55, scale = 1.5, ddim_steps = 100, n_samples = 3, W = 624, H = 80):
|
|
|
160 |
def __init__(self, device):
|
161 |
print("Initializing Make-An-Audio-Image to %s" % device)
|
162 |
self.device = device
|
163 |
+
self.sampler = initialize_model('text_to_audio/Make_An_Audio/configs/img_to_audio/img2audio_args.yaml', 'text_to_audio/Make_An_Audio/useful_ckpts/ta54_epoch=000216.ckpt', device=device)
|
164 |
+
self.vocoder = VocoderBigVGAN('text_to_audio/Make_An_Audio/vocoder/logs/bigv16k53w',device=device)
|
165 |
def img2audio(self, image, seed = 55, scale = 3, ddim_steps = 100, W = 624, H = 80):
|
166 |
SAMPLE_RATE = 16000
|
167 |
n_samples = 1 # only support 1 sample
|
|
|
224 |
print("Initializing DiffSinger to %s" % device)
|
225 |
self.device = device
|
226 |
self.exp_name = 'checkpoints/0831_opencpop_ds1000'
|
227 |
+
self.config= 'NeuralSeq/egs/egs_bases/svs/midi/e2e/opencpop/ds1000.yaml'
|
228 |
self.set_model_hparams()
|
229 |
self.pipe = DiffSingerE2EInfer(self.hp, device)
|
230 |
self.default_inp = {
|
|
|
259 |
print("Initializing GenerSpeech to %s" % device)
|
260 |
self.device = device
|
261 |
self.exp_name = 'checkpoints/GenerSpeech'
|
262 |
+
self.config = 'NeuralSeq/modules/GenerSpeech/config/generspeech.yaml'
|
263 |
self.set_model_hparams()
|
264 |
self.pipe = GenerSpeechInfer(self.hp, device)
|
265 |
|
|
|
291 |
def __init__(self, device):
|
292 |
print("Initializing Make-An-Audio-inpaint to %s" % device)
|
293 |
self.device = device
|
294 |
+
self.sampler = initialize_model_inpaint('text_to_audio/Make_An_Audio/configs/inpaint/txt2audio_args.yaml', 'text_to_audio/Make_An_Audio/useful_ckpts/inpaint7_epoch00047.ckpt')
|
295 |
self.vocoder = VocoderBigVGAN('./vocoder/logs/bigv16k53w',device=device)
|
296 |
self.cmap_transform = matplotlib.cm.viridis
|
297 |
def make_batch_sd(self, mel, mask, num_samples=1):
|