lmzjms committed on
Commit
fda2ed9
1 Parent(s): ea5f319

Update audio_foundation_models.py

Browse files
Files changed (1) hide show
  1. audio_foundation_models.py +6 -6
audio_foundation_models.py CHANGED
@@ -113,7 +113,7 @@ class T2A:
113
  def __init__(self, device):
114
  print("Initializing Make-An-Audio to %s" % device)
115
  self.device = device
116
- self.sampler = initialize_model('configs/text-to-audio/txt2audio_args.yaml', 'useful_ckpts/ta40multi_epoch=000085.ckpt', device=device)
117
  self.vocoder = VocoderBigVGAN('text_to_audio/Make_An_Audio/vocoder/logs/bigv16k53w',device=device)
118
 
119
  def txt2audio(self, text, seed = 55, scale = 1.5, ddim_steps = 100, n_samples = 3, W = 624, H = 80):
@@ -160,8 +160,8 @@ class I2A:
160
  def __init__(self, device):
161
  print("Initializing Make-An-Audio-Image to %s" % device)
162
  self.device = device
163
- self.sampler = initialize_model('text_to_audio/Make_An_Audio_img/configs/img_to_audio/img2audio_args.yaml', 'text_to_audio/Make_An_Audio_img/useful_ckpts/ta54_epoch=000216.ckpt', device=device)
164
- self.vocoder = VocoderBigVGAN('text_to_audio/Make_An_Audio_img/vocoder/logs/bigv16k53w',device=device)
165
  def img2audio(self, image, seed = 55, scale = 3, ddim_steps = 100, W = 624, H = 80):
166
  SAMPLE_RATE = 16000
167
  n_samples = 1 # only support 1 sample
@@ -224,7 +224,7 @@ class T2S:
224
  print("Initializing DiffSinger to %s" % device)
225
  self.device = device
226
  self.exp_name = 'checkpoints/0831_opencpop_ds1000'
227
- self.config= 'text_to_sing/DiffSinger/usr/configs/midi/e2e/opencpop/ds1000.yaml'
228
  self.set_model_hparams()
229
  self.pipe = DiffSingerE2EInfer(self.hp, device)
230
  self.default_inp = {
@@ -259,7 +259,7 @@ class TTS_OOD:
259
  print("Initializing GenerSpeech to %s" % device)
260
  self.device = device
261
  self.exp_name = 'checkpoints/GenerSpeech'
262
- self.config = 'text_to_sing/DiffSinger/modules/GenerSpeech/config/generspeech.yaml'
263
  self.set_model_hparams()
264
  self.pipe = GenerSpeechInfer(self.hp, device)
265
 
@@ -291,7 +291,7 @@ class Inpaint:
291
  def __init__(self, device):
292
  print("Initializing Make-An-Audio-inpaint to %s" % device)
293
  self.device = device
294
- self.sampler = initialize_model_inpaint('text_to_audio/Make_An_Audio_inpaint/configs/inpaint/txt2audio_args.yaml', 'text_to_audio/Make_An_Audio_inpaint/useful_ckpts/inpaint7_epoch00047.ckpt')
295
  self.vocoder = VocoderBigVGAN('./vocoder/logs/bigv16k53w',device=device)
296
  self.cmap_transform = matplotlib.cm.viridis
297
  def make_batch_sd(self, mel, mask, num_samples=1):
 
113
  def __init__(self, device):
114
  print("Initializing Make-An-Audio to %s" % device)
115
  self.device = device
116
+ self.sampler = initialize_model('text_to_audio/Make_An_Audio/configs/text-to-audio/txt2audio_args.yaml', 'text_to_audio/Make_An_Audio/useful_ckpts/ta40multi_epoch=000085.ckpt', device=device)
117
  self.vocoder = VocoderBigVGAN('text_to_audio/Make_An_Audio/vocoder/logs/bigv16k53w',device=device)
118
 
119
  def txt2audio(self, text, seed = 55, scale = 1.5, ddim_steps = 100, n_samples = 3, W = 624, H = 80):
 
160
  def __init__(self, device):
161
  print("Initializing Make-An-Audio-Image to %s" % device)
162
  self.device = device
163
+ self.sampler = initialize_model('text_to_audio/Make_An_Audio/configs/img_to_audio/img2audio_args.yaml', 'text_to_audio/Make_An_Audio/useful_ckpts/ta54_epoch=000216.ckpt', device=device)
164
+ self.vocoder = VocoderBigVGAN('text_to_audio/Make_An_Audio/vocoder/logs/bigv16k53w',device=device)
165
  def img2audio(self, image, seed = 55, scale = 3, ddim_steps = 100, W = 624, H = 80):
166
  SAMPLE_RATE = 16000
167
  n_samples = 1 # only support 1 sample
 
224
  print("Initializing DiffSinger to %s" % device)
225
  self.device = device
226
  self.exp_name = 'checkpoints/0831_opencpop_ds1000'
227
+ self.config= 'NeuralSeq/egs/egs_bases/svs/midi/e2e/opencpop/ds1000.yaml'
228
  self.set_model_hparams()
229
  self.pipe = DiffSingerE2EInfer(self.hp, device)
230
  self.default_inp = {
 
259
  print("Initializing GenerSpeech to %s" % device)
260
  self.device = device
261
  self.exp_name = 'checkpoints/GenerSpeech'
262
+ self.config = 'NeuralSeq/modules/GenerSpeech/config/generspeech.yaml'
263
  self.set_model_hparams()
264
  self.pipe = GenerSpeechInfer(self.hp, device)
265
 
 
291
  def __init__(self, device):
292
  print("Initializing Make-An-Audio-inpaint to %s" % device)
293
  self.device = device
294
+ self.sampler = initialize_model_inpaint('text_to_audio/Make_An_Audio/configs/inpaint/txt2audio_args.yaml', 'text_to_audio/Make_An_Audio/useful_ckpts/inpaint7_epoch00047.ckpt')
295
  self.vocoder = VocoderBigVGAN('./vocoder/logs/bigv16k53w',device=device)
296
  self.cmap_transform = matplotlib.cm.viridis
297
  def make_batch_sd(self, mel, mask, num_samples=1):