lmzjms committed on
Commit
7c71ec4
1 Parent(s): 9e43f21

Update audio_foundation_models.py

Browse files
Files changed (1) hide show
  1. audio_foundation_models.py +27 -10
audio_foundation_models.py CHANGED
@@ -190,7 +190,7 @@ class I2A:
190
 
191
  @prompts(name="Generate Audio From The Image",
192
  description="useful for when you want to generate an audio "
193
- "based on an image.""
194
  "The input to this tool should be a string, "
195
  "representing the image_path. ")
196
 
@@ -237,6 +237,23 @@ class I2A:
237
  print(f"Processed I2a.run, image_filename: {image}, audio_filename: {audio_filename}")
238
  return audio_filename
239
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  class T2S:
241
  def __init__(self, device= None):
242
  if device is None:
@@ -394,14 +411,6 @@ class Inpaint:
394
  input_wav = ori_wav[:input_len]
395
  mel = TRANSFORMS_16000(input_wav)
396
  return mel
397
- def show_mel_fn(self, input_audio_path):
398
- crop_len = 500 # the full mel cannot be showed due to gradio's Image bug when using tool='sketch'
399
- crop_mel = self.gen_mel(input_audio_path)[:,:crop_len]
400
- color_mel = self.cmap_transform(crop_mel)
401
- image = Image.fromarray((color_mel*255).astype(np.uint8))
402
- image_filename = os.path.join('image', str(uuid.uuid4())[0:8] + ".png")
403
- image.save(image_filename)
404
- return image_filename
405
  def inpaint(self, batch, seed, ddim_steps, num_samples=1, W=512, H=512):
406
  model = self.sampler.model
407
 
@@ -432,7 +441,7 @@ class Inpaint:
432
  inapint_wav = self.vocoder.vocode(inpainted)
433
 
434
  return inpainted, inapint_wav
435
- def inference(self, input_audio, mel_and_mask, seed = 55, ddim_steps = 100):
436
  SAMPLE_RATE = 16000
437
  torch.set_grad_enabled(False)
438
  mel_img = Image.open(mel_and_mask['image'])
@@ -462,6 +471,14 @@ class Inpaint:
462
  audio_filename = os.path.join('audio', str(uuid.uuid4())[0:8] + ".wav")
463
  soundfile.write(audio_filename, gen_wav, samplerate = 16000)
464
  return image_filename, audio_filename
 
 
 
 
 
 
 
 
465
 
466
  class ASR:
467
  def __init__(self, device):
 
190
 
191
  @prompts(name="Generate Audio From The Image",
192
  description="useful for when you want to generate an audio "
193
+ "based on an image. "
194
  "The input to this tool should be a string, "
195
  "representing the image_path. ")
196
 
 
237
  print(f"Processed I2a.run, image_filename: {image}, audio_filename: {audio_filename}")
238
  return audio_filename
239
 
240
+ class TTS:
241
+ def __init__(self, device=None):
242
+ self.inferencer = TTSInference(device)
243
+
244
+ @prompts(name="Synthesize Speech Given the User Input Text",
245
+ description="useful for when you want to convert a user input text into speech audio it saved it to a file."
246
+ "The input to this tool should be a string, "
247
+ "representing the text used to be converted to speech.")
248
+
249
+ def inference(self, text):
250
+ global temp_audio_filename
251
+ inp = {"text": text}
252
+ out = self.inferencer.infer_once(inp)
253
+ audio_filename = os.path.join('audio', str(uuid.uuid4())[0:8] + ".wav")
254
+ soundfile.write(audio_filename, out, samplerate = 22050)
255
+ return audio_filename
256
+
257
  class T2S:
258
  def __init__(self, device= None):
259
  if device is None:
 
411
  input_wav = ori_wav[:input_len]
412
  mel = TRANSFORMS_16000(input_wav)
413
  return mel
 
 
 
 
 
 
 
 
414
  def inpaint(self, batch, seed, ddim_steps, num_samples=1, W=512, H=512):
415
  model = self.sampler.model
416
 
 
441
  inapint_wav = self.vocoder.vocode(inpainted)
442
 
443
  return inpainted, inapint_wav
444
+ def predict(self, input_audio, mel_and_mask, seed = 55, ddim_steps = 100):
445
  SAMPLE_RATE = 16000
446
  torch.set_grad_enabled(False)
447
  mel_img = Image.open(mel_and_mask['image'])
 
471
  audio_filename = os.path.join('audio', str(uuid.uuid4())[0:8] + ".wav")
472
  soundfile.write(audio_filename, gen_wav, samplerate = 16000)
473
  return image_filename, audio_filename
474
+ def inference(self, input_audio_path):
475
+ crop_len = 500 # the full mel cannot be showed due to gradio's Image bug when using tool='sketch'
476
+ crop_mel = self.gen_mel(input_audio_path)[:,:crop_len]
477
+ color_mel = self.cmap_transform(crop_mel)
478
+ image = Image.fromarray((color_mel*255).astype(np.uint8))
479
+ image_filename = os.path.join('image', str(uuid.uuid4())[0:8] + ".png")
480
+ image.save(image_filename)
481
+ return image_filename
482
 
483
  class ASR:
484
  def __init__(self, device):