# app.py — Tibetan text-to-speech demo (Hugging Face Space by ganga4364, commit 2939710)
import gradio as gr
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
import scipy.io.wavfile
import numpy as np
# Fine-tuned MMS-TTS checkpoint for Tibetan (language code "bod").
# Swap in a different checkpoint here if you retrain the model.
_CHECKPOINT = "ganga4364/mms-tts-bod-finetune-sherab"

# Build a text-to-speech pipeline around the checkpoint.
# Pass device=0 to run on GPU instead of CPU.
synthesiser = pipeline("text-to-speech", _CHECKPOINT)
# Function to perform TTS inference and save audio to a file
def generate_audio(input_text):
    """Synthesize speech for *input_text* and return the path to a WAV file.

    Args:
        input_text: Text to synthesize (expected to be Tibetan script,
            matching the fine-tuned MMS-TTS model's language).

    Returns:
        Path to the WAV file written in the working directory. The same
        file is overwritten on every call.
    """
    # Run TTS inference; the pipeline returns a dict with an "audio"
    # array (batch-first) and its "sampling_rate".
    speech = synthesiser(input_text)

    file_path = "finetuned_output.wav"
    # Write the first (only) waveform in the batch to disk.
    scipy.io.wavfile.write(file_path, rate=speech["sampling_rate"], data=speech["audio"][0])
    return file_path
# UI copy shown on the demo page.
_TITLE = "Tibetan Text-to-Speech (MMS-TTS)"
_DESC = "Enter Tibetan text and generate speech using MMS-TTS."

# Wire the synthesis function into a simple text-in / audio-out interface.
iface = gr.Interface(
    fn=generate_audio,
    inputs="text",
    outputs="audio",
    title=_TITLE,
    description=_DESC,
)

# Start the web app.
iface.launch()