Spaces:

ganga4364
/

mms-tts-bod-speaker

Sleeping

App Files Files Community

ganga4364 committed on Oct 8

Commit

786f6ac

•

1 Parent(s): 7070768

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -0

app.py CHANGED Viewed

	@@ -0,0 +1,40 @@

+import tempfile
+
+import gradio as gr
+import numpy as np
+import scipy.io.wavfile
+import torch
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+# Load the MMS-TTS model and processor for Tibetan (bod)
+model_id = "ganga4364/mms-tts-bod-female"  # Replace with your fine-tuned model if necessary
+processor = AutoProcessor.from_pretrained(model_id)
+model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id)
+# Use the text-to-speech pipeline with the model
+synthesiser = pipeline("text-to-speech", model=model, processor=processor, device=0)  # Use GPU if available
+# Function to perform TTS inference and save audio to a file
+def generate_audio(input_text):
+    # Perform TTS inference
+    speech = synthesiser(input_text)
+    # Extract the audio data and sampling rate from the pipeline output
+    audio_data = np.array(speech["audio"])
+    sample_rate = speech["sampling_rate"]
+    # Save the audio to a file (e.g., 'output.wav')
+    file_path = "output_tibetan.wav"
+    scipy.io.wavfile.write(file_path, rate=sample_rate, data=audio_data.astype(np.int16))  # Ensure correct format
+    # Return the path to the audio file
+    return file_path
+# Create the Gradio interface
+iface = gr.Interface(
+    fn=generate_audio,
+    inputs="text",  # Text input for the TTS
+    outputs="audio",  # Output will be an audio file
+    title="Tibetan Text-to-Speech (MMS-TTS)",
+    description="Enter Tibetan text and generate speech using MMS-TTS."
+)
+# Launch the Gradio interface
+iface.launch()