ganga4364 committed on
Commit
786f6ac
1 Parent(s): 7070768

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -0
app.py CHANGED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
import numpy as np
import scipy.io.wavfile
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
5
+
6
# Load the MMS-TTS model for Tibetan (bod).
model_id = "ganga4364/mms-tts-bod-female"  # Replace with your fine-tuned model if necessary

# Use the first CUDA GPU when one is present; otherwise fall back to the CPU
# (-1) so the app still starts on CPU-only hosts instead of failing on a
# hard-coded device index.
device = 0 if torch.cuda.is_available() else -1

# MMS-TTS checkpoints are VITS models, which are not registered under
# AutoModelForSpeechSeq2Seq. Passing the model id lets the text-to-speech
# pipeline resolve the matching model and tokenizer classes itself.
synthesiser = pipeline("text-to-speech", model=model_id, device=device)

# Keep the original module-level names available for any code that used them.
model = synthesiser.model
processor = synthesiser.tokenizer
13
+
14
# Function to perform TTS inference and save audio to a file
def generate_audio(input_text):
    """Synthesise speech for ``input_text`` and write it to a WAV file.

    Args:
        input_text: Text to pass to the module-level ``synthesiser`` pipeline.

    Returns:
        Path of the 16-bit PCM WAV file that was written
        (``"output_tibetan.wav"``, relative to the working directory).
    """
    # Perform TTS inference
    speech = synthesiser(input_text)

    # Extract the audio data and sampling rate from the pipeline output.
    # Singleton axes (e.g. a leading batch/channel axis of size 1) are dropped
    # so the WAV writer sees a 1-D run of samples rather than a single frame
    # with many channels.
    waveform = np.atleast_1d(np.squeeze(np.asarray(speech["audio"])))
    sample_rate = speech["sampling_rate"]

    # Floating-point audio is scaled to the int16 range before the cast;
    # casting unscaled values in [-1, 1] would truncate almost every sample
    # to 0 and produce a silent file. Clipping prevents integer wrap-around.
    if np.issubdtype(waveform.dtype, np.floating):
        waveform = np.clip(waveform, -1.0, 1.0) * np.iinfo(np.int16).max
    pcm = waveform.astype(np.int16)

    # Save the audio to a file
    file_path = "output_tibetan.wav"
    scipy.io.wavfile.write(file_path, rate=sample_rate, data=pcm)

    # Return the path to the audio file
    return file_path
29
+
30
# Describe the Gradio UI: a text input wired to generate_audio, with the
# returned value shown through an audio output component.
_interface_options = dict(
    fn=generate_audio,
    inputs="text",
    outputs="audio",
    title="Tibetan Text-to-Speech (MMS-TTS)",
    description="Enter Tibetan text and generate speech using MMS-TTS.",
)
iface = gr.Interface(**_interface_options)

# Start serving the interface
iface.launch()