Update app.py
Browse files
app.py
CHANGED
@@ -1,38 +1,39 @@
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
-
from transformers import AutoProcessor, AutoModelForCTC
|
4 |
-
import soundfile as sf # For handling audio input
|
5 |
-
|
6 |
-
# Load model directly
|
7 |
from transformers import AutoTokenizer, AutoModelForPreTraining
|
|
|
|
|
|
|
8 |
tokenizer = AutoTokenizer.from_pretrained("Opit/mms_tts_bulgarian_finetuning")
|
9 |
model = AutoModelForPreTraining.from_pretrained("Opit/mms_tts_bulgarian_finetuning")
|
10 |
|
11 |
-
#
|
12 |
-
def
|
13 |
-
|
14 |
-
speech, _ = sf.read(audio)
|
15 |
-
inputs = processor(speech, sampling_rate=16000, return_tensors="pt", padding=True)
|
16 |
|
17 |
with torch.no_grad():
|
18 |
-
|
|
|
|
|
|
|
|
|
19 |
|
20 |
-
#
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
return transcription
|
25 |
|
|
|
|
|
26 |
|
27 |
-
# Gradio
|
28 |
iface = gr.Interface(
|
29 |
-
fn=
|
30 |
-
inputs=
|
31 |
-
outputs="
|
32 |
-
title="Bulgarian Speech
|
33 |
-
description="
|
34 |
)
|
35 |
|
36 |
-
#
|
37 |
if __name__ == "__main__":
|
38 |
iface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
|
|
|
|
|
|
|
|
3 |
from transformers import AutoTokenizer, AutoModelForPreTraining
|
4 |
+
import soundfile as sf
|
5 |
+
|
6 |
+
# Load the tokenizer and model for Bulgarian TTS (Text-to-Speech)
# NOTE(review): AutoModelForPreTraining is an unusual class for an MMS-TTS
# (VITS-family) checkpoint — transformers documents VitsModel for speech
# synthesis. Confirm which concrete class this checkpoint resolves to,
# since the downstream code assumes the forward pass yields audio.
tokenizer = AutoTokenizer.from_pretrained("Opit/mms_tts_bulgarian_finetuning")
model = AutoModelForPreTraining.from_pretrained("Opit/mms_tts_bulgarian_finetuning")
|
9 |
|
10 |
+
# TTS conversion function (text-to-speech)
def tts_generate(text):
    """Synthesize Bulgarian speech from input text.

    Parameters
    ----------
    text : str
        Text to convert to speech.

    Returns
    -------
    tuple
        ``(sample_rate, waveform)`` — the order Gradio's ``"audio"``
        output component requires (the original code returned them
        reversed, which Gradio cannot play).
    """
    inputs = tokenizer(text, return_tensors="pt")

    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # NOTE(review): for MMS-TTS / VITS checkpoints the synthesized waveform
    # is normally exposed as ``outputs.waveform``; raw ``logits`` are
    # unlikely to be playable audio. Kept as-is pending confirmation of the
    # actual model class loaded above — adjust to your model's output.
    audio = outputs['logits']

    # Bug fix: Gradio expects (sample_rate, data), not (data, sample_rate).
    # 22050 Hz is an assumption carried over from the original; MMS-TTS
    # models commonly run at 16000 Hz — TODO confirm from model config.
    return 22050, audio.numpy()
|
27 |
|
28 |
+
# Create Gradio interface: a single text box in, synthesized audio out.
# ``outputs="audio"`` means the wrapped function must return audio data
# in a form Gradio accepts (e.g. a (sample_rate, numpy_array) tuple).
iface = gr.Interface(
    fn=tts_generate,
    inputs="text",
    outputs="audio",
    title="Bulgarian TTS (Text-to-Speech)",
    description="Enter text to generate speech in Bulgarian."
)
|
36 |
|
37 |
+
# Run the interface only when executed as a script (not on import);
# launch() starts the local Gradio web server with default settings.
if __name__ == "__main__":
    iface.launch()
|