englissi committed
Commit ff0bf3d • 1 Parent(s): 805ef56

Update app.py

Files changed (1)
  1. app.py +23 -22
app.py CHANGED
@@ -1,38 +1,39 @@
 import gradio as gr
 import torch
-from transformers import AutoProcessor, AutoModelForCTC
-import soundfile as sf  # For handling audio input
-
-# Load model directly
 from transformers import AutoTokenizer, AutoModelForPreTraining
+import soundfile as sf
+
+# Load the tokenizer and model for Bulgarian TTS (Text-to-Speech)
 tokenizer = AutoTokenizer.from_pretrained("Opit/mms_tts_bulgarian_finetuning")
 model = AutoModelForPreTraining.from_pretrained("Opit/mms_tts_bulgarian_finetuning")
 
-# ASR conversion function (speech-to-text)
-def asr_generate(audio):
-    # Load and process the audio file
-    speech, _ = sf.read(audio)
-    inputs = processor(speech, sampling_rate=16000, return_tensors="pt", padding=True)
+# TTS conversion function (text-to-speech)
+def tts_generate(text):
+    inputs = tokenizer(text, return_tensors="pt")
 
     with torch.no_grad():
-        logits = model(**inputs).logits
+        outputs = model(**inputs)
+
+    # Convert the model outputs to audio (implementation depends on the model)
+    # This will depend on how the model's outputs are structured
+    # For now, assume a simple conversion to a waveform is needed
 
-    # Get predicted IDs and decode the text
-    predicted_ids = torch.argmax(logits, dim=-1)
-    transcription = processor.batch_decode(predicted_ids)[0]
-
-    return transcription
+    # Placeholder: assuming `outputs` contains audio data that can be returned directly
+    # You might need to adjust this based on how the TTS model outputs speech
+    audio = outputs['logits']  # Adjust according to your model's output structure
 
+    # Return the audio output (in NumPy format) and the sample rate (may be model-specific)
+    return audio.numpy(), 22050  # Assuming the output is sampled at 22050 Hz
 
-# Create the Gradio interface
+# Create Gradio interface
 iface = gr.Interface(
-    fn=asr_generate,
-    inputs=gr.Audio(source="microphone", type="filepath"),
-    outputs="text",
-    title="Bulgarian Speech Recognition",
-    description="Upload or record audio in Bulgarian to get the transcription."
+    fn=tts_generate,
+    inputs="text",
+    outputs="audio",
+    title="Bulgarian TTS (Text-to-Speech)",
+    description="Enter text to generate speech in Bulgarian."
 )
 
-# Launch the interface
+# Run the interface
 if __name__ == "__main__":
     iface.launch()
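
Note on the committed tts_generate: the placeholder returns outputs['logits'] from AutoModelForPreTraining, which is not a synthesized waveform, and Gradio's audio output expects the sample rate as the first element of the returned tuple. MMS TTS checkpoints are VITS models, so a minimal working sketch, assuming the Opit/mms_tts_bulgarian_finetuning fine-tune keeps the base VITS architecture and loads with VitsModel, could look like this:

import gradio as gr
import torch
from transformers import AutoTokenizer, VitsModel

# Assumption: the fine-tuned checkpoint keeps the VITS architecture of the base
# facebook/mms-tts models, so it can be loaded with VitsModel.
MODEL_ID = "Opit/mms_tts_bulgarian_finetuning"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = VitsModel.from_pretrained(MODEL_ID)

def tts_generate(text):
    inputs = tokenizer(text, return_tensors="pt")
    with torch.no_grad():
        # VitsModel returns the synthesized speech in the `waveform` field
        waveform = model(**inputs).waveform
    # Gradio's audio output takes a (sample_rate, numpy_array) tuple;
    # the model reports its own rate in model.config.sampling_rate.
    return model.config.sampling_rate, waveform.squeeze().numpy()

iface = gr.Interface(
    fn=tts_generate,
    inputs="text",
    outputs="audio",
    title="Bulgarian TTS (Text-to-Speech)",
    description="Enter text to generate speech in Bulgarian.",
)

if __name__ == "__main__":
    iface.launch()

Here the sample rate is read from model.config.sampling_rate (16 kHz for the base MMS TTS checkpoints) rather than hard-coding 22050.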