Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,7 @@ import torchaudio
|
|
8 |
|
9 |
processor = AutoProcessor.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
|
10 |
model = AutoModelForCTC.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
|
11 |
-
audio_input = gr.
|
12 |
text_output = gr.TextArea(label="متن فارسی", type="text")
|
13 |
def ASR(audio):
|
14 |
pipe = pipeline("automatic-speech-recognition", model="SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
|
@@ -21,12 +21,10 @@ def ASR(audio):
|
|
21 |
# Resample the audio to 16kHz
|
22 |
resampler = torchaudio.transforms.Resample(sample_rate, 16000)
|
23 |
waveform = resampler(waveform)
|
24 |
-
# Convert the audio to a single channel
|
25 |
-
waveform = torch.mean(waveform, dim=0, keepdim=True)
|
26 |
# Convert the PyTorch tensor to a NumPy ndarray
|
27 |
audio_array = waveform.numpy()
|
28 |
#inputs = processor(audio_array, sampling_rate=16_000)
|
29 |
text = pipe(audio_array)
|
30 |
return text
|
31 |
-
iface = gr.Interface(fn=ASR, inputs=audio_input, outputs=
|
32 |
iface.launch(share=False)
|
|
|
8 |
|
9 |
processor = AutoProcessor.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
|
10 |
model = AutoModelForCTC.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
|
11 |
+
audio_input = gr.Audio(label="صوت گفتار فارسی", type="filepath")
|
12 |
text_output = gr.TextArea(label="متن فارسی", type="text")
|
13 |
def ASR(audio):
|
14 |
pipe = pipeline("automatic-speech-recognition", model="SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
|
|
|
21 |
# Resample the audio to 16kHz
|
22 |
resampler = torchaudio.transforms.Resample(sample_rate, 16000)
|
23 |
waveform = resampler(waveform)
|
|
|
|
|
24 |
# Convert the PyTorch tensor to a NumPy ndarray
|
25 |
audio_array = waveform.numpy()
|
26 |
#inputs = processor(audio_array, sampling_rate=16_000)
|
27 |
text = pipe(audio_array)
|
28 |
return text
|
29 |
+
iface = gr.Interface(fn=ASR, inputs=audio_input, outputs=text_output)
|
30 |
iface.launch(share=False)
|