Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,7 @@ import torchaudio
|
|
9 |
processor = AutoProcessor.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
|
10 |
model = AutoModelForCTC.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
|
11 |
audio_input = gr.inputs.Audio(label="صوت گفتار فارسی", type="filepath")
|
12 |
-
|
13 |
def ASR(audio):
|
14 |
pipe = pipeline("automatic-speech-recognition", model="SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
|
15 |
with tempfile.NamedTemporaryFile(suffix=".wav") as temp_audio_file:
|
@@ -22,7 +22,7 @@ def ASR(audio):
|
|
22 |
resampler = torchaudio.transforms.Resample(sample_rate, 16000)
|
23 |
waveform = resampler(waveform)
|
24 |
# Convert the audio to a single channel
|
25 |
-
waveform =
|
26 |
# Convert the PyTorch tensor to a NumPy ndarray
|
27 |
audio_array = waveform.numpy()
|
28 |
#inputs = processor(audio_array, sampling_rate=16_000)
|
|
|
9 |
processor = AutoProcessor.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
|
10 |
model = AutoModelForCTC.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
|
11 |
audio_input = gr.inputs.Audio(label="صوت گفتار فارسی", type="filepath")
|
12 |
+
text_output = gr.TextArea(label="متن فارسی", type="text")
|
13 |
def ASR(audio):
|
14 |
pipe = pipeline("automatic-speech-recognition", model="SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
|
15 |
with tempfile.NamedTemporaryFile(suffix=".wav") as temp_audio_file:
|
|
|
22 |
resampler = torchaudio.transforms.Resample(sample_rate, 16000)
|
23 |
waveform = resampler(waveform)
|
24 |
# Convert the audio to a single channel
|
25 |
+
waveform = torch.mean(waveform, dim=0, keepdim=True)
|
26 |
# Convert the PyTorch tensor to a NumPy ndarray
|
27 |
audio_array = waveform.numpy()
|
28 |
#inputs = processor(audio_array, sampling_rate=16_000)
|