"""Gradio app: transcribe recorded speech with a remote ASR service and translate it to English."""
import os
import tempfile

import gradio as gr
import requests
import soundfile as sf
from deep_translator import GoogleTranslator
def speech_translation(audio, language):
    """Transcribe a recorded audio file and translate the transcript to English.

    Args:
        audio: Path to the recorded audio file, or ``None`` when nothing was
            recorded.
        language: Language name (e.g. ``"hindi"``); sent to the ASR service
            and used as the translator's source language.

    Returns:
        A single string: the English translation, or a short message when
        there is no audio or the ASR step fails.
    """
    if audio is None:
        # One string, like every other return path of this function.
        return "No audio input provided!"

    temp_path = None
    try:
        # Convert audio to .wav format if not already.
        if audio.lower().endswith(".wav"):
            audio_file = audio
        else:
            wav_data, samplerate = sf.read(audio)
            # A unique temp file per call: a fixed name would let concurrent
            # requests overwrite each other's audio.
            fd, temp_path = tempfile.mkstemp(suffix=".wav")
            os.close(fd)
            sf.write(temp_path, wav_data, samplerate)
            audio_file = temp_path

        # ASR processing.
        try:
            # The context manager closes the upload handle even on failure.
            with open(audio_file, "rb") as audio_stream:
                files = {
                    'file': audio_stream,
                    'language': (None, language),
                    'vtt': (None, 'true'),
                }
                response = requests.post(
                    'https://asr.iitm.ac.in/internal/asr/decode',
                    files=files,
                    timeout=60,  # do not hang forever on an unresponsive service
                )
            payload = response.json()
            print(payload)
            asr_output = payload['transcript']
        except (requests.RequestException, ValueError, KeyError, TypeError) as err:
            # Network failure, non-JSON body, or a payload without 'transcript'.
            print(f"ASR request failed: {err!r}")
            # Return directly; there is no point translating an error message.
            return "Error in ASR processing"
    finally:
        if temp_path is not None:
            os.remove(temp_path)

    # Strip sentence terminators (Devanagari danda and full stop).
    asr_output = asr_output.replace("।", "").replace(".", "")
    if not asr_output.strip():
        # Nothing to translate; skip the translator call.
        return asr_output

    translator = GoogleTranslator(source=language, target='en')
    return translator.translate(asr_output)
# Web UI: an audio input (passed to the handler as a file path) plus a
# source-language dropdown, wired to speech_translation with one text output.
iface = gr.Interface(
    fn=speech_translation,
    inputs=[
        gr.Audio(type="filepath", label="Record your speech"),
        gr.Dropdown(["telugu", "hindi", "marathi", "bengali"], label="Select Language"),
    ],
    outputs=["text"],
    title="Speech Translation",
    description="Record your speech and get the English translation.",
)

# Start the app; share=True asks Gradio for a public share link.
iface.launch(share=True)