capstonedubtrack committed
Commit f191697
1 Parent(s): c57334a

Update app.py

Files changed (1)
  1. app.py +44 -4
app.py CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
 
 os.system('git clone https://github.com/Rudrabha/Wav2Lip.git')
 os.system('curl -o ./Wav2Lip/face_detection/detection/sfd/s3fd.pth https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth')
-os.system('mv ./Wav2Lip/* .')
+os.system('move ./Wav2Lip/* .')
 
 title = "Automatic translation and dubbing for Indic Languages"
 description = "A demo application to dub and translate videos spoken in Tamil, Hindi, Bengali and Telugu"
@@ -14,9 +14,49 @@ def inference(language,speed,voice,video ):
     import moviepy.editor as mp
     clip = mp.VideoFileClip(video)
     clip.audio.write_audiofile(r"audio.wav")
-
-    os.system("python inference.py --checkpoint_path ./wav2lip_gan.pth --face {} --audio {}".format(face, audio))
-
+    speechlist = []
+    from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
+    import torch
+    import torchaudio
+    import librosa
+    processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")
+    model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")
+    def get_transcription(audio_path):
+        speech, sr = librosa.load(audio_path, sr=16000)
+        resampler = torchaudio.transforms.Resample(sr, 16000)
+        speech = resampler(speech)
+        input_values = processor(speech, return_tensors="pt", sampling_rate=16000)["input_values"]
+        logits = model(input_values)["logits"]
+        predicted_ids = torch.argmax(logits, dim=-1)
+        transcription = processor.decode(predicted_ids[0])
+        return transcription.lower()
+    speechtext = get_transcription("audio.wav")
+    speechlist.append(speechtext)
+    text = " ".join(speechlist)
+    from googletrans import Translator
+    from gtts import gTTS
+    translator = Translator()
+    if speed == "Slow":
+        con = True
+    elif speed == "Fast":
+        con = False
+    if language == "Hindi":
+        translation = translator.translate(text, src = 'en', dest='hi', slow=con)
+        tts = gTTS(translation.text, lang= "hi")
+        tts.save('input_audio.wav')
+    elif language == "Tamil":
+        translation = translator.translate(text, src = 'en', dest='ta', slow=con)
+        tts = gTTS(translation.text, lang= "ta")
+        tts.save('input_audio.wav')
+    elif language == "Bengali":
+        translation = translator.translate(text, src = 'en', dest='bn', slow=con)
+        tts = gTTS(translation.text, lang= "hi")
+        tts.save('input_audio.wav')
+    elif language == "Telugu":
+        translation = translator.translate(text, src = 'en', dest='te', slow=con)
+        tts = gTTS(translation.text, lang= "hi")
+        tts.save('input_audio.wav')
+    os.system("python inference.py --checkpoint_path ./wav2lip_gan.pth --video {} --input_audio.wav {}".format(video))
     return "./results/result_voice.mp4"
 
 iface = gr.Interface(inference, inputs=[gr.inputs.Radio(["Tamil", "Hindi", "Bengali", "Telugu"], label = "Enter language to translate to"), gr.inputs.Radio(["Slow", "Fast"], label = "Enter speaking speed"), gr.inputs.Radio(["Male", "Female"], label = "Enter preferred voice"), gr.inputs.Video(type="mp4", source="upload", label="Video to be Translated", optional=False)], outputs=["video"], title=title, description=description, article=article, enable_queue=True)
 
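A note on the change in the first hunk: 'move' is the Windows shell command and 'mv' its Unix equivalent, so the committed line only works on a Windows host. A cross-platform way to hoist the cloned files is to do the move in Python itself; the sketch below is not part of the commit and assumes the clone landed in ./Wav2Lip:

# Hedged sketch: portable replacement for os.system('move ./Wav2Lip/* .').
import os
import shutil

for entry in os.listdir("./Wav2Lip"):
    # Moves each file or directory into the current working directory.
    shutil.move(os.path.join("./Wav2Lip", entry), ".")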
 
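In the new get_transcription helper, librosa.load(audio_path, sr=16000) already returns audio resampled to 16 kHz, so the torchaudio Resample step is redundant, and it would fail anyway because torchaudio transforms expect a tensor while librosa returns a NumPy array. A minimal corrected sketch of the same step, keeping the commit's model and processor:

# Hedged sketch of the transcription step; same checkpoint as the commit.
import librosa
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC

processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")

def get_transcription(audio_path):
    # librosa already resamples to 16 kHz mono float32, so no extra Resample pass.
    speech, sr = librosa.load(audio_path, sr=16000)
    input_values = processor(speech, return_tensors="pt", sampling_rate=16000).input_values
    with torch.no_grad():  # inference only, no gradients needed
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.decode(predicted_ids[0]).lower()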
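In the four language branches, slow=con is passed to googletrans' translate(), which takes only the text plus src and dest; slow is a gTTS option. The Bengali and Telugu branches also hand lang= "hi" to gTTS, where "bn" and "te" look intended, and gTTS always writes MP3 data regardless of the file extension. A hedged sketch, not the committed code, that folds the branches into one table-driven path:

# Hedged sketch: one path for all four target languages.
from googletrans import Translator
from gtts import gTTS

LANG_CODES = {"Hindi": "hi", "Tamil": "ta", "Bengali": "bn", "Telugu": "te"}

def translate_and_synthesize(text, language, speed):
    dest = LANG_CODES[language]
    translation = Translator().translate(text, src="en", dest=dest)
    # slow belongs to gTTS, not to googletrans' translate().
    tts = gTTS(translation.text, lang=dest, slow=(speed == "Slow"))
    tts.save("input_audio.mp3")  # gTTS emits MP3 bytes whatever the name says
    return "input_audio.mp3"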
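Finally, the new inference command has two {} placeholders but only one .format() argument, which raises IndexError at runtime, and --input_audio.wav is not a flag that Wav2Lip's inference.py defines; the script takes --face and --audio, as the removed line did. A corrected sketch, reusing the flags from the removed line and the MP3 name from the sketch above:

# Hedged sketch: flag names follow the removed line and the Wav2Lip repository.
os.system("python inference.py --checkpoint_path ./wav2lip_gan.pth --face {} --audio {}".format(video, "input_audio.mp3"))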