capstonedubtrack committed on
Commit • f191697
1 Parent(s): c57334a
Update app.py
app.py
CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
 
 os.system('git clone https://github.com/Rudrabha/Wav2Lip.git')
 os.system('curl -o ./Wav2Lip/face_detection/detection/sfd/s3fd.pth https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth')
-os.system('
+os.system('mv ./Wav2Lip/* .')
 
 title = "Automatic translation and dubbing for Indic Languages"
 description = "A demo application to dub and translate videos spoken in Tamil, Hindi, Bengali and Telugu"
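The setup commands above re-run on every app start. As a side note, a guarded variant (a minimal sketch, assuming the same repository layout) would skip the clone and the weights download when the files are already present:

import os

# Clone Wav2Lip and fetch the S3FD face-detection weights only when missing.
if not os.path.isdir("./Wav2Lip"):
    os.system("git clone https://github.com/Rudrabha/Wav2Lip.git")
weights = "./Wav2Lip/face_detection/detection/sfd/s3fd.pth"
if not os.path.isfile(weights):
    os.system("curl -o {} https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth".format(weights))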
@@ -14,9 +14,49 @@ def inference(language,speed,voice,video ):
     import moviepy.editor as mp
     clip = mp.VideoFileClip(video)
     clip.audio.write_audiofile(r"audio.wav")
-
-
-
+    # Speech recognition: transcribe the extracted English audio with Wav2Vec2
+    speechlist = []
+    from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
+    import torch
+    import torchaudio
+    import librosa
+    processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")
+    model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")
+    def get_transcription(audio_path):
+        # librosa already resamples to 16 kHz; torchaudio's Resample needs a
+        # tensor rather than the numpy array librosa returns
+        speech, sr = librosa.load(audio_path, sr=16000)
+        resampler = torchaudio.transforms.Resample(sr, 16000)
+        speech = resampler(torch.from_numpy(speech))
+        input_values = processor(speech, return_tensors="pt", sampling_rate=16000)["input_values"]
+        logits = model(input_values)["logits"]
+        predicted_ids = torch.argmax(logits, dim=-1)
+        transcription = processor.decode(predicted_ids[0])
+        return transcription.lower()
+    speechtext = get_transcription("audio.wav")
+    speechlist.append(speechtext)
+    text = " ".join(speechlist)
+    # Translation and text-to-speech
+    from googletrans import Translator
+    from gtts import gTTS
+    translator = Translator()
+    # "slow" is a gTTS option, not a googletrans one, so it is passed to gTTS below
+    if speed == "Slow":
+        con = True
+    elif speed == "Fast":
+        con = False
+    if language == "Hindi":
+        translation = translator.translate(text, src='en', dest='hi')
+        tts = gTTS(translation.text, lang="hi", slow=con)
+        tts.save('input_audio.wav')
+    elif language == "Tamil":
+        translation = translator.translate(text, src='en', dest='ta')
+        tts = gTTS(translation.text, lang="ta", slow=con)
+        tts.save('input_audio.wav')
+    elif language == "Bengali":
+        translation = translator.translate(text, src='en', dest='bn')
+        tts = gTTS(translation.text, lang="bn", slow=con)
+        tts.save('input_audio.wav')
+    elif language == "Telugu":
+        translation = translator.translate(text, src='en', dest='te')
+        tts = gTTS(translation.text, lang="te", slow=con)
+        tts.save('input_audio.wav')
os.system("python inference.py --checkpoint_path ./wav2lip_gan.pth --video {} --input_audio.wav {}".format(video))
|
60 |
return "./results/result_voice.mp4"
|
61 |
|
62 |
iface = gr.Interface(inference, inputs=[gr.inputs.Radio(["Tamil", "Hindi", "Bengali", "Telugu"], label = "Enter language to translate to"), gr.inputs.Radio(["Slow", "Fast"], label = "Enter speaking speed"), gr.inputs.Radio(["Male", "Female"], label = "Enter preferred voice"), gr.inputs.Video(type="mp4", source="upload", label="Video to be Translated", optional=False)], outputs=["video"], title=title, description=description, article=article, enable_queue=True)
|
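Taken together, the commit wires a four-stage pipeline into the Gradio handler: extract audio, transcribe, translate and synthesise, then lip-sync. Below is a minimal standalone sketch of the same chain, assuming the Wav2Lip checkout and the wav2lip_gan.pth checkpoint sit in the working directory (as the os.system calls above arrange), with a hypothetical sample.mp4 as input and Hindi hard-coded as the target:

import os

import librosa
import torch
import moviepy.editor as mp
from googletrans import Translator
from gtts import gTTS
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor

# 1. Extract the audio track from the source video (sample.mp4 is a stand-in).
clip = mp.VideoFileClip("sample.mp4")
clip.audio.write_audiofile("audio.wav")

# 2. Transcribe the English speech with Wav2Vec2 at the 16 kHz rate it expects.
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")
speech, _ = librosa.load("audio.wav", sr=16000)
inputs = processor(speech, return_tensors="pt", sampling_rate=16000)
with torch.no_grad():
    logits = model(inputs.input_values).logits
text = processor.decode(torch.argmax(logits, dim=-1)[0]).lower()

# 3. Translate to Hindi and synthesise the dubbed audio track.
translation = Translator().translate(text, src="en", dest="hi")
gTTS(translation.text, lang="hi", slow=False).save("input_audio.wav")

# 4. Lip-sync the dubbed track onto the original video with Wav2Lip.
os.system("python inference.py --checkpoint_path ./wav2lip_gan.pth "
          "--face sample.mp4 --audio input_audio.wav")
# Wav2Lip writes the result to ./results/result_voice.mp4

Note that the app's Male/Female voice input is never consumed: gTTS exposes no voice selection, so that radio button currently has no effect on the output.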