capstonedubtrack committed
Commit f191697
1 Parent(s): c57334a

Update app.py

Files changed (1)
  1. app.py +44 -4
app.py CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
 
 os.system('git clone https://github.com/Rudrabha/Wav2Lip.git')
 os.system('curl -o ./Wav2Lip/face_detection/detection/sfd/s3fd.pth https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth')
-os.system('mv ./Wav2Lip/* .')
+os.system('move ./Wav2Lip/* .')
 
 title = "Automatic translation and dubbing for Indic Languages"
 description = "A demo application to dub and translate videos spoken in Tamil, Hindi, Bengali and Telugu"
@@ -14,9 +14,49 @@ def inference(language,speed,voice,video ):
     import moviepy.editor as mp
     clip = mp.VideoFileClip(video)
     clip.audio.write_audiofile(r"audio.wav")
-
-    os.system("python inference.py --checkpoint_path ./wav2lip_gan.pth --face {} --audio {}".format(face, audio))
-
+    speechlist = []
+    from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
+    import torch
+    import torchaudio
+    import librosa
+    processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")
+    model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")
+    def get_transcription(audio_path):
+        speech, sr = librosa.load(audio_path, sr=16000)
+        resampler = torchaudio.transforms.Resample(sr, 16000)
+        speech = resampler(speech)
+        input_values = processor(speech, return_tensors="pt", sampling_rate=16000)["input_values"]
+        logits = model(input_values)["logits"]
+        predicted_ids = torch.argmax(logits, dim=-1)
+        transcription = processor.decode(predicted_ids[0])
+        return transcription.lower()
+    speechtext = get_transcription("audio.wav")
+    speechlist.append(speechtext)
+    text = " ".join(speechlist)
+    from googletrans import Translator
+    from gtts import gTTS
+    translator = Translator()
+    if speed == "Slow":
+        con = True
+    elif speed == "Fast":
+        con = False
+    if language == "Hindi":
+        translation = translator.translate(text, src = 'en', dest='hi', slow=con)
+        tts = gTTS(translation.text, lang= "hi")
+        tts.save('input_audio.wav')
+    elif language == "Tamil":
+        translation = translator.translate(text, src = 'en', dest='ta', slow=con)
+        tts = gTTS(translation.text, lang= "ta")
+        tts.save('input_audio.wav')
+    elif language == "Bengali":
+        translation = translator.translate(text, src = 'en', dest='bn', slow=con)
+        tts = gTTS(translation.text, lang= "hi")
+        tts.save('input_audio.wav')
+    elif language == "Telugu":
+        translation = translator.translate(text, src = 'en', dest='te', slow=con)
+        tts = gTTS(translation.text, lang= "hi")
+        tts.save('input_audio.wav')
+    os.system("python inference.py --checkpoint_path ./wav2lip_gan.pth --video {} --input_audio.wav {}".format(video))
     return "./results/result_voice.mp4"
 
 iface = gr.Interface(inference, inputs=[gr.inputs.Radio(["Tamil", "Hindi", "Bengali", "Telugu"], label = "Enter language to translate to"), gr.inputs.Radio(["Slow", "Fast"], label = "Enter speaking speed"), gr.inputs.Radio(["Male", "Female"], label = "Enter preferred voice"), gr.inputs.Video(type="mp4", source="upload", label="Video to be Translated", optional=False)], outputs=["video"], title=title, description=description, article=article, enable_queue=True)
 
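A note on the change in the first hunk: 'move' is the Windows shell command and 'mv' its Unix equivalent, so the committed line only works on a Windows host. A cross-platform way to hoist the cloned files is to do the move in Python itself; the sketch below is not part of the commit and assumes the clone landed in ./Wav2Lip:

# Hedged sketch: portable replacement for os.system('move ./Wav2Lip/* .').
import os
import shutil

for entry in os.listdir("./Wav2Lip"):
    # Moves each file or directory into the current working directory.
    shutil.move(os.path.join("./Wav2Lip", entry), ".")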
 
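In the new get_transcription helper, librosa.load(audio_path, sr=16000) already returns audio resampled to 16 kHz, so the torchaudio Resample step is redundant, and it would fail anyway because torchaudio transforms expect a tensor while librosa returns a NumPy array. A minimal corrected sketch of the same step, keeping the commit's model and processor:

# Hedged sketch of the transcription step; same checkpoint as the commit.
import librosa
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC

processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")

def get_transcription(audio_path):
    # librosa already resamples to 16 kHz mono float32, so no extra Resample pass.
    speech, sr = librosa.load(audio_path, sr=16000)
    input_values = processor(speech, return_tensors="pt", sampling_rate=16000).input_values
    with torch.no_grad():  # inference only, no gradients needed
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.decode(predicted_ids[0]).lower()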
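In the four language branches, slow=con is passed to googletrans' translate(), which takes only the text plus src and dest; slow is a gTTS option. The Bengali and Telugu branches also hand lang= "hi" to gTTS, where "bn" and "te" look intended, and gTTS always writes MP3 data regardless of the file extension. A hedged sketch, not the committed code, that folds the branches into one table-driven path:

# Hedged sketch: one path for all four target languages.
from googletrans import Translator
from gtts import gTTS

LANG_CODES = {"Hindi": "hi", "Tamil": "ta", "Bengali": "bn", "Telugu": "te"}

def translate_and_synthesize(text, language, speed):
    dest = LANG_CODES[language]
    translation = Translator().translate(text, src="en", dest=dest)
    # slow belongs to gTTS, not to googletrans' translate().
    tts = gTTS(translation.text, lang=dest, slow=(speed == "Slow"))
    tts.save("input_audio.mp3")  # gTTS emits MP3 bytes whatever the name says
    return "input_audio.mp3"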
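Finally, the new inference command has two {} placeholders but only one .format() argument, which raises IndexError at runtime, and --input_audio.wav is not a flag that Wav2Lip's inference.py defines; the script takes --face and --audio, as the removed line did. A corrected sketch, reusing the flags from the removed line and the MP3 name from the sketch above:

# Hedged sketch: flag names follow the removed line and the Wav2Lip repository.
os.system("python inference.py --checkpoint_path ./wav2lip_gan.pth --face {} --audio {}".format(video, "input_audio.mp3"))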