whispy committed on
Commit
3c8bd48
•
1 Parent(s): 8049de7

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +108 -0
  2. packages.txt +1 -0
  3. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
+ import gradio as gr
4
+ import pytube as pt
5
+ from transformers import pipeline
6
+
7
# Speech recognition with the fine-tuned Whisper checkpoint, run on CPU.
# chunk_length_s is passed so the pipeline works on 30-second chunks.
asr = pipeline(
    "automatic-speech-recognition",
    model="whispy/whisper_hf",
    device="cpu",
    chunk_length_s=30,
)

# Summarization model applied to the YouTube transcript.
summarizer = pipeline(
    task="summarization",
    model="it5/it5-efficient-small-el32-news-summarization",
)

# Translation model (opus-mt-it-en) applied to transcripts and summaries.
translator = pipeline(
    task="translation",
    model="Helsinki-NLP/opus-mt-it-en",
)
22
+
23
def transcribe(microphone, file_upload):
    """Transcribe an audio input and translate the resulting transcript.

    Args:
        microphone: Microphone recording handed over by the UI, or ``None``
            when the microphone was not used.
        file_upload: Uploaded audio handed over by the UI, or ``None`` when
            nothing was uploaded.

    Returns:
        A ``(transcript, translation)`` tuple. When both inputs are given the
        microphone recording is used and a warning is prepended to the
        transcript. When neither is given the first element is an error
        message and the second is an empty string.
    """
    if microphone is None and file_upload is None:
        # Both output slots must be filled: the success path returns two
        # values, so the error path has to return two as well.
        return "ERROR: You have to either use the microphone or upload an audio file", ""

    warn_output = ""
    if microphone is not None and file_upload is not None:
        warn_output = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
        )

    # The microphone recording takes precedence over the uploaded file.
    audio_input = microphone if microphone is not None else file_upload

    text = asr(audio_input)["text"]
    translate = translator(text)[0]["translation_text"]

    return warn_output + text, translate
42
+
43
+ def _return_yt_html_embed(yt_url):
44
+ video_id = yt_url.split("?v=")[-1]
45
+ HTML_str = (
46
+ f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
47
+ " </center>"
48
+ )
49
+ return HTML_str
50
+
51
+
52
def yt_transcribe(yt_url):
    """Transcribe a YouTube video, then summarize and translate the result.

    Args:
        yt_url: URL of the YouTube video to process.

    Returns:
        A ``(html_embed, transcript, summary, translation)`` tuple, where
        ``translation`` is the translation of the summary.
    """
    import tempfile

    yt = pt.YouTube(yt_url)
    html_embed_str = _return_yt_html_embed(yt_url)
    stream = yt.streams.filter(only_audio=True)[0]

    # Download into a per-call temporary directory instead of a fixed
    # "audio.mp3" in the working directory: overlapping requests no longer
    # share one file, and the audio is removed once it has been transcribed.
    with tempfile.TemporaryDirectory() as download_dir:
        audio_path = stream.download(output_path=download_dir, filename="audio.mp3")
        text = asr(audio_path)["text"]

    summary = summarizer(text)[0]["summary_text"]
    translate = translator(summary)[0]["translation_text"]

    return html_embed_str, text, summary, translate
67
+
68
demo = gr.Blocks()

# Tab 1: transcribe and translate a microphone recording or an uploaded file.
mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath", optional=True),
        gr.inputs.Audio(source="upload", type="filepath", optional=True),
    ],
    outputs=["text", "text"],
    layout="horizontal",
    theme="huggingface",
    title="Whisper Demo: Transcribe and Translate Italian Audio",
    description=(
        "Transcribe and Translate long-form microphone or audio inputs with the click of a button! Demo uses the fine-tuned"
        " [whispy/whisper_hf](https://huggingface.co/whispy/whisper_hf) and 🤗 Transformers to transcribe audio files"
        " of arbitrary length. It also uses another model for the translation."
    ),
    allow_flagging="never",
)

# Tab 2: transcribe, summarize and translate a YouTube video.
# The interface gets its own name so it does not rebind (shadow) the
# yt_transcribe handler function it wraps.
yt_interface = gr.Interface(
    fn=yt_transcribe,
    inputs=[gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")],
    outputs=["html", "text", "text", "text"],
    layout="horizontal",
    theme="huggingface",
    title="Whisper Demo: Transcribe, Summarize and Translate YouTube",
    description=(
        "Transcribe, Summarize and Translate long-form YouTube videos with the click of a button! Demo uses the fine-tuned "
        " [whispy/whisper_hf](https://huggingface.co/whispy/whisper_hf) and 🤗 Transformers to transcribe audio files of"
        " arbitrary length. It also uses other two models to first summarize and then translate the text input. You can try with the following examples: "
        " [Video1](https://www.youtube.com/watch?v=xhWhyu8cBTk)"
        " [Video2](https://www.youtube.com/watch?v=C6Vw_Z3t_2U)"
    ),
    allow_flagging="never",
)

# Present both interfaces as tabs of a single app.
with demo:
    gr.TabbedInterface(
        [mf_transcribe, yt_interface],
        ["Transcribe and Translate Audio", "Transcribe, Summarize and Translate YouTube"],
    )

demo.launch(enable_queue=True)
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ pytube
4
+ sentencepiece