Update app.py
app.py CHANGED
@@ -15,19 +15,18 @@ TITLE = """
     <h1 style="font-weight: 900; margin-bottom: 7px;">
       Auto-AVSR: Audio-Visual Speech Recognition with Automatic Labels
     </h1>
-    <h3 style="font-weight: 450; font-size: 1rem; margin: 0rem">
-      [<a href="https://arxiv.org/abs/2303.14307" style="color:blue;">arXiv</a>]
-      [<a href="https://github.com/mpc001/auto_avsr" style="color:blue;">Code</a>]
-    </h3>
   </div>
   <p style="margin-bottom: 10px; font-size: 94%">
-    Want to
+    Want to recognize content in a noisy environment?<br>Our Auto-AVSR models are here to transcribe your answers from audio or visual information!
   </p>
 </div>
 """
 
 ARTICLE = """
 <div style="text-align: center; max-width: 650px; margin: 0 auto;">
+  <p>
+    Want to look into models? You can find our [<a href="https://github.com/mpc001/auto_avsr">training code</a>] and [<a href="https://arxiv.org/abs/2303.14307">paper</a>].
+  </p>
   <p>
     Server busy? You can also run on <a href="https://colab.research.google.com/drive/1jfb6e4xxhXHbmQf-nncdLno1u0b4j614?usp=sharing">Google Colab</a>
   </p>
@@ -73,32 +72,20 @@ pipelines = {
 print("Step 0. Model has been loaded.")
 
 def fn(pipeline_type, filename):
-    print("Step 0. Video has been uploaded.")
     selected_pipeline_instance = pipelines[pipeline_type]
-    print("Step 1. Video has been converted.")
     landmarks = selected_pipeline_instance.process_landmarks(filename, landmarks_filename=None)
-    print("Step 2. Landmarks have been detected.")
     data = selected_pipeline_instance.dataloader.load_data(filename, landmarks)
-    print("Step 3. Data has been preprocessed.")
     transcript = selected_pipeline_instance.model.infer(data)
-    print("Step 4. Inference has been done.")
-    print(f"transcript: {transcript}")
     return transcript
 
 demo = gr.Blocks(css=CSS)
 
 with demo:
-
     gr.HTML(TITLE)
-
-
     dropdown_list = gr.inputs.Dropdown(["ASR", "VSR(mediapipe)", "AVSR(mediapipe)"], label="model")
     video_file = gr.Video(label="INPUT VIDEO", include_audio=True)
     text = gr.Textbox(label="PREDICTION")
     btn = gr.Button("Submit").style(full_width=True)
-
     btn.click(fn, inputs=[dropdown_list, video_file], outputs=text)
-
     gr.HTML(ARTICLE)
-
 demo.launch()
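A note on the code this commit leaves in place: gr.inputs.Dropdown and Button.style(full_width=True) belong to the Gradio 3.x API and were removed in later Gradio releases, so the app as written would fail on a newer runtime. Below is a minimal modernization sketch, assuming Gradio 4.x; CSS, TITLE, ARTICLE, and fn are the names defined in app.py above, and the component layout is kept unchanged.

import gradio as gr

# Sketch only, assuming Gradio 4.x. CSS, TITLE, ARTICLE, and fn are the
# objects defined earlier in app.py.
with gr.Blocks(css=CSS) as demo:
    gr.HTML(TITLE)
    # The gr.inputs.* namespace was removed upstream; the top-level
    # gr.Dropdown component replaces it.
    dropdown_list = gr.Dropdown(["ASR", "VSR(mediapipe)", "AVSR(mediapipe)"], label="model")
    video_file = gr.Video(label="INPUT VIDEO", include_audio=True)
    text = gr.Textbox(label="PREDICTION")
    # Button.style(...) was removed; styling is now passed to the constructor
    # or handled by the surrounding layout.
    btn = gr.Button("Submit")
    btn.click(fn, inputs=[dropdown_list, video_file], outputs=text)
    gr.HTML(ARTICLE)

demo.launch()

The handler can also be exercised without the UI, e.g. fn("AVSR(mediapipe)", "sample.mp4") with a hypothetical local video file, since it only depends on the pipelines dictionary built earlier in the script.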