feat: visualize output with wavesurfer.js (#1)
Browse files
Co-authored-by: J-Petiot <[email protected]>
- .gitignore +91 -0
- LICENSE +21 -0
- app.py +86 -25
- assets/style.css +3 -0
- assets/template.html +46 -0
- requirements.txt +0 -2
.gitignore
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
env/
|
12 |
+
.env/
|
13 |
+
build/
|
14 |
+
develop-eggs/
|
15 |
+
dist/
|
16 |
+
downloads/
|
17 |
+
eggs/
|
18 |
+
.eggs/
|
19 |
+
lib/
|
20 |
+
lib64/
|
21 |
+
parts/
|
22 |
+
sdist/
|
23 |
+
var/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
|
28 |
+
# PyInstaller
|
29 |
+
# Usually these files are written by a python script from a template
|
30 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
31 |
+
*.manifest
|
32 |
+
*.spec
|
33 |
+
|
34 |
+
# Installer logs
|
35 |
+
pip-log.txt
|
36 |
+
pip-delete-this-directory.txt
|
37 |
+
|
38 |
+
# Unit test / coverage reports
|
39 |
+
htmlcov/
|
40 |
+
.tox/
|
41 |
+
.coverage
|
42 |
+
.coverage.*
|
43 |
+
.cache
|
44 |
+
nosetests.xml
|
45 |
+
coverage.xml
|
46 |
+
*,cover
|
47 |
+
.hypothesis/
|
48 |
+
|
49 |
+
# Translations
|
50 |
+
*.mo
|
51 |
+
*.pot
|
52 |
+
|
53 |
+
# Django stuff:
|
54 |
+
*.log
|
55 |
+
|
56 |
+
# Sphinx documentation
|
57 |
+
docs/_build/
|
58 |
+
|
59 |
+
# PyBuilder
|
60 |
+
target/
|
61 |
+
|
62 |
+
#Ipython Notebook
|
63 |
+
.ipynb_checkpoints
|
64 |
+
|
65 |
+
notebooks
|
66 |
+
|
67 |
+
experiments
|
68 |
+
*~
|
69 |
+
|
70 |
+
*.npy
|
71 |
+
*.pt
|
72 |
+
*events.out.tfevents*
|
73 |
+
*.csv
|
74 |
+
|
75 |
+
# PyCharm
|
76 |
+
.idea/
|
77 |
+
|
78 |
+
gh-pages
|
79 |
+
gh-pages.pub
|
80 |
+
|
81 |
+
*.zip
|
82 |
+
.mypy_cache/
|
83 |
+
.vscode/
|
84 |
+
|
85 |
+
**/lightning_logs/**
|
86 |
+
|
87 |
+
# Version Output
|
88 |
+
pyannote/audio/version.py
|
89 |
+
|
90 |
+
# vim
|
91 |
+
.vim
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2022 CNRS
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
app.py
CHANGED
@@ -1,22 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from huggingface_hub import HfApi
|
2 |
-
import matplotlib.pyplot as plt
|
3 |
import streamlit as st
|
4 |
from pyannote.audio import Pipeline
|
5 |
from pyannote.audio import Audio
|
6 |
-
from pyannote.core import
|
7 |
-
|
8 |
-
import
|
9 |
|
10 |
-
from matplotlib.backends.backend_agg import RendererAgg
|
11 |
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
PYANNOTE_LOGO = "https://avatars.githubusercontent.com/u/7559051?s=400&v=4"
|
15 |
EXCERPT = 30.0
|
16 |
|
17 |
st.set_page_config(
|
18 |
-
page_title="pyannote.audio pretrained pipelines",
|
19 |
-
|
|
|
20 |
|
21 |
st.sidebar.image(PYANNOTE_LOGO)
|
22 |
|
@@ -28,13 +64,17 @@ Upload an audio file and the first {EXCERPT:g} seconds will be processed automat
|
|
28 |
"""
|
29 |
)
|
30 |
|
31 |
-
PIPELINES = [
|
|
|
|
|
|
|
|
|
32 |
|
33 |
audio = Audio(sample_rate=16000, mono=True)
|
34 |
|
35 |
selected_pipeline = st.selectbox("", PIPELINES, index=0)
|
36 |
|
37 |
-
with st.spinner(
|
38 |
pipeline = Pipeline.from_pretrained(selected_pipeline)
|
39 |
|
40 |
uploaded_file = st.file_uploader("")
|
@@ -45,25 +85,46 @@ if uploaded_file is not None:
|
|
45 |
except RuntimeError as e:
|
46 |
st.error(e)
|
47 |
st.stop()
|
48 |
-
waveform, sample_rate = audio.crop(
|
|
|
|
|
49 |
file = {"waveform": waveform, "sample_rate": sample_rate, "uri": uploaded_file.name}
|
50 |
|
51 |
-
with st.spinner(
|
52 |
output = pipeline(file)
|
53 |
|
54 |
-
with
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
|
68 |
with io.StringIO() as fp:
|
69 |
output.write_rttm(fp)
|
|
|
1 |
+
# MIT License
|
2 |
+
#
|
3 |
+
# Copyright (c) 2022- CNRS
|
4 |
+
#
|
5 |
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
# of this software and associated documentation files (the "Software"), to deal
|
7 |
+
# in the Software without restriction, including without limitation the rights
|
8 |
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
# copies of the Software, and to permit persons to whom the Software is
|
10 |
+
# furnished to do so, subject to the following conditions:
|
11 |
+
#
|
12 |
+
# The above copyright notice and this permission notice shall be included in all
|
13 |
+
# copies or substantial portions of the Software.
|
14 |
+
#
|
15 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
# SOFTWARE.
|
22 |
+
|
23 |
+
|
24 |
+
import io
|
25 |
+
import base64
|
26 |
+
import numpy as np
|
27 |
+
import scipy.io.wavfile
|
28 |
+
from typing import Text
|
29 |
from huggingface_hub import HfApi
|
|
|
30 |
import streamlit as st
|
31 |
from pyannote.audio import Pipeline
|
32 |
from pyannote.audio import Audio
|
33 |
+
from pyannote.core import Segment
|
34 |
+
|
35 |
+
import streamlit.components.v1 as components
|
36 |
|
|
|
37 |
|
38 |
+
def to_base64(waveform: np.ndarray, sample_rate: int = 16000) -> Text:
    """Encode a waveform as a base64 WAV data URI.

    The samples are peak-normalized, serialized to WAV in memory with
    ``scipy.io.wavfile.write`` and returned as a
    ``data:audio/x-wav;base64,...`` string.

    Parameters
    ----------
    waveform : np.ndarray
        Audio samples, in any layout accepted by ``scipy.io.wavfile.write``.
        The array is left untouched.
    sample_rate : int, optional
        Sample rate written to the WAV header. Defaults to 16000.

    Returns
    -------
    Text
        Data URI embedding the base64-encoded WAV content.
    """
    # Normalize out-of-place: an in-place `/=` would overwrite the caller's
    # buffer and raises on integer arrays.
    # The epsilon keeps an all-zero (silent) waveform from dividing by zero.
    peak = np.max(np.abs(waveform))
    normalized = waveform / (peak + 1e-8)

    with io.BytesIO() as content:
        scipy.io.wavfile.write(content, sample_rate, normalized)
        payload = base64.b64encode(content.getvalue()).decode()

    return f"data:audio/x-wav;base64,{payload}"
|
47 |
+
|
48 |
|
49 |
PYANNOTE_LOGO = "https://avatars.githubusercontent.com/u/7559051?s=400&v=4"
|
50 |
EXCERPT = 30.0
|
51 |
|
52 |
st.set_page_config(
|
53 |
+
page_title="pyannote.audio pretrained pipelines", page_icon=PYANNOTE_LOGO
|
54 |
+
)
|
55 |
+
|
56 |
|
57 |
st.sidebar.image(PYANNOTE_LOGO)
|
58 |
|
|
|
64 |
"""
|
65 |
)
|
66 |
|
67 |
+
PIPELINES = [
|
68 |
+
p.modelId
|
69 |
+
for p in HfApi().list_models(filter="pyannote-audio-pipeline")
|
70 |
+
if p.modelId.startswith("pyannote/")
|
71 |
+
]
|
72 |
|
73 |
audio = Audio(sample_rate=16000, mono=True)
|
74 |
|
75 |
selected_pipeline = st.selectbox("", PIPELINES, index=0)
|
76 |
|
77 |
+
with st.spinner("Loading pipeline..."):
|
78 |
pipeline = Pipeline.from_pretrained(selected_pipeline)
|
79 |
|
80 |
uploaded_file = st.file_uploader("")
|
|
|
85 |
except RuntimeError as e:
|
86 |
st.error(e)
|
87 |
st.stop()
|
88 |
+
waveform, sample_rate = audio.crop(
|
89 |
+
uploaded_file, Segment(0, min(duration, EXCERPT))
|
90 |
+
)
|
91 |
file = {"waveform": waveform, "sample_rate": sample_rate, "uri": uploaded_file.name}
|
92 |
|
93 |
+
with st.spinner("Running pipeline..."):
|
94 |
output = pipeline(file)
|
95 |
|
96 |
+
with open('assets/template.html') as html, open('assets/style.css') as css:
|
97 |
+
html_template = html.read()
|
98 |
+
st.markdown('<style>{}</style>'.format(css.read()), unsafe_allow_html=True)
|
99 |
+
|
100 |
+
colors = [
|
101 |
+
"#ffd70033",
|
102 |
+
"#00ffff33",
|
103 |
+
"#ff00ff33",
|
104 |
+
"#00ff0033",
|
105 |
+
"#9932cc33",
|
106 |
+
"#00bfff33",
|
107 |
+
"#ff7f5033",
|
108 |
+
"#66cdaa33",
|
109 |
+
]
|
110 |
+
num_colors = len(colors)
|
111 |
+
|
112 |
+
label2color = {label: colors[k % num_colors] for k, label in enumerate(sorted(output.labels()))}
|
113 |
+
|
114 |
+
BASE64 = to_base64(waveform.numpy().T)
|
115 |
+
|
116 |
+
REGIONS = ""
|
117 |
+
LEGENDS = ""
|
118 |
+
labels=[]
|
119 |
+
for segment, _, label in output.itertracks(yield_label=True):
|
120 |
+
REGIONS += f"var re = wavesurfer.addRegion({{start: {segment.start:g}, end: {segment.end:g}, color: '{label2color[label]}', resize : false, drag : false}});"
|
121 |
+
if not label in labels:
|
122 |
+
LEGENDS += f"<li><span style='background-color:{label2color[label]}'></span>{label}</li>"
|
123 |
+
labels.append(label)
|
124 |
+
|
125 |
+
html = html_template.replace("BASE64", BASE64).replace("REGIONS", REGIONS)
|
126 |
+
st.markdown("<div style='overflow : auto'><ul class='legend'>"+LEGENDS+"</ul></div>", unsafe_allow_html=True)
|
127 |
+
components.html(html, height=250, scrolling=True)
|
128 |
|
129 |
with io.StringIO() as fp:
|
130 |
output.write_rttm(fp)
|
assets/style.css
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
.legend { list-style: none; margin: 0; padding: 0}
|
2 |
+
.legend li { float: left; margin : auto; margin-right: 10px; font-family : "Lato", "Trebuchet MS", Roboto, Helvetica, Arial, sans-serif; line-height: 1.8; font-size:20px;color : #444}
|
3 |
+
.legend span { border: 1px solid #ccc; float: left; width: 30px; height: 30px; margin: 2px; }
|
assets/template.html
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<!-- Pinned to the 6.x line: this template uses the v6 plugin API
     (WaveSurfer.regions.create / wavesurfer.addRegion) and the v6
     dist/plugin/ paths, so an unversioned URL would follow later major
     releases and could break the player. -->
<script src="https://unpkg.com/wavesurfer.js@6"></script>
<script src="https://unpkg.com/wavesurfer.js@6/dist/plugin/wavesurfer.regions.min.js"></script>
<script src="https://unpkg.com/wavesurfer.js@6/dist/plugin/wavesurfer.timeline.min.js"></script>
|
4 |
+
<br>
|
5 |
+
<div id="waveform"></div>
|
6 |
+
<div id="timeline"></div>
|
7 |
+
<br>
|
8 |
+
<!-- Play/pause toggle; its label is updated by the wavesurfer 'play'/'pause' handlers. -->
<div><button onclick="play()" id="ppb">Play</button></div>
|
9 |
+
<script type="text/javascript">
|
10 |
+
var labels=[];
|
11 |
+
var wavesurfer = WaveSurfer.create({
|
12 |
+
container: '#waveform',
|
13 |
+
barGap: 2,
|
14 |
+
barHeight: 3,
|
15 |
+
barWidth: 3,
|
16 |
+
barRadius: 2,
|
17 |
+
plugins: [
|
18 |
+
WaveSurfer.regions.create({}),
|
19 |
+
WaveSurfer.timeline.create({
|
20 |
+
container: "#timeline",
|
21 |
+
notchPercentHeight: 40,
|
22 |
+
primaryColor: "#444",
|
23 |
+
primaryFontColor: "#444"
|
24 |
+
})
|
25 |
+
]
|
26 |
+
});
|
27 |
+
wavesurfer.load('BASE64');
|
28 |
+
wavesurfer.on('ready', function () {
|
29 |
+
wavesurfer.play();
|
30 |
+
});
|
31 |
+
wavesurfer.on('play',function() {
|
32 |
+
document.getElementById('ppb').innerHTML = "Pause";
|
33 |
+
});
|
34 |
+
wavesurfer.on('pause',function() {
|
35 |
+
document.getElementById('ppb').innerHTML = "Play";
|
36 |
+
});
|
37 |
+
REGIONS
|
38 |
+
document.addEventListener('keyup', event => {
|
39 |
+
if (event.code === 'Space') {
|
40 |
+
play();
|
41 |
+
}
|
42 |
+
})
|
43 |
+
// Toggle playback: pause when audio is currently playing, otherwise start it.
function play() {
    if (wavesurfer.isPlaying()) {
        wavesurfer.pause();
    } else {
        wavesurfer.play();
    }
}
|
46 |
+
</script>
|
requirements.txt
CHANGED
@@ -1,4 +1,2 @@
|
|
1 |
git+https://github.com/pyannote/pyannote-audio.git@develop#egg=pyannote-audio
|
2 |
speechbrain == 0.5.10
|
3 |
-
matplotlib == 3.3.3
|
4 |
-
|
|
|
1 |
git+https://github.com/pyannote/pyannote-audio.git@develop#egg=pyannote-audio
|
2 |
speechbrain == 0.5.10
|
|
|
|