Hervé BREDIN jpetiot committed on
Commit
c174364
1 Parent(s): ebc74bd

feat: visualize output with wavesurfer.js (#1)

Browse files

Co-authored-by: J-Petiot <[email protected]>

Files changed (6) hide show
  1. .gitignore +91 -0
  2. LICENSE +21 -0
  3. app.py +86 -25
  4. assets/style.css +3 -0
  5. assets/template.html +46 -0
  6. requirements.txt +0 -2
.gitignore ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ env/
12
+ .env/
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+
28
+ # PyInstaller
29
+ # Usually these files are written by a python script from a template
30
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
31
+ *.manifest
32
+ *.spec
33
+
34
+ # Installer logs
35
+ pip-log.txt
36
+ pip-delete-this-directory.txt
37
+
38
+ # Unit test / coverage reports
39
+ htmlcov/
40
+ .tox/
41
+ .coverage
42
+ .coverage.*
43
+ .cache
44
+ nosetests.xml
45
+ coverage.xml
46
+ *,cover
47
+ .hypothesis/
48
+
49
+ # Translations
50
+ *.mo
51
+ *.pot
52
+
53
+ # Django stuff:
54
+ *.log
55
+
56
+ # Sphinx documentation
57
+ docs/_build/
58
+
59
+ # PyBuilder
60
+ target/
61
+
62
+ # IPython Notebook
63
+ .ipynb_checkpoints
64
+
65
+ notebooks
66
+
67
+ experiments
68
+ *~
69
+
70
+ *.npy
71
+ *.pt
72
+ *events.out.tfevents*
73
+ *.csv
74
+
75
+ # PyCharm
76
+ .idea/
77
+
78
+ gh-pages
79
+ gh-pages.pub
80
+
81
+ *.zip
82
+ .mypy_cache/
83
+ .vscode/
84
+
85
+ **/lightning_logs/**
86
+
87
+ # Version Output
88
+ pyannote/audio/version.py
89
+
90
+ # vim
91
+ .vim
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 CNRS
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
app.py CHANGED
@@ -1,22 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from huggingface_hub import HfApi
2
- import matplotlib.pyplot as plt
3
  import streamlit as st
4
  from pyannote.audio import Pipeline
5
  from pyannote.audio import Audio
6
- from pyannote.core import notebook, Segment
7
- import io
8
- import base64
9
 
10
- from matplotlib.backends.backend_agg import RendererAgg
11
 
12
- _lock = RendererAgg.lock
 
 
 
 
 
 
 
 
 
13
 
14
  PYANNOTE_LOGO = "https://avatars.githubusercontent.com/u/7559051?s=400&v=4"
15
  EXCERPT = 30.0
16
 
17
  st.set_page_config(
18
- page_title="pyannote.audio pretrained pipelines",
19
- page_icon=PYANNOTE_LOGO)
 
20
 
21
  st.sidebar.image(PYANNOTE_LOGO)
22
 
@@ -28,13 +64,17 @@ Upload an audio file and the first {EXCERPT:g} seconds will be processed automat
28
  """
29
  )
30
 
31
- PIPELINES = [p.modelId for p in HfApi().list_models(filter="pyannote-audio-pipeline") if p.modelId.startswith("pyannote/")]
 
 
 
 
32
 
33
  audio = Audio(sample_rate=16000, mono=True)
34
 
35
  selected_pipeline = st.selectbox("", PIPELINES, index=0)
36
 
37
- with st.spinner('Loading pipeline...'):
38
  pipeline = Pipeline.from_pretrained(selected_pipeline)
39
 
40
  uploaded_file = st.file_uploader("")
@@ -45,25 +85,46 @@ if uploaded_file is not None:
45
  except RuntimeError as e:
46
  st.error(e)
47
  st.stop()
48
- waveform, sample_rate = audio.crop(uploaded_file, Segment(0, min(duration, EXCERPT)))
 
 
49
  file = {"waveform": waveform, "sample_rate": sample_rate, "uri": uploaded_file.name}
50
 
51
- with st.spinner('Running pipeline...'):
52
  output = pipeline(file)
53
 
54
- with _lock:
55
-
56
- notebook.reset()
57
- notebook.crop = Segment(0, min(duration, EXCERPT))
58
-
59
- fig, ax = plt.subplots(nrows=1, ncols=1)
60
- fig.set_figwidth(12)
61
- fig.set_figheight(2.0)
62
- notebook.plot_annotation(output, ax=ax, time=True, legend=True)
63
-
64
- plt.tight_layout()
65
- st.pyplot(fig=fig, clear_figure=True)
66
- plt.close(fig)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  with io.StringIO() as fp:
69
  output.write_rttm(fp)
 
1
+ # MIT License
2
+ #
3
+ # Copyright (c) 2022- CNRS
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ # of this software and associated documentation files (the "Software"), to deal
7
+ # in the Software without restriction, including without limitation the rights
8
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ # copies of the Software, and to permit persons to whom the Software is
10
+ # furnished to do so, subject to the following conditions:
11
+ #
12
+ # The above copyright notice and this permission notice shall be included in all
13
+ # copies or substantial portions of the Software.
14
+ #
15
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ # SOFTWARE.
22
+
23
+
24
+ import io
25
+ import base64
26
+ import numpy as np
27
+ import scipy.io.wavfile
28
+ from typing import Text
29
  from huggingface_hub import HfApi
 
30
  import streamlit as st
31
  from pyannote.audio import Pipeline
32
  from pyannote.audio import Audio
33
+ from pyannote.core import Segment
34
+
35
+ import streamlit.components.v1 as components
36
 
 
37
 
38
def to_base64(waveform: np.ndarray, sample_rate: int = 16000) -> Text:
    """Encode a waveform as a base64 WAV data URI.

    The samples are peak-normalized (divided by their largest absolute value,
    plus a small epsilon so that an all-zero signal does not divide by zero),
    written to an in-memory WAV file with `scipy.io.wavfile.write`, and
    base64-encoded.

    Parameters
    ----------
    waveform : np.ndarray
        Audio samples, handed to `scipy.io.wavfile.write` after normalization.
        The array passed by the caller is not modified.
    sample_rate : int, optional
        Sample rate written to the WAV header. Defaults to 16000.

    Returns
    -------
    Text
        String of the form ``data:audio/x-wav;base64,<payload>``.
    """
    # Divide out-of-place: an in-place `/=` would overwrite the caller's
    # samples (and fails outright on integer arrays).
    normalized = waveform / (np.max(np.abs(waveform)) + 1e-8)
    with io.BytesIO() as content:
        scipy.io.wavfile.write(content, sample_rate, normalized)
        b64 = base64.b64encode(content.getvalue()).decode()
    return f"data:audio/x-wav;base64,{b64}"
47
+
48
 
49
  PYANNOTE_LOGO = "https://avatars.githubusercontent.com/u/7559051?s=400&v=4"
50
  EXCERPT = 30.0
51
 
52
  st.set_page_config(
53
+ page_title="pyannote.audio pretrained pipelines", page_icon=PYANNOTE_LOGO
54
+ )
55
+
56
 
57
  st.sidebar.image(PYANNOTE_LOGO)
58
 
 
64
  """
65
  )
66
 
67
+ PIPELINES = [
68
+ p.modelId
69
+ for p in HfApi().list_models(filter="pyannote-audio-pipeline")
70
+ if p.modelId.startswith("pyannote/")
71
+ ]
72
 
73
  audio = Audio(sample_rate=16000, mono=True)
74
 
75
  selected_pipeline = st.selectbox("", PIPELINES, index=0)
76
 
77
+ with st.spinner("Loading pipeline..."):
78
  pipeline = Pipeline.from_pretrained(selected_pipeline)
79
 
80
  uploaded_file = st.file_uploader("")
 
85
  except RuntimeError as e:
86
  st.error(e)
87
  st.stop()
88
+ waveform, sample_rate = audio.crop(
89
+ uploaded_file, Segment(0, min(duration, EXCERPT))
90
+ )
91
  file = {"waveform": waveform, "sample_rate": sample_rate, "uri": uploaded_file.name}
92
 
93
+ with st.spinner("Running pipeline..."):
94
  output = pipeline(file)
95
 
96
# Read the wavesurfer.js page template and inject the legend stylesheet
# into the Streamlit page.
with open('assets/template.html') as html, open('assets/style.css') as css:
    html_template = html.read()
    st.markdown('<style>{}</style>'.format(css.read()), unsafe_allow_html=True)

# Region/legend colors as #RRGGBBAA; the trailing "33" alpha keeps the
# regions translucent over the waveform.
colors = [
    "#ffd70033",
    "#00ffff33",
    "#ff00ff33",
    "#00ff0033",
    "#9932cc33",
    "#00bfff33",
    "#ff7f5033",
    "#66cdaa33",
]
num_colors = len(colors)

# One color per label, assigned in sorted label order; colors are reused
# cyclically when there are more labels than colors.
label2color = {
    label: colors[k % num_colors]
    for k, label in enumerate(sorted(output.labels()))
}

BASE64 = to_base64(waveform.numpy().T)

# REGIONS accumulates one JavaScript `addRegion` call per track;
# LEGENDS accumulates one <li> per distinct label, in order of appearance.
REGIONS = ""
LEGENDS = ""
labels = []
for segment, _, label in output.itertracks(yield_label=True):
    REGIONS += f"var re = wavesurfer.addRegion({{start: {segment.start:g}, end: {segment.end:g}, color: '{label2color[label]}', resize : false, drag : false}});"
    if label not in labels:
        LEGENDS += f"<li><span style='background-color:{label2color[label]}'></span>{label}</li>"
        labels.append(label)

# Fill in the region code BEFORE the audio payload: base64 text is made of
# letters and digits and may itself contain the region placeholder, which a
# later replace would then overwrite, corrupting the audio. The region code
# never contains the audio placeholder, so this order is safe.
html = html_template.replace("REGIONS", REGIONS).replace("BASE64", BASE64)
st.markdown("<div style='overflow : auto'><ul class='legend'>"+LEGENDS+"</ul></div>", unsafe_allow_html=True)
components.html(html, height=250, scrolling=True)
128
 
129
  with io.StringIO() as fp:
130
  output.write_rttm(fp)
assets/style.css ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .legend { list-style: none; margin: 0; padding: 0}
2
+ .legend li { float: left; margin : auto; margin-right: 10px; font-family : "Lato", "Trebuchet MS", Roboto, Helvetica, Arial, sans-serif; line-height: 1.8; font-size:20px;color : #444}
3
+ .legend span { border: 1px solid #ccc; float: left; width: 30px; height: 30px; margin: 2px; }
assets/template.html ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
<!--
  Waveform player page rendered inside the Streamlit component.
  Two placeholder tokens (the audio data URI inside wavesurfer.load(...) and
  the region-creation code on its own line further down) are substituted by
  the application before this page is served, so they must each appear
  exactly once and must not be mentioned by name in comments.

  wavesurfer.js is pinned to major version 6: the plugin paths under
  dist/plugin/ and the WaveSurfer.regions / WaveSurfer.timeline / addRegion
  API used below belong to that major version.
-->
<script src="https://unpkg.com/wavesurfer.js@6"></script>
<script src="https://unpkg.com/wavesurfer.js@6/dist/plugin/wavesurfer.regions.min.js"></script>
<script src="https://unpkg.com/wavesurfer.js@6/dist/plugin/wavesurfer.timeline.min.js"></script>
<br>
<div id="waveform"></div>
<div id="timeline"></div>
<br>
<div><button onclick="play()" id="ppb">Play</button></div>
<script type="text/javascript">
  const playPauseButton = document.getElementById('ppb');

  // Player with the regions plugin (speaker turns) and a timeline below it.
  const wavesurfer = WaveSurfer.create({
    container: '#waveform',
    barGap: 2,
    barHeight: 3,
    barWidth: 3,
    barRadius: 2,
    plugins: [
      WaveSurfer.regions.create({}),
      WaveSurfer.timeline.create({
        container: "#timeline",
        notchPercentHeight: 40,
        primaryColor: "#444",
        primaryFontColor: "#444"
      })
    ]
  });

  wavesurfer.load('BASE64');

  // Start playback as soon as the audio has been decoded.
  wavesurfer.on('ready', function () {
    wavesurfer.play();
  });

  // Keep the button caption in sync with the playback state.
  wavesurfer.on('play', function () {
    playPauseButton.textContent = "Pause";
  });
  wavesurfer.on('pause', function () {
    playPauseButton.textContent = "Play";
  });

  REGIONS

  // Space bar toggles playback.
  document.addEventListener('keyup', (event) => {
    if (event.code !== 'Space') {
      return;
    }
    // A focused button is already clicked by the space bar; toggling here
    // as well would immediately undo that click.
    if (event.target === playPauseButton) {
      return;
    }
    play();
  });

  // Toggle between playing and paused; also called by the button's onclick.
  function play() {
    if (wavesurfer.isPlaying()) {
      wavesurfer.pause();
    } else {
      wavesurfer.play();
    }
  }
</script>
requirements.txt CHANGED
@@ -1,4 +1,2 @@
1
  git+https://github.com/pyannote/pyannote-audio.git@develop#egg=pyannote-audio
2
  speechbrain == 0.5.10
3
- matplotlib == 3.3.3
4
-
 
1
  git+https://github.com/pyannote/pyannote-audio.git@develop#egg=pyannote-audio
2
  speechbrain == 0.5.10