# SpeechLine Gradio demo (Hugging Face Space app script).
import os | |
import shutil | |
from pathlib import Path | |
import gradio as gr | |
import pandas as pd | |
from datasets import Audio, Dataset | |
from speechline.segmenters import SilenceSegmenter, WordOverlapSegmenter | |
from speechline.transcribers import Wav2Vec2Transcriber | |
from speechline.utils.tokenizer import WordTokenizer | |
# Number of (Dataframe, Audio) output slots pre-allocated in the UI; `run`
# must always return exactly 2 * MAX_SEGMENTS component updates.
MAX_SEGMENTS = 100
# Scratch directory where segment TSVs and WAV chunks are written; wiped on
# every run.
OUTPUT_DIR = "tmp"
def segmentation_interface(choice: str):
    """Toggle visibility of the segmentation-specific inputs.

    Shows the silence-duration slider for "Silence Gap" and the ground-truth
    textbox for "Word Overlap" (exactly one of the two is visible).

    Args:
        choice: Selected segmentation method from the radio component.

    Returns:
        A pair of ``gr.update`` objects for the (silence slider, ground-truth
        textbox) components.
    """
    # The original if/elif fell through and returned None for any other
    # value; deriving both updates from one boolean guarantees a valid pair.
    show_silence = choice == "Silence Gap"
    return gr.update(visible=show_silence), gr.update(visible=not show_silence)
def run(audio_path, model, segmentation_type, silence_duration, ground_truth):
    """Transcribe an audio file, chunk it into segments, and build UI updates.

    Args:
        audio_path: Filesystem path of the uploaded audio.
        model: Name of the wav2vec2 checkpoint used for transcription.
        segmentation_type: Either "Silence Gap" or "Word Overlap".
        silence_duration: Minimum silence gap (seconds) for the silence segmenter.
        ground_truth: Ground-truth text, tokenized for the word-overlap segmenter.

    Returns:
        A flat list of 2 * MAX_SEGMENTS Gradio updates — one visible
        (Dataframe, Audio) pair per produced segment, the rest hidden.

    Raises:
        ValueError: If ``segmentation_type`` is not a recognized method.
    """
    transcriber = Wav2Vec2Transcriber(model)
    dataset = Dataset.from_dict({"audio": [audio_path]})
    # Resample the audio to the transcriber's expected sampling rate.
    dataset = dataset.cast_column(
        "audio", Audio(sampling_rate=transcriber.sampling_rate)
    )
    output_offsets = transcriber.predict(dataset, output_offsets=True)

    if segmentation_type == "Silence Gap":
        segmenter = SilenceSegmenter()
    elif segmentation_type == "Word Overlap":
        segmenter = WordOverlapSegmenter()
    else:
        # The original fell through with `segmenter` unbound, producing a
        # confusing NameError later; fail fast and explicitly instead.
        raise ValueError(f"Unknown segmentation method: {segmentation_type}")

    tokenizer = WordTokenizer()
    # Start from a clean output directory so stale chunks from a previous
    # run are never displayed.
    if os.path.exists(OUTPUT_DIR):
        shutil.rmtree(OUTPUT_DIR)

    segmenter.chunk_audio_segments(
        audio_path,
        OUTPUT_DIR,
        output_offsets[0],
        minimum_chunk_duration=0,
        silence_duration=silence_duration,
        ground_truth=tokenizer(ground_truth),
    )

    outputs, idx = [], 0
    for path in sorted(Path(OUTPUT_DIR).rglob("*")):
        if path.suffix == ".tsv":
            offsets = pd.read_csv(
                path, sep="\t", names=["start_offset", "end_offset", "text"]
            )
            outputs.append(gr.Dataframe.update(value=offsets, visible=True))
        elif path.suffix == ".wav":
            outputs.append(gr.Audio.update(value=str(path), visible=True))
            # idx counts produced segments; assumes each segment yields one
            # .tsv/.wav pair — TODO confirm against the segmenter's output.
            idx += 1
    # Hide the remaining pre-allocated component slots.
    for _ in range(MAX_SEGMENTS - idx):
        outputs += [gr.Dataframe.update(visible=False), gr.Audio.update(visible=False)]
    return outputs
# Build the Gradio UI. Component creation order matters (it fixes the layout
# and the input/output wiring), so the code is kept as-is and only commented.
with gr.Blocks() as demo:
    gr.Markdown(
        f"""
        <center>
        # ๐๏ธ SpeechLine Demo
        [Repository](https://github.com/bookbot-kids/speechline) | [Documentation](https://bookbot-kids.github.io/speechline/)
        </center>
        """
    )
    with gr.Row():
        # Left column: all user inputs.
        with gr.Column():
            audio = gr.Audio(type="filepath")
            model = gr.Dropdown(
                choices=[
                    "facebook/wav2vec2-base-960h",
                ],
                value="facebook/wav2vec2-base-960h",
                label="Transcriber Model",
            )
            segmenter = gr.Radio(
                choices=["Silence Gap", "Word Overlap"],
                value="Silence Gap",
                label="Segmentation Method",
            )
            # Visible only for "Silence Gap" (the default selection).
            sil = gr.Slider(
                0, 1, value=0.1, step=0.1, label="Silence Duration", visible=True
            )
            # Visible only for "Word Overlap".
            gt = gr.Textbox(
                label="Ground Truth",
                placeholder="Enter Ground Truth Text",
                interactive=True,
                visible=False,
            )
            # Swap slider/textbox visibility when the method changes.
            segmenter.change(
                fn=segmentation_interface, inputs=segmenter, outputs=[sil, gt]
            )
            inputs = [audio, model, segmenter, sil, gt]
            transcribe_btn = gr.Button("Transcribe")
        # Right column: MAX_SEGMENTS pre-allocated (Dataframe, Audio) pairs;
        # `run` toggles their visibility, since Gradio needs a fixed output list.
        with gr.Column():
            outputs = [
                gr.Dataframe(
                    visible=True, headers=["start_offset", "end_offset", "text"]
                ),
                gr.Audio(visible=True),
            ]
            for _ in range(MAX_SEGMENTS - 1):
                outputs += [gr.Dataframe(visible=False), gr.Audio(visible=False)]
    transcribe_btn.click(fn=run, inputs=inputs, outputs=outputs)
demo.launch()