# HuBERT/fairseq/tokenizer.py
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import re

# Matches any run of one or more whitespace characters (spaces, tabs, newlines).
SPACE_NORMALIZER = re.compile(r"\s+")


def tokenize_line(line):
    """Split a line into tokens, collapsing runs of whitespace first."""
    line = SPACE_NORMALIZER.sub(" ", line)
    line = line.strip()
    return line.split()
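

# Minimal usage sketch (an illustrative addition, not part of the upstream
# fairseq file): tokenize_line normalizes any run of whitespace to a single
# space, strips the ends, and splits on spaces.
if __name__ == "__main__":
    assert tokenize_line("  hello \t world\n") == ["hello", "world"]
    print(tokenize_line("a  b\tc"))  # -> ['a', 'b', 'c']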