import numpy as np | |
class Utterance: | |
def __init__(self, frames_fpath, wave_fpath): | |
self.frames_fpath = frames_fpath | |
self.wave_fpath = wave_fpath | |
def get_frames(self): | |
return np.load(self.frames_fpath) | |
def random_partial(self, n_frames): | |
""" | |
Crops the frames into a partial utterance of n_frames | |
:param n_frames: The number of frames of the partial utterance | |
:return: the partial utterance frames and a tuple indicating the start and end of the | |
partial utterance in the complete utterance. | |
""" | |
frames = self.get_frames() | |
if frames.shape[0] == n_frames: | |
start = 0 | |
else: | |
start = np.random.randint(0, frames.shape[0] - n_frames) | |
end = start + n_frames | |
return frames[start:end], (start, end) |