Spaces:
Running
on
Zero
Running
on
Zero
import os | |
import math | |
import librosa | |
import numpy as np | |
from transformers import Wav2Vec2FeatureExtractor | |
class DataProcessor:
    """Turn an audio file into the input values of a wav2vec 2.0 feature extractor.

    The extractor is loaded once at construction time and reused for every
    call to :meth:`extract_feature`.
    """

    def __init__(self, sampling_rate, wav2vec_model_path):
        """Store the target sampling rate and load the feature extractor.

        Args:
            sampling_rate: Rate passed to ``librosa.load`` as ``sr`` when
                reading audio.
            wav2vec_model_path: Location handed to
                ``Wav2Vec2FeatureExtractor.from_pretrained``; loading is done
                with ``local_files_only=True``.
        """
        self._sampling_rate = sampling_rate
        self._processor = Wav2Vec2FeatureExtractor.from_pretrained(
            wav2vec_model_path,
            local_files_only=True,
        )

    def extract_feature(self, audio_path):
        """Load ``audio_path`` and return its squeezed extractor input values.

        Args:
            audio_path: Path of the audio file to read with ``librosa.load``.

        Returns:
            The extractor's ``input_values`` with all length-1 axes removed
            by ``np.squeeze``.
        """
        waveform, loaded_rate = librosa.load(audio_path, sr=self._sampling_rate)
        # Pass on the rate reported by librosa rather than the stored one, so
        # the extractor sees exactly what the loader produced.
        encoded = self._processor(waveform, sampling_rate=loaded_rate)
        return np.squeeze(encoded.input_values)
def compute_seq_len(num_samples, sampling_rate, fps):
    """Return how many frames at ``fps`` are needed to cover ``num_samples``.

    Args:
        num_samples: Number of audio samples.
        sampling_rate: Audio samples per second.
        fps: Frames per second of the target sequence.

    Returns:
        ``ceil(num_samples * fps / sampling_rate)`` as an ``int``.

    Raises:
        ZeroDivisionError: If ``sampling_rate`` is zero.
    """
    # Multiply before dividing. Dividing first rounds the duration to a float
    # (e.g. 132800 / 16000 -> 8.3, which is not exactly representable), and
    # the error can be amplified past the next integer by the multiplication
    # (8.3 * 30 == 249.00000000000003), making ceil() add a spurious frame.
    # With integer inputs the product is exact, so a whole-number result
    # stays whole.
    return math.ceil(num_samples * fps / sampling_rate)


def prepare_audio_feature(wav_file, fps=30, sampling_rate=16000, wav2vec_model_path=None):
    """Extract wav2vec input values from an audio file and its frame count.

    Args:
        wav_file: Path of the audio file to process.
        fps: Frame rate used to convert the audio length into a sequence
            length.
        sampling_rate: Sampling rate used to load the audio and to convert
            the sample count into a duration.
        wav2vec_model_path: Location of the wav2vec feature extractor,
            forwarded to ``DataProcessor``.

    Returns:
        A dict with two keys: ``"audio_feature"`` (the value returned by
        ``DataProcessor.extract_feature``) and ``"seq_len"`` (the number of
        frames at ``fps`` covering that audio, rounded up).
    """
    data_preprocessor = DataProcessor(sampling_rate, wav2vec_model_path)
    input_value = data_preprocessor.extract_feature(wav_file)
    return {
        "audio_feature": input_value,
        "seq_len": compute_seq_len(len(input_value), sampling_rate, fps),
    }