import openvino.properties.hint as hints

from speechbrain.inference.interfaces import foreign_class
from speechbrain.pretrained import EncoderClassifier

from custom_interface import CustomEncoderWav2vec2Classifier
# Load the fine-tuned wav2vec2 IEMOCAP emotion-recognition model through the
# custom interface shipped with the model card.
classifier = foreign_class(
    source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
    pymodule_file="custom_interface.py",
    classname="CustomEncoderWav2vec2Classifier",
)
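
# Optional sanity check (illustrative, not required for the backend setup below):
# the interface loaded via foreign_class can classify the bundled sample directly.
# out_prob, score, index, text_lab = classifier.classify_file(
#     "speechbrain/emotion-recognition-wav2vec2-IEMOCAP/anger.wav"
# )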
# Fetch the same checkpoint as an EncoderClassifier so its modules and
# hyperparameters can be reused when building the backend-specific instance.
checkpoint = EncoderClassifier.from_hparams(
    source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
    savedir="./",
)

# Expose the checkpoint hyperparameters as a plain dictionary.
hparams_dict = vars(checkpoint.hparams)
backend = "openvino" |
|
torch_device = "cpu" |
|
|
|
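
# Alternative configuration (a sketch, not used below): the OpenVINO performance
# hint can target single-request latency instead of throughput on CPU.
# latency_config = {hints.performance_mode: hints.PerformanceMode.LATENCY}
# ov_opts_latency = {"ov_device": "CPU", "config": latency_config}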
if backend == "openvino" and torch_device == "cpu":
    # Ask the OpenVINO runtime to optimize for throughput on the CPU device.
    config = {hints.performance_mode: hints.PerformanceMode.THROUGHPUT}
    ov_opts = {"ov_device": "CPU", "config": config}
    instance = CustomEncoderWav2vec2Classifier(
        modules=checkpoint.mods,
        hparams=hparams_dict,
        model=classifier.mods["wav2vec2"].model,
        audio_file_path="speechbrain/emotion-recognition-wav2vec2-IEMOCAP/anger.wav",
        backend="openvino",
        opts=ov_opts,
        torch_device=torch_device,
        save_ov_model=False,
    )
elif backend == "openvino" and torch_device == "cuda":
    raise ValueError(
        "OpenVINO backend does not support CUDA devices. "
        "Please use cpu for torch_device."
    )
if backend == "pytorch": |
|
torch_opts = {"torch_device": torch_device} |
|
instance = CustomEncoderWav2vec2Classifier(modules=checkpoint.mods, |
|
hparams=hparams_dict, model=classifier.mods["wav2vec2"].model, |
|
audio_file_path="speechbrain/emotion-recognition-wav2vec2-IEMOCAP/anger.wav", |
|
backend="pytorch", opts=torch_opts, torch_device=torch_device) |
|
|
|
|
|
print("=" * 30) |
|
if backend == "openvino": |
|
print(f"[INFO] Inference Device: {ov_opts['ov_device']}") |
|
print("=" * 30) |
|
print("\n[INFO] Performing OpenVINO inference...") |
|
else: |
|
print(f"[INFO] Inference Device: {torch_opts['torch_device']}") |
|
print("=" * 30) |
|
print("\n[INFO] Performing PyTorch inference...") |
|
|
|
|
|
# Run emotion classification on the sample audio file; classify_file returns the
# class probabilities, the best score, the predicted index, and the decoded label.
out_prob, score, index, text_lab = instance.classify_file(
    "speechbrain/emotion-recognition-wav2vec2-IEMOCAP/anger.wav"
)
print(f"[RESULT] Inference output label: {text_lab[0]}")