# Emotion recognition on IEMOCAP with a SpeechBrain wav2vec2 model,
# runnable with either an OpenVINO or a plain PyTorch inference backend.
from speechbrain.inference.interfaces import foreign_class
from custom_interface import CustomEncoderWav2vec2Classifier
from speechbrain.pretrained import EncoderClassifier
import openvino.properties.hint as hints
# foreign_class() is SpeechBrain's hook for loading a custom PyTorch model
# interface (custom_interface.py) shipped alongside the hub checkpoint.
_foreign_class_kwargs = {
    "source": "speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
    "pymodule_file": "custom_interface.py",
    "classname": "CustomEncoderWav2vec2Classifier",
}
classifier = foreign_class(**_foreign_class_kwargs)
# Fetch the pretrained checkpoint files; they are cached in the current
# working directory (savedir="./").
_model_source = "speechbrain/emotion-recognition-wav2vec2-IEMOCAP"
checkpoint = EncoderClassifier.from_hparams(source=_model_source, savedir="./")

# Expose the loaded hyperparameters as a plain dict so the custom
# classifier below can consume them.
hparams_dict = vars(checkpoint.hparams)
# ---------------------------------------------------------------------------
# Inference backend selection: "openvino" or "pytorch".
# Builds `instance`, the classifier used for inference below.
# ---------------------------------------------------------------------------
backend = "openvino"
torch_device = "cpu"

if backend == "openvino":
    # The OpenVINO path only supports CPU execution; fail fast on any
    # other torch_device (the original code silently fell through for
    # devices other than "cuda", leaving `instance` undefined).
    if torch_device != "cpu":
        raise ValueError("OpenVINO backend does not support CUDA devices. "
                         "Please use cpu for torch_device.")
    # OpenVINO inference optimization parameters: favor throughput.
    config = {hints.performance_mode: hints.PerformanceMode.THROUGHPUT}
    ov_opts = {"ov_device": "CPU", "config": config}
    instance = CustomEncoderWav2vec2Classifier(
        modules=checkpoint.mods,
        hparams=hparams_dict,
        model=classifier.mods["wav2vec2"].model,
        audio_file_path="speechbrain/emotion-recognition-wav2vec2-IEMOCAP/anger.wav",
        backend="openvino",
        opts=ov_opts,
        torch_device=torch_device,
        save_ov_model=False,
    )
elif backend == "pytorch":
    torch_opts = {"torch_device": torch_device}
    instance = CustomEncoderWav2vec2Classifier(
        modules=checkpoint.mods,
        hparams=hparams_dict,
        model=classifier.mods["wav2vec2"].model,
        audio_file_path="speechbrain/emotion-recognition-wav2vec2-IEMOCAP/anger.wav",
        backend="pytorch",
        opts=torch_opts,
        torch_device=torch_device,
    )
else:
    # Guard against typos: without this, `instance` would be undefined and
    # the script would crash later with a confusing NameError.
    raise ValueError(f"Unsupported backend: {backend!r}")
# ---------------------------------------------------------------------------
# Run inference on the bundled sample clip and report the predicted label.
# (Removed a stray trailing " |" artifact that broke the final line.)
# ---------------------------------------------------------------------------
print("=" * 30)
if backend == "openvino":
    print(f"[INFO] Inference Device: {ov_opts['ov_device']}")
    print("=" * 30)
    print("\n[INFO] Performing OpenVINO inference...")
else:
    print(f"[INFO] Inference Device: {torch_opts['torch_device']}")
    print("=" * 30)
    print("\n[INFO] Performing PyTorch inference...")

out_prob, score, index, text_lab = instance.classify_file(
    "speechbrain/emotion-recognition-wav2vec2-IEMOCAP/anger.wav")
# NOTE(review): `text_lab[index-1]` assumes `index` is 1-based into the
# label list returned by classify_file — confirm against
# CustomEncoderWav2vec2Classifier's contract.
print(f"[RESULT] Inference output label: {text_lab[index-1]}")