|
""" Test the speed of the augmentation """ |
|
import torch |
|
import torchaudio |
|
|
|
|
|
# Prefer the GPU when one is present; otherwise fall back to the CPU so the
# script still runs end-to-end on machines without CUDA (a hard-coded "cuda"
# device makes the `.to(device)` call below raise there).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Synthetic test input: a 440 Hz sine at half amplitude on a 1/16000 s grid.
# arange(0, 2.0479, 1/16000) yields ceil(2.0479 * 16000) = 32767 samples; the
# end value sits well away from a multiple of the step, so float rounding in
# the step cannot change the sample count.
t = torch.arange(0, 2.0479, 1 / 16000)
x = torch.sin(2 * torch.pi * 440 * t) * 0.5

# Reshape to (1, 1, 32767) and repeat along dim 0 to get 80 identical copies,
# i.e. a tensor of shape (80, 1, 32767).
x = x.reshape(1, 1, 32767).tile(80, 1, 1)
x = x.to(device)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch |
|
from torch_audiomentations import Compose, PitchShift, Gain, PolarityInversion |
|
|
|
# Candidate 1: torch-audiomentations.
# A single PitchShift transform (0 to 2.2 semitones, "per_batch" mode,
# p=1.0) wrapped in a Compose pipeline and applied to the test batch `x`.
_pitch_shift = PitchShift(
    min_transpose_semitones=0,
    max_transpose_semitones=2.2,
    mode="per_batch",
    p=1.0,
    p_mode="per_batch",
    sample_rate=16000,
    target_rate=16000,
)
apply_augmentation = Compose(transforms=[_pitch_shift])

x_am = apply_augmentation(x, sample_rate=16000)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from torchaudio import transforms |
|
|
|
# Candidate 2: torchaudio's PitchShift transform, configured for a 16000 Hz
# sample rate and a shift of 2 steps, moved to `device` and applied to `x`.
ta_transform = transforms.PitchShift(sample_rate=16000, n_steps=2)
ta_transform = ta_transform.to(device)

x_ta = ta_transform(x)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from model.pitchshift_layer import PitchShiftLayer |
|
|
|
# Candidate 3: the project's PitchShiftLayer, built with a fixed shift range
# of [2, 2], moved to `device`, and called on `x` with a second argument of 2.
# NOTE(review): the second argument is presumably the semitone shift; confirm
# against model.pitchshift_layer.
ps_ymt3 = PitchShiftLayer(
    pshift_range=[2, 2],
    fs=16000,
    min_gcd=16,
    n_fft=2048,
)
ps_ymt3 = ps_ymt3.to(device)

x_ymt3 = ps_ymt3(x, 2)
|
|
|
|
|
|
|
|
|
import matplotlib.pyplot as plt |
|
|
|
|
|
# Figure 1: compare the three pitch-shift implementations side by side.
# The numbers below are hard-coded in this script (presumably recorded from
# earlier benchmark runs; confirm before reusing them).
models = ['torch-audiomentation', 'torchaudio', 'YourMT3:PitchShiftLayer']
cpu_time = [1.18, 4.01, 0.389]  # plotted in seconds
gpu_time = [58, 25.1, 7.18]  # plotted in milliseconds
gpu_memory = [3.8, 5.49, 0.016]  # plotted in gigabytes

fig, axs = plt.subplots(1, 3, figsize=(15, 5))

# One colour per implementation, shared by all three panels.
bar_colors = ['#FFB6C1', '#ADD8E6', '#98FB98']
bar1, bar2, bar3 = [
    ax.bar(models, values, color=bar_colors)
    for ax, values in zip(axs, (cpu_time, gpu_time, gpu_memory))
]

axs[0].set_ylabel('Time (s)')
axs[0].set_title('Process Time (CPU) bsz=80')
axs[1].set_ylabel('Time (ms)')
axs[1].set_title('Process Time (GPU) bsz=80')
axs[2].set_ylabel('Memory (GB)')
axs[2].set_title('GPU Memory Usage per semitone')

for ax in axs:
    ax.grid(axis='y')
    ax.set_yscale('log')
    # The categorical bar plot already labels the ticks with the entries of
    # `models`, so only restyle those labels. Calling set_xticklabels()
    # without first fixing the tick positions is discouraged by matplotlib
    # and emits a warning.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

# Write each bar's value, with its unit, just above the bar.
panels = (
    (axs[0], bar1, cpu_time, '{:.2f} s'),
    (axs[1], bar2, gpu_time, '{:.2f} ms'),
    (axs[2], bar3, gpu_memory, '{:.3f} GB'),
)
for ax, bars, values, label_fmt in panels:
    for rect, value in zip(bars, values):
        ax.text(
            rect.get_x() + rect.get_width() / 2,
            rect.get_height(),
            label_fmt.format(value),
            ha='center',
            va='bottom',
        )

plt.tight_layout()
plt.show()
|
|
|
|
|
|
|
|
|
|
|
# Figure 2: per-stage processing time of the YMT3 pitch shifter (titles say
# batch size 80), split into its two stages. Values are hard-coded in this
# script and are annotated in milliseconds.
processing_type = ['Stretch (Phase Vocoder)', 'Resampler (Conv1D)']
cpu_times = [143, 245]
gpu_times = [6.47, 0.71]

fig, axs = plt.subplots(1, 2, figsize=(12, 6))

stage_colors = ['#ADD8E6', '#98FB98']
axs[0].bar(processing_type, cpu_times, color=stage_colors)
axs[1].bar(processing_type, gpu_times, color=stage_colors)

axs[0].set_title('Contribution of CPU Processing Time: YMT3-PS (BSZ=80)')
axs[1].set_title('Contribution of GPU Processing Time: YMT3-PS (BSZ=80)')

for ax in axs:
    # Both panels show milliseconds, so label both y-axes (previously only
    # the left panel had a label).
    ax.set_ylabel('Time (ms)')
    ax.grid(axis='y')
    ax.set_yscale('log')

# Annotate each bar with its value. Categorical bars are drawn at integer
# x positions 0, 1, ..., so the enumerate index is the bar's x coordinate.
for ax, times in zip(axs, [cpu_times, gpu_times]):
    for idx, elapsed_ms in enumerate(times):
        ax.text(
            idx,
            elapsed_ms,
            f"{elapsed_ms:.2f} ms",
            ha='center',
            va='bottom',
            fontsize=8,
        )

plt.tight_layout()
plt.show()
|
|