|
|
|
|
|
|
|
import pandas as pd |
|
import os |
|
import numpy as np |
|
from pathlib import Path |
|
import matplotlib.pyplot as plt |
|
import audiofile |
|
|
|
# Column names used by the analysis frames: the same eleven label suffixes
# under the 'prompt-' and 'styletts2-' prefixes, followed by the two 'cer-*'
# columns. (Presumably this mirrors the schema of the pickled frames -- the
# script below reads 'styletts2-*' and 'cer-*' columns from them; verify
# against whatever writes the pickles.)
columns = [
    f'{source}-{label}'
    for source in ('prompt', 'styletts2')
    for label in (
        'arousal', 'dominance', 'valence',
        'Angry', 'Sad', 'Happy', 'Surprise',
        'Fear', 'Disgust', 'Contempt', 'Neutral',
    )
] + ['cer-prompt', 'cer-styletts2']
|
|
|
# Pickle files consumed by this script. Each one is read with
# pd.read_pickle() and stored in `preds` under its file name; paths are
# relative to the current working directory.
FULL_PKL = [
    'english_4x_analytic.pkl',
    'english_analytic.pkl',
    'foreign_4x_analytic.pkl',
    'foreign_analytic.pkl',
    'human_analytic.pkl',
]
|
|
|
|
|
|
|
|
|
# Label suffixes, in the same order in which they appear after the
# 'prompt-' / 'styletts2-' prefixes in `columns`.
LABELS = (
    ['arousal', 'dominance', 'valence']
    + ['Angry', 'Sad', 'Happy', 'Surprise',
       'Fear', 'Disgust', 'Contempt', 'Neutral']
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Read every file listed in FULL_PKL once, keep the result in `preds` keyed
# by file name, and print the mean of its 'cer-prompt' and 'cer-styletts2'
# columns.
preds = {}

for pkl_name in FULL_PKL:
    # The loaded object must stay bound to the name `y`: the `=` specifier in
    # the f-strings below echoes the expression text, so renaming it would
    # change what gets printed.
    y = pd.read_pickle(pkl_name)
    preds[pkl_name] = y

    banner = f'\n\n {pkl_name}\n_____________________________\n'
    print(banner,
          f"{y['cer-prompt'].mean()=}",
          f"{y['cer-styletts2'].mean()=}\n\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Per-sentence figures.
#
# For each language one figure `persentence_<lang>.pdf` is written with one
# row per plotted label and two columns:
#   left  -> preds['<lang>_analytic.pkl']
#   right -> preds['<lang>_4x_analytic.pkl']
# Every panel draws the 'styletts2-<label>' column of that frame as a line,
# on top of a shaded band spanning 0 .. the same column of
# preds['human_analytic.pkl'] (labelled "EmoDB" in the legend).
#
# Assumes every frame in `preds` has the same number of rows as
# 'english_analytic.pkl' and contains the 'styletts2-<label>' columns used
# below -- TODO confirm against the code that writes the pickles.
# ---------------------------------------------------------------------------

_CURVE_COLOR = (0, 104 / 255, 139 / 255)
_BAND_COLOR = (.2, .2, .2)
_X_LABEL = '720 Harvard Sentences'

# Row order of the figure. 'Surprise', 'Contempt' and 'Neutral' exist in
# LABELS but are deliberately not plotted (the original script skipped them).
_PLOTTED_LABELS = ['arousal', 'dominance', 'valence',
                   'Angry', 'Sad', 'Happy', 'Fear', 'Disgust']


def _draw_panel(axis, x, curve, reference, *,
                ylabel=None, legend_labels=None, xlabel=None):
    """Draw one panel: `curve` as a line over a shaded 0..`reference` band.

    Args:
        axis: matplotlib Axes to draw on.
        x: 1-D positions shared by `curve` and `reference`; its first and
            last entries become the x-limits (so it must be non-empty).
        curve: values drawn as the coloured line.
        reference: values bounding the grey band (filled from 0).
        ylabel: text for the y-axis label, or None to leave it unset.
        legend_labels: two strings naming (line, band), or None for no
            legend.
        xlabel: text for the x-axis label, or None to leave it unset.
    """
    (line,) = axis.plot(x, curve, color=_CURVE_COLOR, linewidth=2)
    band = axis.fill_between(x, 0, reference, color=_BAND_COLOR, alpha=0.244)

    if legend_labels is not None:
        # Explicit handles keep the label/artist pairing independent of the
        # order in which the artists were added to the axes.
        axis.legend([line, band], legend_labels, prop={'size': 14})
    if ylabel is not None:
        axis.set_ylabel(ylabel, color=(.4, .4, .4), fontsize=17)
    if xlabel is not None:
        axis.set_xlabel(xlabel, fontsize=17, color=(.2, .2, .2))

    axis.set_ylim([1e-7, .9999])
    axis.set_xlim([x[0], x[-1]])
    # Blank out the x tick labels (the ticks and grid lines stay).
    axis.set_xticklabels([])
    axis.grid()


# The x positions are the same for every figure, so compute them once.
time_stamp = np.arange(len(preds['english_analytic.pkl']))

for lang in ['english', 'foreign']:
    desc = 'English' if lang == 'english' else 'Non-English'

    fig, ax = plt.subplots(nrows=len(_PLOTTED_LABELS), ncols=2,
                           figsize=(24, 20.7),
                           gridspec_kw={'hspace': 0, 'wspace': .04})

    bottom_row = len(_PLOTTED_LABELS) - 1
    for row, dim in enumerate(_PLOTTED_LABELS):
        column = f'styletts2-{dim}'
        reference = preds['human_analytic.pkl'][column]

        # Legends only on the top row. The x-axis label only on the bottom
        # row: with hspace=0 a label on any other row would sit underneath
        # the axes of the row below it.
        with_legend = row == 0
        xlabel = _X_LABEL if row == bottom_row else None

        _draw_panel(
            ax[row, 0], time_stamp,
            preds[f'{lang}_analytic.pkl'][column], reference,
            ylabel=dim.lower(),
            legend_labels=([f'StyleTTS2 using Mimic-3 {desc}',
                            'StyleTTS2 using EmoDB']
                           if with_legend else None),
            xlabel=xlabel,
        )
        _draw_panel(
            ax[row, 1], time_stamp,
            preds[f'{lang}_4x_analytic.pkl'][column], reference,
            legend_labels=([f'StyleTTS2 using Mimic-3 {desc} 4x speed',
                            'StyleTTS2 using EmoDB']
                           if with_legend else None),
            xlabel=xlabel,
        )

    fig.savefig(f'persentence_{lang}.pdf', bbox_inches='tight')
    plt.close(fig)
|
|
|
|