AdalAbilbekov committed on
Commit
ae8e1dd
1 Parent(s): 9d9a36a

First commit

This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .gitattributes +2 -0
  2. app.py +117 -0
  3. cnnwt_SGD_1959.pt +3 -0
  4. config.json +37 -0
  5. configs/.DS_Store +0 -0
  6. configs/hifigan-config.json +37 -0
  7. configs/train_grad.json +68 -0
  8. data_collate.py +147 -0
  9. data_loader.py +309 -0
  10. data_preparation.py +108 -0
  11. env.py +15 -0
  12. filelists/all_spks/eval_utts.txt +3730 -0
  13. filelists/all_spks/feats.ark +0 -0
  14. filelists/all_spks/feats.scp +3 -0
  15. filelists/all_spks/text +0 -0
  16. filelists/all_spks/train_utts.txt +0 -0
  17. filelists/all_spks/utt2emo.json +0 -0
  18. filelists/all_spks/utt2spk.json +0 -0
  19. filelists/inference_generated.txt +2 -0
  20. g_01720000 +3 -0
  21. grad_uncond.pt +3 -0
  22. grad_uncond_10k_conf.pt +3 -0
  23. grad_uncond_cnn_001.pt +3 -0
  24. inference_EMA.py +89 -0
  25. inference_intensity_control.ipynb +0 -0
  26. melspec.py +40 -0
  27. model/__init__.py +2 -0
  28. model/__pycache__/__init__.cpython-39.pyc +0 -0
  29. model/__pycache__/tts.cpython-39.pyc +0 -0
  30. model/base.py +28 -0
  31. model/classifier.py +690 -0
  32. model/diffusion.py +513 -0
  33. model/monotonic_align/LICENCE +21 -0
  34. model/monotonic_align/__init__.py +23 -0
  35. model/monotonic_align/__pycache__/__init__.cpython-39.pyc +0 -0
  36. model/monotonic_align/build/lib.macosx-11.1-arm64-cpython-310/model/monotonic_align/core.cpython-310-darwin.so +0 -0
  37. model/monotonic_align/build/temp.linux-x86_64-3.6/core.o +3 -0
  38. model/monotonic_align/build/temp.macosx-10.9-x86_64-3.6/core.o +0 -0
  39. model/monotonic_align/build/temp.macosx-11.1-arm64-cpython-310/core.o +0 -0
  40. model/monotonic_align/core.c +0 -0
  41. model/monotonic_align/core.pyx +45 -0
  42. model/monotonic_align/model/monotonic_align/core.cpython-310-darwin.so +0 -0
  43. model/monotonic_align/setup.py +11 -0
  44. model/text_encoder.py +326 -0
  45. model/tts.py +558 -0
  46. model/utils.py +44 -0
  47. models.py +283 -0
  48. text/.DS_Store +0 -0
  49. text/LICENSE +30 -0
  50. text/__init__.py +106 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ g_01720000 filter=lfs diff=lfs merge=lfs -text
37
+ model/monotonic_align/build/temp.linux-x86_64-3.6/core.o filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,117 @@
1
+ import gradio as gr
2
+ import argparse
3
+ import json
4
+ import datetime as dt
5
+ import numpy as np
6
+ from scipy.io.wavfile import write
7
+ import gradio as gr
8
+ import torch
9
+ from pydub import AudioSegment
10
+ from model.classifier import SpecClassifier
11
+ from torch.utils.data import DataLoader
12
+ from text import text_to_sequence, cmudict
13
+ from text.symbols import symbols
14
+ import utils_data as utils
15
+ from utils import load_checkpoint_no_logger
16
+ from kaldiio import WriteHelper
17
+ import os
18
+ from tqdm import tqdm
19
+ from text import text_to_sequence, convert_text
20
+ import sys
21
+ from model import GradTTSXvector, GradTTSWithEmo
22
+ import IPython.display as ipd
23
+
24
+ device = ('cuda' if torch.cuda.is_available() else 'cpu')
25
+ print(device)
26
+
27
+ hps, args = utils.get_hparams_decode_two_mixture()
28
+
29
+ gradtts_uncond_model = GradTTSWithEmo
30
+
31
+ gradtts_uncond_model = gradtts_uncond_model(**hps.model).to(device)
32
+ model = SpecClassifier(
33
+ in_dim=hps.data.n_mel_channels,
34
+ d_decoder=hps.model.d_decoder,
35
+ h_decoder=hps.model.h_decoder,
36
+ l_decoder=hps.model.l_decoder,
37
+ k_decoder=hps.model.k_decoder,
38
+ decoder_dropout=hps.model.decoder_dropout,
39
+ n_class=hps.model.n_emos,
40
+ cond_dim=hps.data.n_mel_channels,
41
+ model_type=getattr(hps.model, "classifier_type", "CNN-with-time")
42
+ )
43
+
44
+ ckpt = './cnnwt_SGD_1959.pt'
45
+ ckpt_tts = './grad_uncond_cnn_001.pt'
46
+
47
+ utils.load_checkpoints_no_logger(ckpt_tts, gradtts_uncond_model, None)
48
+ utils.load_checkpoints_no_logger(ckpt, model, None)
49
+
50
+ _ = model.to(device).eval()
51
+
52
+ HIFIGAN_CONFIG = './config.json'
53
+ HIFIGAN_CHECKPT = './g_01720000'
54
+
55
+ from models import Generator as HiFiGAN
56
+ from env import AttrDict
57
+ print('Initializing HiFi-GAN...')
58
+ with open(HIFIGAN_CONFIG) as f:
59
+ h = AttrDict(json.load(f))
60
+ vocoder = HiFiGAN(h)
61
+ vocoder.load_state_dict(torch.load(HIFIGAN_CHECKPT, map_location=lambda loc, storage: loc)['generator'])
62
+ _ = vocoder.to(device).eval()
63
+ vocoder.remove_weight_norm()
64
+
65
+ def generate_audio(text, quantity, speaker, emotion_1, emotion_2):
66
+ x, x_lengths = convert_text(text)
67
+ emo_1, emo_2 = emotion_1, emotion_2
68
+ emo1 = torch.LongTensor([emo_1]).to(device)
69
+ emo2 = torch.LongTensor([emo_2]).to(device)
70
+ sid = torch.LongTensor([spekears.index(speaker)]).to(device)
71
+ intensity = quantity / 100
72
+
73
+ y_enc, y_dec, attn = gradtts_uncond_model.classifier_guidance_decode_two_mixture(
74
+ x, x_lengths,
75
+ n_timesteps=10,
76
+ temperature=2.0,
77
+ stoc=args.stoc,
78
+ spk=sid,
79
+ emo1=emo1,
80
+ emo2=emo2,
81
+ emo1_weight=intensity,
82
+ length_scale=1.,
83
+ classifier_func=model.forward,
84
+ guidance=300,
85
+ classifier_type=model.model_type
86
+ )
87
+ y_dec = y_dec.detach()
88
+ # y_dec = torch.nan_to_num(y_dec)
89
+ res = y_dec.squeeze().cpu().numpy()
90
+ x = torch.from_numpy(res).to(device).unsqueeze(0)
91
+ y_g_hat = vocoder(x)
92
+ audio = y_g_hat.squeeze()
93
+ audio = audio * 32768.0
94
+ audio = audio.detach().cpu().numpy().astype('int16')
95
+ sr = 22050
96
+ return sr, audio
97
+
98
+ # def sentence_builder(quantity, emotion_1, emotion_2):
99
+ # return f"""The {quantity} {emotion_1}s from {" and ".join(emotion_2)}"""
100
+
101
+ emotions = sorted(["angry", "surprise", "fear", "happy", "neutral", "sad"])
102
+ spekears = ['Madi', 'Marzhan', 'Akzhol']
103
+
104
+ demo = gr.Interface(
105
+ generate_audio,
106
+ [
107
+ gr.Textbox(label="Text", info="Text to synthesize."),
+ gr.Slider(0, 100, value=0, step=10, label="Intensity", info="Weight of Emotion 1, between 0 and 100."),
108
+ gr.Dropdown(spekears, value=spekears[1], label="Narrator", info="Select a narrator."
109
+ ),
110
+ gr.Dropdown(emotions, type="index", label="Emotion 1", info="Select first emotion."),
111
+ gr.Dropdown(emotions, type="index", value=emotions[3], label="Emotion 2", info="Select second emotion."
112
+ ),
113
+ ],
114
+ "audio"
115
+ )
116
+
117
+ demo.launch()
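The value returned by generate_audio is a plain (sample_rate, int16 numpy array) pair, so the same pipeline can be driven without the Gradio UI. Below is a minimal sketch of such a call; it assumes the trailing demo.launch() is guarded by an if __name__ == "__main__" check so that importing app does not start the web server, and the input text, output file name, and emotion choices are illustrative only, not taken from the repo.

# illustrative driver; assumes app.py can be imported without launching the demo
from scipy.io.wavfile import write

import app  # loads the Grad-TTS model, classifier and HiFi-GAN exactly as above

sr, audio = app.generate_audio(
    text="Сәлем!",                            # short Kazakh input, for illustration
    quantity=70,                              # slider value; app.py divides by 100 -> emo1_weight = 0.7
    speaker=app.spekears[1],                  # "Marzhan"
    emotion_1=app.emotions.index("angry"),    # integer indices into the sorted emotion list
    emotion_2=app.emotions.index("happy"),
)
write("mixture_sample.wav", sr, audio)        # int16 audio at 22050 Hz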
cnnwt_SGD_1959.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dad97a741b8faad42f6d4c0ccd808f20cd4d1e01890db0c3935d131dfafc9977
3
+ size 1948051
config.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "resblock": "1",
3
+ "num_gpus": 1,
4
+ "batch_size": 64,
5
+ "learning_rate": 0.0002,
6
+ "adam_b1": 0.8,
7
+ "adam_b2": 0.99,
8
+ "lr_decay": 0.999,
9
+ "seed": 1234,
10
+
11
+ "upsample_rates": [8,8,2,2],
12
+ "upsample_kernel_sizes": [16,16,4,4],
13
+ "upsample_initial_channel": 512,
14
+ "resblock_kernel_sizes": [3,7,11],
15
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
16
+
17
+ "segment_size": 8192,
18
+ "num_mels": 80,
19
+ "num_freq": 1025,
20
+ "n_fft": 1024,
21
+ "hop_size": 256,
22
+ "win_size": 1024,
23
+
24
+ "sampling_rate": 22050,
25
+
26
+ "fmin": 0,
27
+ "fmax": 8000,
28
+ "fmax_for_loss": null,
29
+
30
+ "num_workers": 4,
31
+
32
+ "dist_config": {
33
+ "dist_backend": "nccl",
34
+ "dist_url": "tcp://localhost:54320",
35
+ "world_size": 1
36
+ }
37
+ }
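The mel-related fields above (num_mels, n_fft, hop_size, win_size, fmin, fmax, sampling_rate) have to agree with the settings used when features are extracted in data_preparation.py, otherwise the vocoder is fed mels it was never trained on. A minimal sanity-check sketch, assuming config.json is in the working directory and that melspec.mel_spectrogram follows the usual HiFi-GAN argument order (n_fft, num_mels, sampling_rate, hop_size, win_size, fmin, fmax); the check itself is not part of the repo.

import json

with open("config.json") as f:
    h = json.load(f)

# values hard-coded in data_preparation.py's mel_spectrogram(...) call
expected = {"n_fft": 1024, "num_mels": 80, "sampling_rate": 22050,
            "hop_size": 256, "win_size": 1024, "fmin": 0, "fmax": 8000}

for key, value in expected.items():
    assert h[key] == value, f"{key}: vocoder config has {h[key]}, feature extraction used {value}"
print("Vocoder config matches the feature-extraction settings.")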
configs/.DS_Store ADDED
Binary file (6.15 kB)
configs/hifigan-config.json ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ "resblock": "1",
3
+ "num_gpus": 1,
4
+ "batch_size": 64,
5
+ "learning_rate": 0.0002,
6
+ "adam_b1": 0.8,
7
+ "adam_b2": 0.99,
8
+ "lr_decay": 0.999,
9
+ "seed": 1234,
10
+
11
+ "upsample_rates": [8,8,2,2],
12
+ "upsample_kernel_sizes": [16,16,4,4],
13
+ "upsample_initial_channel": 512,
14
+ "resblock_kernel_sizes": [3,7,11],
15
+ "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
16
+
17
+ "segment_size": 8192,
18
+ "num_mels": 80,
19
+ "num_freq": 1025,
20
+ "n_fft": 1024,
21
+ "hop_size": 256,
22
+ "win_size": 1024,
23
+
24
+ "sampling_rate": 22050,
25
+
26
+ "fmin": 0,
27
+ "fmax": 8000,
28
+ "fmax_for_loss": null,
29
+
30
+ "num_workers": 4,
31
+
32
+ "dist_config": {
33
+ "dist_backend": "nccl",
34
+ "dist_url": "tcp://localhost:54320",
35
+ "world_size": 1
36
+ }
37
+ }
configs/train_grad.json ADDED
@@ -0,0 +1,68 @@
1
+ {
2
+ "xvector": false,
3
+ "pe": false,
4
+ "train": {
5
+ "test_size": 6,
6
+ "n_epochs": 10000,
7
+ "batch_size": 64,
8
+ "learning_rate": 1e-4,
9
+ "seed": 37,
10
+ "save_every": 1,
11
+ "use_gt_dur": false
12
+ },
13
+ "data": {
14
+ "load_mel_from_disk": false,
15
+ "train_utts": "filelists/all_spks/train_utts.txt",
16
+ "val_utts": "filelists/all_spks/eval_utts.txt",
17
+ "train_utt2phns": "filelists/all_spks/text",
18
+ "val_utt2phns": "filelists/all_spks/text",
19
+ "train_feats_scp": "filelists/all_spks/feats.scp",
20
+ "val_feats_scp": "filelists/all_spks/feats.scp",
21
+ "train_utt2spk": "filelists/all_spks/utt2spk.json",
22
+ "val_utt2spk": "filelists/all_spks/utt2spk.json",
23
+ "train_utt2emo": "filelists/all_spks/utt2emo.json",
24
+ "val_utt2emo": "filelists/all_spks/utt2emo.json",
25
+
26
+ "train_var_scp": "",
27
+ "val_var_scp": "",
28
+
29
+ "text_cleaners": [
30
+ "kazakh_cleaners"
31
+ ],
32
+ "max_wav_value": 32768.0,
33
+ "sampling_rate": 22050,
34
+ "filter_length": 1024,
35
+ "hop_length": 200,
36
+ "win_length": 800,
37
+ "n_mel_channels": 80,
38
+ "mel_fmin": 20.0,
39
+ "mel_fmax": 8000.0,
40
+ "utt2phn_path": "data/res_utt2phns.json",
41
+ "add_blank": false
42
+ },
43
+ "model": {
44
+ "n_vocab": 200,
45
+ "n_spks": 3,
46
+ "n_emos": 6,
47
+ "spk_emb_dim": 64,
48
+ "n_enc_channels": 192,
49
+ "filter_channels": 768,
50
+ "filter_channels_dp": 256,
51
+ "n_enc_layers": 6,
52
+ "enc_kernel": 3,
53
+ "enc_dropout": 0.1,
54
+ "n_heads": 2,
55
+ "window_size": 4,
56
+ "dec_dim": 64,
57
+ "beta_min": 0.05,
58
+ "beta_max": 20.0,
59
+ "pe_scale": 1000,
60
+ "d_decoder": 128,
61
+ "l_decoder": 3,
62
+ "k_decoder": 7,
63
+ "h_decoder": 4,
64
+ "decoder_dropout":0.1,
65
+
66
+ "classifier_type": "CNN-with-time"
67
+ }
68
+ }
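app.py reads these values through nested attribute access (hps.data.n_mel_channels, hps.model.d_decoder, and so on) via utils_data.get_hparams_decode_two_mixture(), which is not among the 50 files shown here. The sketch below is a stand-in illustration of that kind of recursive attribute wrapper; HParams is not a class from this repo.

import json

class HParams(dict):
    """Dict with recursive attribute access (illustrative stand-in, not the repo's class)."""
    def __init__(self, mapping):
        super().__init__(mapping)
        for key, value in mapping.items():
            self[key] = HParams(value) if isinstance(value, dict) else value
    __getattr__ = dict.__getitem__

with open("configs/train_grad.json") as f:
    hps = HParams(json.load(f))

print(hps.model.n_emos, hps.data.n_mel_channels, hps.train.batch_size)  # 6 80 64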
data_collate.py ADDED
@@ -0,0 +1,147 @@
1
+ import os.path
2
+ import random
3
+ import numpy as np
4
+ import torch
5
+ import re
6
+ import torch.utils.data
7
+ import json
8
+
9
+ import kaldiio
10
+ from tqdm import tqdm
11
+
12
+
13
+ class BaseCollate:
14
+ def __init__(self, n_frames_per_step=1):
15
+ self.n_frames_per_step = n_frames_per_step
16
+
17
+ def collate_text_mel(self, batch: [dict]):
18
+ """
19
+ :param batch: list of dicts
20
+ """
21
+ utt = list(map(lambda x: x['utt'], batch))
22
+ input_lengths, ids_sorted_decreasing = torch.sort(
23
+ torch.LongTensor([len(x['text']) for x in batch]),
24
+ dim=0, descending=True)
25
+ max_input_len = input_lengths[0]
26
+
27
+ text_padded = torch.LongTensor(len(batch), max_input_len)
28
+ text_padded.zero_()
29
+ for i in range(len(ids_sorted_decreasing)):
30
+ text = batch[ids_sorted_decreasing[i]]['text']
31
+ text_padded[i, :text.size(0)] = text
32
+
33
+ # Right zero-pad mel-spec
34
+ num_mels = batch[0]['mel'].size(0)
35
+ max_target_len = max([x['mel'].size(1) for x in batch])
36
+ if max_target_len % self.n_frames_per_step != 0:
37
+ max_target_len += self.n_frames_per_step - max_target_len % self.n_frames_per_step
38
+ assert max_target_len % self.n_frames_per_step == 0
39
+
40
+ # include mel padded
41
+ mel_padded = torch.FloatTensor(len(batch), num_mels, max_target_len)
42
+ mel_padded.zero_()
43
+ output_lengths = torch.LongTensor(len(batch))
44
+ for i in range(len(ids_sorted_decreasing)):
45
+ mel = batch[ids_sorted_decreasing[i]]['mel']
46
+ mel_padded[i, :, :mel.size(1)] = mel
47
+ output_lengths[i] = mel.size(1)
48
+
49
+ utt_name = np.array(utt)[ids_sorted_decreasing].tolist()
50
+ if isinstance(utt_name, str):
51
+ utt_name = [utt_name]
52
+
53
+ res = {
54
+ "utt": utt_name,
55
+ "text_padded": text_padded,
56
+ "input_lengths": input_lengths,
57
+ "mel_padded": mel_padded,
58
+ "output_lengths": output_lengths,
59
+ }
60
+ return res, ids_sorted_decreasing
61
+
62
+
63
+ class SpkIDCollate(BaseCollate):
64
+ def __call__(self, batch, *args, **kwargs):
65
+ base_data, ids_sorted_decreasing = self.collate_text_mel(batch)
66
+ spk_ids = torch.LongTensor(list(map(lambda x: x["spk_ids"], batch)))
67
+ spk_ids = spk_ids[ids_sorted_decreasing]
68
+ base_data.update({
69
+ "spk_ids": spk_ids
70
+ })
71
+ return base_data
72
+
73
+
74
+ class SpkIDCollateWithEmo(BaseCollate):
75
+ def __call__(self, batch, *args, **kwargs):
76
+ base_data, ids_sorted_decreasing = self.collate_text_mel(batch)
77
+
78
+ spk_ids = torch.LongTensor(list(map(lambda x: x["spk_ids"], batch)))
79
+ spk_ids = spk_ids[ids_sorted_decreasing]
80
+ emo_ids = torch.LongTensor(list(map(lambda x: x['emo_ids'], batch)))
81
+ emo_ids = emo_ids[ids_sorted_decreasing]
82
+ base_data.update({
83
+ "spk_ids": spk_ids,
84
+ "emo_ids": emo_ids
85
+ })
86
+ return base_data
87
+
88
+
89
+ class XvectorCollate(BaseCollate):
90
+ def __call__(self, batch, *args, **kwargs):
91
+ base_data, ids_sorted_decreasing = self.collate_text_mel(batch)
92
+ xvectors = torch.cat(list(map(lambda x: x["xvector"].unsqueeze(0), batch)), dim=0)
93
+ xvectors = xvectors[ids_sorted_decreasing]
94
+ base_data.update({
95
+ "xvector": xvectors
96
+ })
97
+ return base_data
98
+
99
+
100
+ class SpkIDCollateWithPE(BaseCollate):
101
+ def __call__(self, batch, *args, **kwargs):
102
+ base_data, ids_sorted_decreasing = self.collate_text_mel(batch)
103
+ spk_ids = torch.LongTensor(list(map(lambda x: x["spk_ids"], batch)))
104
+ spk_ids = spk_ids[ids_sorted_decreasing]
105
+
106
+ num_var = batch[0]["var"].size(0)
107
+ max_target_len = max([x["var"].size(1) for x in batch])
108
+ if max_target_len % self.n_frames_per_step != 0:
109
+ max_target_len += self.n_frames_per_step - max_target_len % self.n_frames_per_step
110
+ assert max_target_len % self.n_frames_per_step == 0
111
+
112
+ var_padded = torch.FloatTensor(len(batch), num_var, max_target_len)
113
+ var_padded.zero_()
114
+ for i in range(len(ids_sorted_decreasing)):
115
+ var = batch[ids_sorted_decreasing[i]]["var"]
116
+ var_padded[i, :, :var.size(1)] = var
117
+
118
+ base_data.update({
119
+ "spk_ids": spk_ids,
120
+ "var_padded": var_padded
121
+ })
122
+ return base_data
123
+
124
+
125
+ class XvectorCollateWithPE(BaseCollate):
126
+ def __call__(self, batch, *args, **kwargs):
127
+ base_data, ids_sorted_decreasing = self.collate_text_mel(batch)
128
+ xvectors = torch.cat(list(map(lambda x: x["xvector"].unsqueeze(0), batch)), dim=0)
129
+ xvectors = xvectors[ids_sorted_decreasing]
130
+
131
+ num_var = batch[0]["var"].size(0)
132
+ max_target_len = max([x["var"].size(1) for x in batch])
133
+ if max_target_len % self.n_frames_per_step != 0:
134
+ max_target_len += self.n_frames_per_step - max_target_len % self.n_frames_per_step
135
+ assert max_target_len % self.n_frames_per_step == 0
136
+
137
+ var_padded = torch.FloatTensor(len(batch), num_var, max_target_len)
138
+ var_padded.zero_()
139
+ for i in range(len(ids_sorted_decreasing)):
140
+ var = batch[ids_sorted_decreasing[i]]["var"]
141
+ var_padded[i, :, :var.size(1)] = var
142
+
143
+ base_data.update({
144
+ "xvector": xvectors,
145
+ "var_padded": var_padded
146
+ })
147
+ return base_data
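SpkIDCollateWithEmo expects each item to be the dict produced by SpkIDLoaderWithEmo (utt, text, mel, spk_ids, emo_ids) and returns length-sorted, zero-padded tensors. A minimal sketch with synthetic items, assuming data_collate.py is importable from the repository root; the shapes and IDs below are made up for illustration.

import torch
from data_collate import SpkIDCollateWithEmo

# two synthetic items shaped like SpkIDLoaderWithEmo outputs (80-bin mels of different lengths)
batch = [
    {"utt": "utt_a", "text": torch.randint(1, 50, (12,)), "mel": torch.randn(80, 95),
     "spk_ids": 0, "emo_ids": 3},
    {"utt": "utt_b", "text": torch.randint(1, 50, (20,)), "mel": torch.randn(80, 140),
     "spk_ids": 2, "emo_ids": 1},
]

collate = SpkIDCollateWithEmo(n_frames_per_step=1)
out = collate(batch)

# items are reordered by descending text length; shorter entries are zero-padded on the right
print(out["text_padded"].shape, out["mel_padded"].shape)  # torch.Size([2, 20]) torch.Size([2, 80, 140])
print(out["input_lengths"], out["output_lengths"], out["spk_ids"], out["emo_ids"])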
data_loader.py ADDED
@@ -0,0 +1,309 @@
1
+ import os.path
2
+ import random
3
+ import numpy as np
4
+ import torch
5
+ import re
6
+ import torch.utils.data
7
+ import json
8
+
9
+ import kaldiio
10
+ from tqdm import tqdm
11
+ from text import text_to_sequence
12
+
13
+ class BaseLoader(torch.utils.data.Dataset):
14
+ def __init__(self, utts: str, hparams, feats_scp: str, utt2text:str):
15
+ """
16
+ :param utts: file path. A list of utts for this loader; these are the only utts this loader has access to.
17
+ This loader only deals with text, duration and feats. Files other than `utts` may cover a larger set of utterances.
18
+ """
19
+ self.n_mel_channels = hparams.n_mel_channels
20
+ self.sampling_rate = hparams.sampling_rate
21
+ self.utts = self.get_utts(utts)
22
+ self.utt2feat = self.get_utt2feat(feats_scp)
23
+ self.utt2text = self.get_utt2text(utt2text)
24
+
25
+ def get_utts(self, utts: str) -> list:
26
+ with open(utts, 'r') as f:
27
+ L = f.readlines()
28
+ L = list(map(lambda x: x.strip(), L))
29
+ random.seed(1234)
30
+ random.shuffle(L)
31
+ return L
32
+
33
+
34
+ def get_utt2feat(self, feats_scp: str):
35
+ utt2feat = kaldiio.load_scp(feats_scp) # lazy load mode
36
+ print(f"Succeeded reading feats from {feats_scp}")
37
+ return utt2feat
38
+
39
+ def get_utt2text(self, utt2text: str):
40
+ with open(utt2text, 'r') as f:
41
+ L = f.readlines()
42
+ utt2text = {line.split()[0]: line.strip().split(" ", 1)[1] for line in L}
43
+ return utt2text
44
+
45
+ def get_mel_from_kaldi(self, utt):
46
+ feat = self.utt2feat[utt]
47
+ feat = torch.FloatTensor(feat).squeeze()
48
+ assert self.n_mel_channels in feat.shape
49
+ if feat.shape[0] == self.n_mel_channels:
50
+ return feat
51
+ else:
52
+ return feat.T
53
+
54
+ def get_text(self, utt):
55
+ text = self.utt2text[utt]
56
+ text_norm = text_to_sequence(text)
57
+ text_norm = torch.IntTensor(text_norm)
58
+ return text_norm
59
+
60
+ def __getitem__(self, index):
61
+ res = self.get_mel_text_pair(self.utts[index])
62
+ return res
63
+
64
+ def __len__(self):
65
+ return len(self.utts)
66
+
67
+ def sample_test_batch(self, size):
68
+ idx = np.random.choice(range(len(self)), size=size, replace=False)
69
+ test_batch = []
70
+ for index in idx:
71
+ test_batch.append(self.__getitem__(index))
72
+ return test_batch
73
+
74
+
75
+ class SpkIDLoader(BaseLoader):
76
+ def __init__(self, utts: str, hparams, feats_scp: str, utt2phns: str, phn2id: str,
77
+ utt2phn_duration: str, utt2spk: str):
78
+ """
79
+ :param utt2spk: json file path (utt name -> spk id)
80
+ This loader loads speaker as a speaker ID for embedding table
81
+ """
82
+ super(SpkIDLoader, self).__init__(utts, hparams, feats_scp, utt2phns, phn2id, utt2phn_duration)
83
+ self.utt2spk = self.get_utt2spk(utt2spk)
84
+
85
+ def get_utt2spk(self, utt2spk: str) -> dict:
86
+ with open(utt2spk, 'r') as f:
87
+ res = json.load(f)
88
+ return res
89
+
90
+ def get_mel_text_pair(self, utt):
91
+ # separate filename and text
92
+ spkid = self.utt2spk[utt]
93
+ phn_ids = self.get_text(utt)
94
+ mel = self.get_mel_from_kaldi(utt)
95
+ dur = self.get_dur_from_kaldi(utt)
96
+
97
+ assert sum(dur) == mel.shape[1], f"Frame length mismatch: utt {utt}, dur: {sum(dur)}, mel: {mel.shape[1]}"
98
+ res = {
99
+ "utt": utt,
100
+ "mel": mel,
101
+ "spk_ids": spkid
102
+ }
103
+ return res
104
+
105
+ def __getitem__(self, index):
106
+ res = self.get_mel_text_pair(self.utts[index])
107
+ return res
108
+
109
+ def __len__(self):
110
+ return len(self.utts)
111
+
112
+
113
+ class SpkIDLoaderWithEmo(BaseLoader):
114
+ def __init__(self, utts: str, hparams, feats_scp: str, utt2text:str, utt2spk: str, utt2emo: str):
115
+ """
116
+ :param utt2spk: json file path (utt name -> spk id)
117
+ This loader loads speaker as a speaker ID for embedding table
118
+ """
119
+ super(SpkIDLoaderWithEmo, self).__init__(utts, hparams, feats_scp, utt2text)
120
+ self.utt2spk = self.get_utt2spk(utt2spk)
121
+ self.utt2emo = self.get_utt2emo(utt2emo)
122
+
123
+ def get_utt2spk(self, utt2spk: str) -> dict:
124
+ with open(utt2spk, 'r') as f:
125
+ res = json.load(f)
126
+ return res
127
+
128
+ def get_utt2emo(self, utt2emo: str) -> dict:
129
+ with open(utt2emo, 'r') as f:
130
+ res = json.load(f)
131
+ return res
132
+
133
+ def get_mel_text_pair(self, utt):
134
+ # separate filename and text
135
+ spkid = int(self.utt2spk[utt])
136
+ emoid = int(self.utt2emo[utt])
137
+ text = self.get_text(utt)
138
+ mel = self.get_mel_from_kaldi(utt)
139
+
140
+ res = {
141
+ "utt": utt,
142
+ "text": text,
143
+ "mel": mel,
144
+ "spk_ids": spkid,
145
+ "emo_ids": emoid
146
+ }
147
+ return res
148
+
149
+ def __getitem__(self, index):
150
+ res = self.get_mel_text_pair(self.utts[index])
151
+ return res
152
+
153
+ def __len__(self):
154
+ return len(self.utts)
155
+
156
+
157
+ class SpkIDLoaderWithPE(SpkIDLoader):
158
+ def __init__(self, utts: str, hparams, feats_scp: str, utt2phns: str, phn2id: str,
159
+ utt2phn_duration: str, utt2spk: str, var_scp: str):
160
+ """
161
+ This loader loads speaker ID together with variance (4-dim pitch, 1-dim energy)
162
+ """
163
+ super(SpkIDLoaderWithPE, self).__init__(utts, hparams, feats_scp, utt2phns, phn2id, utt2phn_duration, utt2spk)
164
+ self.utt2var = self.get_utt2var(var_scp)
165
+
166
+ def get_utt2var(self, utt2var: str) -> dict:
167
+ res = kaldiio.load_scp(utt2var)
168
+ print(f"Succeeded reading feats from {utt2var}")
169
+ return res
170
+
171
+ def get_var_from_kaldi(self, utt):
172
+ var = self.utt2var[utt]
173
+ var = torch.FloatTensor(var).squeeze()
174
+ assert 5 in var.shape
175
+ if var.shape[0] == 5:
176
+ return var
177
+ else:
178
+ return var.T
179
+
180
+ def get_mel_text_pair(self, utt):
181
+ # separate filename and text
182
+ spkid = self.utt2spk[utt]
183
+ phn_ids = self.get_text(utt)
184
+ mel = self.get_mel_from_kaldi(utt)
185
+ dur = self.get_dur_from_kaldi(utt)
186
+ var = self.get_var_from_kaldi(utt)
187
+
188
+ assert sum(dur) == mel.shape[1] == var.shape[1], \
189
+ f"Frame length mismatch: utt {utt}, dur: {sum(dur)}, mel: {mel.shape[1]}, var: {var.shape[1]}"
190
+
191
+ res = {
192
+ "utt": utt,
193
+ "phn_ids": phn_ids,
194
+ "mel": mel,
195
+ "dur": dur,
196
+ "spk_ids": spkid,
197
+ "var": var
198
+ }
199
+ return res
200
+
201
+
202
+ class XvectorLoader(BaseLoader):
203
+ def __init__(self, utts: str, hparams, feats_scp: str, utt2phns: str, phn2id: str,
204
+ utt2phn_duration: str, utt2spk_name: str, spk_xvector_scp: str):
205
+ """
206
+ :param utt2spk_name: like kaldi-style utt2spk
207
+ :param spk_xvector_scp: kaldi-style speaker-level xvector.scp
208
+ """
209
+ super(XvectorLoader, self).__init__(utts, hparams, feats_scp, utt2phns, phn2id, utt2phn_duration)
210
+ self.utt2spk = self.get_utt2spk(utt2spk_name)
211
+ self.spk2xvector = self.get_spk2xvector(spk_xvector_scp)
212
+
213
+ def get_utt2spk(self, utt2spk):
214
+ res = dict()
215
+ with open(utt2spk, 'r') as f:
216
+ for l in f.readlines():
217
+ res[l.split()[0]] = l.split()[1]
218
+ return res
219
+
220
+ def get_spk2xvector(self, spk_xvector_scp: str) -> dict:
221
+ res = kaldiio.load_scp(spk_xvector_scp)
222
+ print(f"Succeeded reading xvector from {spk_xvector_scp}")
223
+ return res
224
+
225
+ def get_xvector(self, utt):
226
+ xv = self.spk2xvector[self.utt2spk[utt]]
227
+ xv = torch.FloatTensor(xv).squeeze()
228
+ return xv
229
+
230
+ def get_mel_text_pair(self, utt):
231
+ phn_ids = self.get_text(utt)
232
+ mel = self.get_mel_from_kaldi(utt)
233
+ dur = self.get_dur_from_kaldi(utt)
234
+ xvector = self.get_xvector(utt)
235
+
236
+ assert sum(dur) == mel.shape[1], \
237
+ f"Frame length mismatch: utt {utt}, dur: {sum(dur)}, mel: {mel.shape[1]}"
238
+
239
+ res = {
240
+ "utt": utt,
241
+ "phn_ids": phn_ids,
242
+ "mel": mel,
243
+ "dur": dur,
244
+ "xvector": xvector,
245
+ }
246
+ return res
247
+
248
+
249
+ class XvectorLoaderWithPE(BaseLoader):
250
+ def __init__(self, utts: str, hparams, feats_scp: str, utt2phns: str, phn2id: str,
251
+ utt2phn_duration: str, utt2spk_name: str, spk_xvector_scp: str, var_scp: str):
252
+ super(XvectorLoaderWithPE, self).__init__(utts, hparams, feats_scp, utt2phns, phn2id, utt2phn_duration)
253
+ self.utt2spk = self.get_utt2spk(utt2spk_name)
254
+ self.spk2xvector = self.get_spk2xvector(spk_xvector_scp)
255
+ self.utt2var = self.get_utt2var(var_scp)
256
+
257
+ def get_spk2xvector(self, spk_xvector_scp: str) -> dict:
258
+ res = kaldiio.load_scp(spk_xvector_scp)
259
+ print(f"Succeeded reading xvector from {spk_xvector_scp}")
260
+ return res
261
+
262
+ def get_utt2spk(self, utt2spk):
263
+ res = dict()
264
+ with open(utt2spk, 'r') as f:
265
+ for l in f.readlines():
266
+ res[l.split()[0]] = l.split()[1]
267
+ return res
268
+
269
+ def get_utt2var(self, utt2var: str) -> dict:
270
+ res = kaldiio.load_scp(utt2var)
271
+ print(f"Succeeded reading feats from {utt2var}")
272
+ return res
273
+
274
+ def get_var_from_kaldi(self, utt):
275
+ var = self.utt2var[utt]
276
+ var = torch.FloatTensor(var).squeeze()
277
+ assert 5 in var.shape
278
+ if var.shape[0] == 5:
279
+ return var
280
+ else:
281
+ return var.T
282
+
283
+ def get_xvector(self, utt):
284
+ xv = self.spk2xvector[self.utt2spk[utt]]
285
+ xv = torch.FloatTensor(xv).squeeze()
286
+ return xv
287
+
288
+ def get_mel_text_pair(self, utt):
289
+ # separate filename and text
290
+ spkid = self.utt2spk[utt]
291
+ phn_ids = self.get_text(utt)
292
+ mel = self.get_mel_from_kaldi(utt)
293
+ dur = self.get_dur_from_kaldi(utt)
294
+ var = self.get_var_from_kaldi(utt)
295
+ xvector = self.get_xvector(utt)
296
+
297
+ assert sum(dur) == mel.shape[1] == var.shape[1], \
298
+ f"Frame length mismatch: utt {utt}, dur: {sum(dur)}, mel: {mel.shape[1]}, var: {var.shape[1]}"
299
+
300
+ res = {
301
+ "utt": utt,
302
+ "phn_ids": phn_ids,
303
+ "mel": mel,
304
+ "dur": dur,
305
+ "spk_ids": spkid,
306
+ "var": var,
307
+ "xvector": xvector
308
+ }
309
+ return res
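Putting the loader and the collate function together gives a standard PyTorch DataLoader. A minimal sketch using the paths from configs/train_grad.json; it assumes data_preparation.py has already produced the filelists and feats.ark/feats.scp, and SimpleNamespace stands in for the hparams object the repo builds from the config.

from types import SimpleNamespace
from torch.utils.data import DataLoader

from data_collate import SpkIDCollateWithEmo
from data_loader import SpkIDLoaderWithEmo

data_cfg = SimpleNamespace(n_mel_channels=80, sampling_rate=22050)  # the fields BaseLoader reads

dataset = SpkIDLoaderWithEmo(
    utts="filelists/all_spks/train_utts.txt",
    hparams=data_cfg,
    feats_scp="filelists/all_spks/feats.scp",
    utt2text="filelists/all_spks/text",
    utt2spk="filelists/all_spks/utt2spk.json",
    utt2emo="filelists/all_spks/utt2emo.json",
)
loader = DataLoader(dataset, batch_size=16, shuffle=True,
                    collate_fn=SpkIDCollateWithEmo(n_frames_per_step=1))

batch = next(iter(loader))
print(batch["mel_padded"].shape, batch["spk_ids"][:4], batch["emo_ids"][:4])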
data_preparation.py ADDED
@@ -0,0 +1,108 @@
1
+ import kaldiio
2
+ import os
3
+ import librosa
4
+ from tqdm import tqdm
5
+ import glob
6
+ import json
7
+ from shutil import copyfile
8
+ import pandas as pd
9
+ import argparse
10
+ from text import _clean_text, symbols
11
+ from num2words import num2words
12
+ import re
13
+ from melspec import mel_spectrogram
14
+ import torchaudio
15
+
16
+ if __name__ == '__main__':
17
+ parser = argparse.ArgumentParser()
18
+ parser.add_argument('-d', '--data', type=str, required=True, help='path to the emotional dataset')
19
+ args = parser.parse_args()
20
+ dataset_path = args.data
21
+ filelists_path = 'filelists/all_spks/'
22
+ feats_scp_file = filelists_path + 'feats.scp'
23
+ feats_ark_file = filelists_path + 'feats.ark'
24
+
25
+
26
+ spks = ['1263201035', '805570882', '399172782']
27
+ train_files = []
28
+ eval_files = []
29
+ for spk in spks:
30
+ train_files += glob.glob(dataset_path + spk + "/train/*.wav")
31
+ eval_files += glob.glob(dataset_path + spk + "/eval/*.wav")
32
+
33
+ os.makedirs(filelists_path, exist_ok=True)
34
+
35
+ with open(filelists_path + 'train_utts.txt', 'w', encoding='utf-8') as f:
36
+ for wav_path in train_files:
37
+ wav_name = os.path.splitext(os.path.basename(wav_path))[0]
38
+ f.write(wav_name + '\n')
39
+ with open(filelists_path + 'eval_utts.txt', 'w', encoding='utf-8') as f:
40
+ for wav_path in eval_files:
41
+ wav_name = os.path.splitext(os.path.basename(wav_path))[0]
42
+ f.write(wav_name + '\n')
43
+
44
+ with open(feats_scp_file, 'w') as feats_scp, \
45
+ kaldiio.WriteHelper(f'ark,scp:{feats_ark_file},{feats_scp_file}') as writer:
46
+ for root, dirs, files in os.walk(dataset_path):
47
+ for file in tqdm(files):
48
+ if file.endswith('.wav'):
49
+ # Get the file name and relative path to the root folder
50
+ wav_path = os.path.join(root, file)
51
+ rel_path = os.path.relpath(wav_path, dataset_path)
52
+ wav_name = os.path.splitext(os.path.basename(wav_path))[0]
53
+ signal, rate = torchaudio.load(wav_path)
54
+ spec = mel_spectrogram(signal, 1024, 80, 22050, 256,
55
+ 1024, 0, 8000, center=False).squeeze()
56
+ # Write the features to feats.ark and feats.scp
57
+ writer[wav_name] = spec
58
+
59
+
60
+ emotions = [os.path.basename(x).split("_")[1] for x in glob.glob(dataset_path + '/**/**/*')]
61
+ emotions = sorted(set(emotions))
62
+
63
+ utt2spk = {}
64
+ utt2emo = {}
65
+ wavs = glob.glob(dataset_path + '**/**/*.wav')
66
+ for wav_path in tqdm(wavs):
67
+ wav_name = os.path.splitext(os.path.basename(wav_path))[0]
68
+ emotion = emotions.index(wav_name.split("_")[1])
69
+ if wav_path.split('/')[-3] == '1263201035':
70
+ spk = 0 ## labels should start with 0
71
+ elif wav_path.split('/')[-3] == '805570882':
72
+ spk = 1
73
+ else:
74
+ spk = 2
75
+ utt2spk[wav_name] = str(spk)
76
+ utt2emo[wav_name] = str(emotion)
77
+ utt2spk = dict(sorted(utt2spk.items()))
78
+ utt2emo = dict(sorted(utt2emo.items()))
79
+
80
+ with open(filelists_path + 'utt2emo.json', 'w') as fp:
81
+ json.dump(utt2emo, fp, indent=4)
82
+ with open(filelists_path + 'utt2spk.json', 'w') as fp:
83
+ json.dump(utt2spk, fp, indent=4)
84
+
85
+ txt_files = sorted(glob.glob(dataset_path + '/**/**/*.txt'))
86
+ count = 0
87
+ txt = []
88
+ basenames = []
89
+ utt2text = {}
90
+ flag = False
91
+ with open(filelists_path + 'text', 'w', encoding='utf-8') as write:
92
+ for txt_path in txt_files:
93
+ basename = os.path.basename(txt_path).replace('.txt', '')
94
+ with open(txt_path, 'r', encoding='utf-8') as f:
95
+ txt.append(_clean_text(f.read().strip("\n"), cleaner_names=["kazakh_cleaners"]).replace("'", ""))
96
+ basenames.append(basename)
97
+ output_string = [re.sub('(\d+)', lambda m: num2words(m.group(), lang='kz'), sentence) for sentence in txt]
98
+ cleaned_txt = []
99
+ for t in output_string:
100
+ cleaned_txt.append(''.join([s for s in t if s in symbols]))
101
+ utt2text = {basenames[i]: cleaned_txt[i] for i in range(len(cleaned_txt))}
102
+ utt2text = dict(sorted(utt2text.items()))
103
+
104
+ vocab = set()
105
+ with open(filelists_path + '/text', 'w', encoding='utf-8') as f:
106
+ for x, y in utt2text.items():
107
+ for c in y: vocab.add(c)
108
+ f.write(x + ' ' + y + '\n')
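Once the script has run, the generated feats.scp can be spot-checked with the same lazy kaldiio reader the loaders use. A small sketch, assuming filelists/all_spks/feats.ark and feats.scp exist in the working directory.

import kaldiio

utt2feat = kaldiio.load_scp("filelists/all_spks/feats.scp")  # lazy: matrices are read on access
for i, (utt, feat) in enumerate(utt2feat.items()):
    print(utt, feat.shape)  # one axis should be 80 (n_mel_channels)
    if i == 4:
        break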
env.py ADDED
@@ -0,0 +1,15 @@
1
+ import os
2
+ import shutil
3
+
4
+
5
+ class AttrDict(dict):
6
+ def __init__(self, *args, **kwargs):
7
+ super(AttrDict, self).__init__(*args, **kwargs)
8
+ self.__dict__ = self
9
+
10
+
11
+ def build_env(config, config_name, path):
12
+ t_path = os.path.join(path, config_name)
13
+ if config != t_path:
14
+ os.makedirs(path, exist_ok=True)
15
+ shutil.copyfile(config, os.path.join(path, config_name))
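AttrDict simply aliases the instance's attribute namespace to the dict itself, which is why app.py can build h = AttrDict(json.load(f)) and pass it to the HiFi-GAN Generator for attribute-style access. A two-line illustration (the keys shown are examples from config.json):

from env import AttrDict

h = AttrDict({"num_mels": 80, "sampling_rate": 22050})
print(h.num_mels, h["sampling_rate"])  # 80 22050 -- same storage, two access styles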
filelists/all_spks/eval_utts.txt ADDED
@@ -0,0 +1,3730 @@
1
+ 1263201035_surprise_47861
2
+ 1263201035_angry_49695
3
+ 1263201035_happy_52657
4
+ 1263201035_surprise_40339
5
+ 1263201035_angry_7782
6
+ 1263201035_sad_48693
7
+ 1263201035_happy_50748
8
+ 1263201035_fear_53711
9
+ 1263201035_sad_73370
10
+ 1263201035_surprise_40186
11
+ 1263201035_neutral_40342
12
+ 1263201035_happy_66930
13
+ 1263201035_fear_67139
14
+ 1263201035_angry_66597
15
+ 1263201035_sad_11219
16
+ 1263201035_neutral_53029
17
+ 1263201035_sad_51009
18
+ 1263201035_happy_31206
19
+ 1263201035_fear_34063
20
+ 1263201035_neutral_75999
21
+ 1263201035_angry_33304
22
+ 1263201035_angry_33668
23
+ 1263201035_angry_29852
24
+ 1263201035_sad_73176
25
+ 1263201035_fear_8425
26
+ 1263201035_fear_40145
27
+ 1263201035_sad_73359
28
+ 1263201035_sad_31284
29
+ 1263201035_fear_49804
30
+ 1263201035_fear_30407
31
+ 1263201035_surprise_66220
32
+ 1263201035_angry_49725
33
+ 1263201035_angry_75638
34
+ 1263201035_neutral_67147
35
+ 1263201035_angry_34047
36
+ 1263201035_surprise_51008
37
+ 1263201035_fear_40255
38
+ 1263201035_happy_40037
39
+ 1263201035_sad_40178
40
+ 1263201035_sad_49944
41
+ 1263201035_neutral_52556
42
+ 1263201035_sad_32732
43
+ 1263201035_angry_67696
44
+ 1263201035_neutral_52335
45
+ 1263201035_surprise_33940
46
+ 1263201035_surprise_31598
47
+ 1263201035_happy_33813
48
+ 1263201035_happy_52477
49
+ 1263201035_neutral_8508
50
+ 1263201035_happy_22572
51
+ 1263201035_neutral_34101
52
+ 1263201035_happy_4851
53
+ 1263201035_angry_33831
54
+ 1263201035_happy_47841
55
+ 1263201035_angry_48038
56
+ 1263201035_angry_4986
57
+ 1263201035_surprise_29809
58
+ 1263201035_sad_8464
59
+ 1263201035_fear_47763
60
+ 1263201035_happy_29035
61
+ 1263201035_fear_16121
62
+ 1263201035_neutral_4901
63
+ 1263201035_surprise_51233
64
+ 1263201035_sad_33897
65
+ 1263201035_happy_50855
66
+ 1263201035_neutral_31468
67
+ 1263201035_happy_5320
68
+ 1263201035_neutral_66915
69
+ 1263201035_sad_67518
70
+ 1263201035_sad_5335
71
+ 1263201035_sad_52022
72
+ 1263201035_neutral_42295
73
+ 1263201035_happy_63621
74
+ 1263201035_happy_40137
75
+ 1263201035_happy_33655
76
+ 1263201035_neutral_66444
77
+ 1263201035_fear_66382
78
+ 1263201035_surprise_72795
79
+ 1263201035_happy_30297
80
+ 1263201035_angry_8319
81
+ 1263201035_neutral_65132
82
+ 1263201035_happy_31356
83
+ 1263201035_neutral_39066
84
+ 1263201035_surprise_47951
85
+ 1263201035_happy_33626
86
+ 1263201035_sad_50658
87
+ 1263201035_sad_53223
88
+ 1263201035_happy_30012
89
+ 1263201035_fear_40340
90
+ 1263201035_angry_15112
91
+ 1263201035_happy_67544
92
+ 1263201035_happy_51390
93
+ 1263201035_angry_51499
94
+ 1263201035_sad_29077
95
+ 1263201035_angry_75293
96
+ 1263201035_angry_16061
97
+ 1263201035_surprise_47777
98
+ 1263201035_happy_72797
99
+ 1263201035_angry_30428
100
+ 1263201035_neutral_53208
101
+ 1263201035_angry_53197
102
+ 1263201035_happy_75643
103
+ 1263201035_neutral_10824
104
+ 1263201035_happy_49642
105
+ 1263201035_surprise_29714
106
+ 1263201035_sad_50636
107
+ 1263201035_happy_10510
108
+ 1263201035_sad_33891
109
+ 1263201035_happy_52624
110
+ 1263201035_neutral_67354
111
+ 1263201035_angry_48056
112
+ 1263201035_surprise_75526
113
+ 1263201035_sad_75797
114
+ 1263201035_fear_4619
115
+ 1263201035_fear_67891
116
+ 1263201035_sad_31528
117
+ 1263201035_happy_32640
118
+ 1263201035_neutral_4164
119
+ 1263201035_neutral_8352
120
+ 1263201035_surprise_50242
121
+ 1263201035_sad_4376
122
+ 1263201035_sad_50810
123
+ 1263201035_neutral_51154
124
+ 1263201035_neutral_4966
125
+ 1263201035_fear_49929
126
+ 1263201035_angry_53663
127
+ 1263201035_angry_38901
128
+ 1263201035_surprise_9306
129
+ 1263201035_surprise_29785
130
+ 1263201035_angry_32730
131
+ 1263201035_happy_30869
132
+ 1263201035_angry_75682
133
+ 1263201035_happy_75952
134
+ 1263201035_fear_4576
135
+ 1263201035_neutral_63328
136
+ 1263201035_fear_50244
137
+ 1263201035_surprise_51865
138
+ 1263201035_fear_22543
139
+ 1263201035_angry_73324
140
+ 1263201035_sad_72922
141
+ 1263201035_fear_29157
142
+ 1263201035_fear_31299
143
+ 1263201035_neutral_38837
144
+ 1263201035_neutral_29108
145
+ 1263201035_neutral_52324
146
+ 1263201035_neutral_5031
147
+ 1263201035_surprise_75767
148
+ 1263201035_surprise_52615
149
+ 1263201035_happy_39662
150
+ 1263201035_neutral_30912
151
+ 1263201035_fear_72966
152
+ 1263201035_neutral_39697
153
+ 1263201035_happy_29865
154
+ 1263201035_sad_52970
155
+ 1263201035_sad_53429
156
+ 1263201035_angry_38884
157
+ 1263201035_surprise_66140
158
+ 1263201035_angry_39846
159
+ 1263201035_happy_10659
160
+ 1263201035_happy_40112
161
+ 1263201035_neutral_7703
162
+ 1263201035_happy_32874
163
+ 1263201035_fear_63714
164
+ 1263201035_sad_53877
165
+ 1263201035_fear_5013
166
+ 1263201035_fear_29773
167
+ 1263201035_happy_5066
168
+ 1263201035_sad_38392
169
+ 1263201035_surprise_39435
170
+ 1263201035_angry_32501
171
+ 1263201035_angry_73202
172
+ 1263201035_sad_11121
173
+ 1263201035_angry_66478
174
+ 1263201035_angry_75908
175
+ 1263201035_happy_9694
176
+ 1263201035_surprise_11194
177
+ 1263201035_sad_5051
178
+ 1263201035_angry_32308
179
+ 1263201035_surprise_5246
180
+ 1263201035_angry_74571
181
+ 1263201035_angry_75150
182
+ 1263201035_angry_16113
183
+ 1263201035_neutral_30884
184
+ 1263201035_surprise_33211
185
+ 1263201035_happy_5227
186
+ 1263201035_fear_15372
187
+ 1263201035_surprise_32862
188
+ 1263201035_neutral_53264
189
+ 1263201035_fear_66575
190
+ 1263201035_neutral_50966
191
+ 1263201035_angry_39916
192
+ 1263201035_happy_63102
193
+ 1263201035_surprise_51553
194
+ 1263201035_angry_39384
195
+ 1263201035_angry_15549
196
+ 1263201035_angry_33817
197
+ 1263201035_angry_10574
198
+ 1263201035_neutral_52331
199
+ 1263201035_sad_33469
200
+ 1263201035_fear_52564
201
+ 1263201035_fear_8481
202
+ 1263201035_sad_10794
203
+ 1263201035_fear_12090
204
+ 1263201035_happy_31695
205
+ 1263201035_sad_34144
206
+ 1263201035_surprise_75196
207
+ 1263201035_angry_30083
208
+ 1263201035_surprise_22350
209
+ 1263201035_happy_4269
210
+ 1263201035_angry_29368
211
+ 1263201035_happy_48044
212
+ 1263201035_neutral_30394
213
+ 1263201035_sad_74919
214
+ 1263201035_sad_47862
215
+ 1263201035_fear_73319
216
+ 1263201035_sad_50492
217
+ 1263201035_angry_53647
218
+ 1263201035_angry_30492
219
+ 1263201035_fear_51381
220
+ 1263201035_neutral_72947
221
+ 1263201035_sad_75433
222
+ 1263201035_neutral_5895
223
+ 1263201035_surprise_12083
224
+ 1263201035_sad_67908
225
+ 1263201035_surprise_39408
226
+ 1263201035_sad_72934
227
+ 1263201035_happy_66950
228
+ 1263201035_happy_67808
229
+ 1263201035_angry_32407
230
+ 1263201035_sad_49959
231
+ 1263201035_happy_51697
232
+ 1263201035_fear_53931
233
+ 1263201035_angry_67344
234
+ 1263201035_sad_8541
235
+ 1263201035_angry_32480
236
+ 1263201035_happy_66906
237
+ 1263201035_neutral_33447
238
+ 1263201035_happy_32179
239
+ 1263201035_neutral_30333
240
+ 1263201035_fear_30788
241
+ 1263201035_surprise_51693
242
+ 1263201035_sad_51616
243
+ 1263201035_neutral_52096
244
+ 1263201035_angry_33956
245
+ 1263201035_angry_66388
246
+ 1263201035_fear_39108
247
+ 1263201035_surprise_63535
248
+ 1263201035_surprise_50326
249
+ 1263201035_neutral_5142
250
+ 1263201035_neutral_30199
251
+ 1263201035_surprise_75829
252
+ 1263201035_surprise_5900
253
+ 1263201035_neutral_39387
254
+ 1263201035_happy_10858
255
+ 1263201035_fear_52243
256
+ 1263201035_neutral_52010
257
+ 1263201035_angry_63199
258
+ 1263201035_happy_49874
259
+ 1263201035_angry_48000
260
+ 1263201035_sad_8313
261
+ 1263201035_happy_66343
262
+ 1263201035_angry_63130
263
+ 1263201035_neutral_32809
264
+ 1263201035_sad_39396
265
+ 1263201035_angry_22986
266
+ 1263201035_neutral_42307
267
+ 1263201035_angry_72815
268
+ 1263201035_angry_67240
269
+ 1263201035_surprise_47957
270
+ 1263201035_angry_49812
271
+ 1263201035_angry_65040
272
+ 1263201035_neutral_50844
273
+ 1263201035_happy_39561
274
+ 1263201035_neutral_66254
275
+ 1263201035_angry_4584
276
+ 1263201035_fear_75581
277
+ 1263201035_surprise_40034
278
+ 1263201035_sad_32638
279
+ 1263201035_angry_31511
280
+ 1263201035_fear_40023
281
+ 1263201035_angry_66319
282
+ 1263201035_sad_7712
283
+ 1263201035_sad_38422
284
+ 1263201035_fear_49782
285
+ 1263201035_happy_30429
286
+ 1263201035_surprise_66374
287
+ 1263201035_neutral_31308
288
+ 1263201035_surprise_52893
289
+ 1263201035_happy_34030
290
+ 1263201035_sad_31258
291
+ 1263201035_happy_17691
292
+ 1263201035_happy_50449
293
+ 1263201035_surprise_21744
294
+ 1263201035_surprise_51942
295
+ 1263201035_happy_29092
296
+ 1263201035_angry_38434
297
+ 1263201035_happy_30887
298
+ 1263201035_neutral_40141
299
+ 1263201035_neutral_73264
300
+ 1263201035_angry_74880
301
+ 1263201035_surprise_51396
302
+ 1263201035_angry_31619
303
+ 1263201035_neutral_50160
304
+ 1263201035_happy_30843
305
+ 1263201035_angry_66937
306
+ 1263201035_surprise_48633
307
+ 1263201035_neutral_50437
308
+ 1263201035_fear_32622
309
+ 1263201035_sad_33602
310
+ 1263201035_sad_10693
311
+ 1263201035_angry_30010
312
+ 1263201035_sad_37431
313
+ 1263201035_sad_53370
314
+ 1263201035_sad_31638
315
+ 1263201035_sad_5883
316
+ 1263201035_sad_11196
317
+ 1263201035_angry_33837
318
+ 1263201035_neutral_15588
319
+ 1263201035_fear_51092
320
+ 1263201035_angry_12025
321
+ 1263201035_neutral_31454
322
+ 1263201035_surprise_51114
323
+ 1263201035_angry_38674
324
+ 1263201035_surprise_75241
325
+ 1263201035_angry_51729
326
+ 1263201035_surprise_51011
327
+ 1263201035_angry_63670
328
+ 1263201035_happy_4582
329
+ 1263201035_sad_15132
330
+ 1263201035_fear_74586
331
+ 1263201035_neutral_22964
332
+ 1263201035_neutral_66490
333
+ 1263201035_angry_39989
334
+ 1263201035_neutral_30098
335
+ 1263201035_fear_33875
336
+ 1263201035_surprise_73089
337
+ 1263201035_angry_67716
338
+ 1263201035_neutral_63424
339
+ 1263201035_fear_31191
340
+ 1263201035_happy_50178
341
+ 1263201035_neutral_48535
342
+ 1263201035_sad_29939
343
+ 1263201035_surprise_52514
344
+ 1263201035_surprise_14772
345
+ 1263201035_neutral_33820
346
+ 1263201035_neutral_51319
347
+ 1263201035_fear_19112
348
+ 1263201035_sad_30896
349
+ 1263201035_angry_38773
350
+ 1263201035_surprise_49855
351
+ 1263201035_angry_67517
352
+ 1263201035_sad_52937
353
+ 1263201035_sad_34274
354
+ 1263201035_angry_38860
355
+ 1263201035_angry_67267
356
+ 1263201035_fear_39004
357
+ 1263201035_happy_38922
358
+ 1263201035_surprise_32569
359
+ 1263201035_happy_32071
360
+ 1263201035_neutral_52193
361
+ 1263201035_fear_40222
362
+ 1263201035_sad_30995
363
+ 1263201035_neutral_42293
364
+ 1263201035_happy_48597
365
+ 1263201035_fear_74644
366
+ 1263201035_angry_4478
367
+ 1263201035_fear_4473
368
+ 1263201035_happy_52573
369
+ 1263201035_happy_31639
370
+ 1263201035_fear_67880
371
+ 1263201035_fear_63680
372
+ 1263201035_neutral_8662
373
+ 1263201035_neutral_11011
374
+ 1263201035_sad_51190
375
+ 1263201035_angry_67476
376
+ 1263201035_sad_66335
377
+ 1263201035_neutral_38929
378
+ 1263201035_surprise_4976
379
+ 1263201035_surprise_4181
380
+ 1263201035_angry_75223
381
+ 1263201035_fear_51118
382
+ 1263201035_fear_33610
383
+ 1263201035_sad_73105
384
+ 1263201035_sad_75988
385
+ 1263201035_happy_52925
386
+ 1263201035_surprise_73234
387
+ 1263201035_fear_8574
388
+ 1263201035_happy_10865
389
+ 1263201035_neutral_42311
390
+ 1263201035_sad_10413
391
+ 1263201035_happy_30812
392
+ 1263201035_neutral_29997
393
+ 1263201035_neutral_10538
394
+ 1263201035_happy_72962
395
+ 1263201035_fear_32169
396
+ 1263201035_sad_31600
397
+ 1263201035_fear_51048
398
+ 1263201035_happy_67949
399
+ 1263201035_happy_15173
400
+ 1263201035_surprise_39861
401
+ 1263201035_sad_53536
402
+ 1263201035_surprise_67765
403
+ 1263201035_happy_39074
404
+ 1263201035_neutral_50745
405
+ 1263201035_surprise_52274
406
+ 1263201035_angry_38765
407
+ 1263201035_angry_33487
408
+ 1263201035_neutral_73005
409
+ 1263201035_fear_32574
410
+ 1263201035_neutral_50283
411
+ 1263201035_fear_74772
412
+ 1263201035_angry_53856
413
+ 1263201035_angry_53403
414
+ 1263201035_sad_66511
415
+ 1263201035_sad_31329
416
+ 1263201035_surprise_75834
417
+ 1263201035_sad_66349
418
+ 1263201035_angry_38866
419
+ 1263201035_angry_8642
420
+ 1263201035_happy_4672
421
+ 1263201035_neutral_30997
422
+ 1263201035_neutral_51490
423
+ 1263201035_happy_15567
424
+ 1263201035_surprise_75842
425
+ 1263201035_happy_40352
426
+ 1263201035_happy_50029
427
+ 1263201035_surprise_11414
428
+ 1263201035_fear_73349
429
+ 1263201035_neutral_53602
430
+ 1263201035_neutral_72894
431
+ 1263201035_surprise_75663
432
+ 1263201035_surprise_53224
433
+ 1263201035_happy_16039
434
+ 1263201035_happy_30270
435
+ 1263201035_sad_31155
436
+ 1263201035_angry_5917
437
+ 1263201035_angry_11185
438
+ 1263201035_happy_50051
439
+ 1263201035_angry_4802
440
+ 1263201035_neutral_51275
441
+ 1263201035_angry_10726
442
+ 1263201035_angry_50739
443
+ 1263201035_neutral_51548
444
+ 1263201035_neutral_63465
445
+ 1263201035_fear_4140
446
+ 1263201035_happy_17671
447
+ 1263201035_surprise_51062
448
+ 1263201035_neutral_38910
449
+ 1263201035_angry_33486
450
+ 1263201035_surprise_19100
451
+ 1263201035_neutral_67662
452
+ 1263201035_happy_66974
453
+ 1263201035_fear_51238
454
+ 1263201035_angry_52782
455
+ 1263201035_surprise_9292
456
+ 1263201035_happy_51591
457
+ 1263201035_neutral_66318
458
+ 1263201035_sad_29226
459
+ 1263201035_happy_51129
460
+ 1263201035_fear_75378
461
+ 1263201035_happy_39023
462
+ 1263201035_neutral_39436
463
+ 1263201035_fear_10860
464
+ 1263201035_neutral_40155
465
+ 1263201035_angry_32592
466
+ 1263201035_angry_29315
467
+ 1263201035_neutral_51084
468
+ 1263201035_angry_50960
469
+ 1263201035_fear_47808
470
+ 1263201035_happy_8607
471
+ 1263201035_sad_11327
472
+ 1263201035_surprise_52222
473
+ 1263201035_fear_29913
474
+ 1263201035_sad_5146
475
+ 1263201035_neutral_30035
476
[filelists/all_spks/eval_utts.txt, added diff lines 477–2627: roughly 2,150 further evaluation utterance IDs of the form <speaker_id>_<emotion>_<utterance_id>, covering speakers 1263201035 and 805570882 across the emotions angry, fear, happy, neutral, sad, and surprise; the complete list is in filelists/all_spks/eval_utts.txt]
2629
+ 805570882_angry_63870
2630
+ 805570882_surprise_17828
2631
+ 805570882_fear_41527
2632
+ 805570882_fear_14986
2633
+ 805570882_fear_42079
2634
+ 805570882_angry_44423
2635
+ 805570882_angry_34303
2636
+ 805570882_neutral_64079
2637
+ 805570882_angry_15508
2638
+ 805570882_neutral_22681
2639
+ 805570882_angry_43393
2640
+ 805570882_fear_18620
2641
+ 805570882_surprise_29606
2642
+ 805570882_sad_56914
2643
+ 805570882_sad_22628
2644
+ 805570882_angry_28322
2645
+ 805570882_neutral_45780
2646
+ 805570882_happy_45865
2647
+ 805570882_happy_14725
2648
+ 805570882_fear_19054
2649
+ 805570882_sad_28682
2650
+ 805570882_sad_24821
2651
+ 805570882_neutral_20447
2652
+ 805570882_neutral_54139
2653
+ 805570882_fear_43053
2654
+ 805570882_sad_45853
2655
+ 805570882_surprise_42146
2656
+ 805570882_angry_22837
2657
+ 805570882_angry_28818
2658
+ 805570882_happy_35757
2659
+ 805570882_happy_17169
2660
+ 805570882_angry_63191
2661
+ 805570882_sad_14681
2662
+ 805570882_fear_18623
2663
+ 805570882_neutral_56893
2664
+ 805570882_neutral_36339
2665
+ 805570882_neutral_64793
2666
+ 805570882_fear_61020
2667
+ 805570882_fear_61760
2668
+ 805570882_neutral_57374
2669
+ 805570882_fear_27793
2670
+ 805570882_neutral_20215
2671
+ 805570882_surprise_34473
2672
+ 805570882_neutral_70777
2673
+ 805570882_happy_61045
2674
+ 805570882_neutral_55405
2675
+ 805570882_angry_14669
2676
+ 805570882_fear_28673
2677
+ 805570882_surprise_45736
2678
+ 805570882_neutral_24169
2679
+ 805570882_angry_24206
2680
+ 805570882_angry_21250
2681
+ 805570882_sad_23097
2682
+ 805570882_neutral_41383
2683
+ 805570882_neutral_46968
2684
+ 805570882_fear_37989
2685
+ 805570882_sad_26499
2686
+ 805570882_neutral_45863
2687
+ 805570882_sad_74386
2688
+ 805570882_angry_61514
2689
+ 805570882_surprise_61528
2690
+ 805570882_fear_24521
2691
+ 805570882_fear_66097
2692
+ 805570882_surprise_26387
2693
+ 805570882_sad_74310
2694
+ 805570882_angry_43884
2695
+ 805570882_neutral_47429
2696
+ 805570882_angry_37887
2697
+ 805570882_sad_28582
2698
+ 805570882_surprise_36087
2699
+ 805570882_sad_20287
2700
+ 805570882_neutral_14946
2701
+ 805570882_happy_18257
2702
+ 805570882_happy_46488
2703
+ 805570882_angry_49151
2704
+ 805570882_sad_40927
2705
+ 805570882_fear_43801
2706
+ 805570882_angry_75113
2707
+ 805570882_sad_29402
2708
+ 805570882_surprise_70883
2709
+ 805570882_happy_65968
2710
+ 805570882_happy_35616
2711
+ 805570882_neutral_35560
2712
+ 805570882_surprise_46668
2713
+ 805570882_fear_24301
2714
+ 805570882_surprise_46642
2715
+ 805570882_sad_49262
2716
+ 805570882_surprise_16006
2717
+ 805570882_angry_43905
2718
+ 805570882_happy_46758
2719
+ 805570882_neutral_18957
2720
+ 805570882_sad_34427
2721
+ 805570882_neutral_56527
2722
+ 805570882_neutral_43849
2723
+ 805570882_fear_38269
2724
+ 805570882_surprise_38234
2725
+ 805570882_angry_18674
2726
+ 805570882_angry_24205
2727
+ 805570882_angry_57557
2728
+ 805570882_angry_57265
2729
+ 805570882_happy_27231
2730
+ 805570882_angry_48102
2731
+ 805570882_surprise_48317
2732
+ 805570882_angry_26432
2733
+ 805570882_angry_15332
2734
+ 805570882_sad_36847
2735
+ 805570882_happy_61571
2736
+ 805570882_neutral_21408
2737
+ 805570882_surprise_43376
2738
+ 805570882_neutral_34939
2739
+ 805570882_fear_28328
2740
+ 805570882_angry_40588
2741
+ 805570882_surprise_21262
2742
+ 805570882_surprise_34709
2743
+ 805570882_surprise_14493
2744
+ 805570882_happy_19012
2745
+ 805570882_neutral_26938
2746
+ 805570882_happy_35972
2747
+ 805570882_happy_28844
2748
+ 805570882_sad_40390
2749
+ 805570882_surprise_58370
2750
+ 805570882_angry_46759
2751
+ 805570882_neutral_22871
2752
+ 805570882_fear_16729
2753
+ 805570882_happy_43948
2754
+ 805570882_angry_63843
2755
+ 805570882_surprise_49334
2756
+ 805570882_fear_64750
2757
+ 805570882_sad_37043
2758
+ 805570882_fear_57443
2759
+ 805570882_happy_33344
2760
+ 805570882_neutral_34782
2761
+ 805570882_sad_37673
2762
+ 805570882_neutral_24112
2763
+ 805570882_angry_43630
2764
+ 805570882_angry_24436
2765
+ 805570882_surprise_27318
2766
+ 805570882_angry_16578
2767
+ 805570882_neutral_36009
2768
+ 805570882_angry_64706
2769
+ 805570882_happy_20095
2770
+ 805570882_sad_64918
2771
+ 805570882_happy_37318
2772
+ 805570882_happy_34485
2773
+ 805570882_neutral_65807
2774
+ 805570882_sad_44803
2775
+ 805570882_angry_38351
2776
+ 805570882_neutral_36490
2777
+ 805570882_sad_46804
2778
+ 805570882_sad_74519
2779
+ 805570882_sad_46479
2780
+ 805570882_surprise_20144
2781
+ 805570882_surprise_40814
2782
+ 805570882_fear_37260
2783
+ 805570882_fear_18549
2784
+ 805570882_surprise_35120
2785
+ 805570882_neutral_46261
2786
+ 805570882_angry_70976
2787
+ 805570882_sad_46962
2788
+ 805570882_angry_35605
2789
+ 805570882_surprise_24890
2790
+ 805570882_neutral_35021
2791
+ 805570882_surprise_46660
2792
+ 805570882_sad_41349
2793
+ 805570882_angry_48755
2794
+ 805570882_sad_27704
2795
+ 805570882_angry_36791
2796
+ 805570882_happy_14849
2797
+ 805570882_neutral_44437
2798
+ 805570882_neutral_70643
2799
+ 805570882_angry_37323
2800
+ 805570882_neutral_74089
2801
+ 805570882_happy_45920
2802
+ 805570882_surprise_27592
2803
+ 805570882_sad_43514
2804
+ 805570882_fear_37306
2805
+ 805570882_happy_36235
2806
+ 805570882_neutral_45657
2807
+ 805570882_neutral_43590
2808
+ 805570882_surprise_27124
2809
+ 805570882_surprise_18223
2810
+ 805570882_neutral_55261
2811
+ 805570882_sad_70675
2812
+ 805570882_angry_31729
2813
+ 805570882_sad_40847
2814
+ 805570882_fear_42203
2815
+ 805570882_surprise_27298
2816
+ 805570882_fear_23292
2817
+ 805570882_fear_47447
2818
+ 805570882_angry_48365
2819
+ 805570882_happy_27624
2820
+ 805570882_neutral_18190
2821
+ 805570882_surprise_49088
2822
+ 805570882_sad_44559
2823
+ 805570882_happy_28226
2824
+ 805570882_fear_46916
2825
+ 805570882_angry_36922
2826
+ 805570882_sad_62892
2827
+ 805570882_surprise_47131
2828
+ 805570882_fear_46547
2829
+ 805570882_angry_48893
2830
+ 805570882_neutral_44394
2831
+ 805570882_happy_18904
2832
+ 805570882_neutral_64899
2833
+ 805570882_angry_19417
2834
+ 805570882_angry_18698
2835
+ 805570882_happy_33218
2836
+ 805570882_angry_44343
2837
+ 805570882_surprise_16236
2838
+ 805570882_fear_17760
2839
+ 805570882_happy_18576
2840
+ 805570882_fear_36518
2841
+ 805570882_fear_28736
2842
+ 805570882_sad_45585
2843
+ 805570882_fear_17969
2844
+ 805570882_happy_56715
2845
+ 805570882_surprise_46414
2846
+ 805570882_neutral_64910
2847
+ 805570882_sad_17043
2848
+ 805570882_happy_23904
2849
+ 805570882_sad_74539
2850
+ 805570882_happy_49089
2851
+ 805570882_happy_55289
2852
+ 805570882_happy_74141
2853
+ 805570882_happy_38212
2854
+ 805570882_surprise_61185
2855
+ 805570882_neutral_74079
2856
+ 805570882_surprise_14911
2857
+ 805570882_happy_43445
2858
+ 805570882_sad_24312
2859
+ 805570882_fear_42821
2860
+ 805570882_fear_16134
2861
+ 805570882_happy_36433
2862
+ 805570882_surprise_63989
2863
+ 805570882_sad_33398
2864
+ 805570882_surprise_55438
2865
+ 805570882_angry_55229
2866
+ 805570882_surprise_36972
2867
+ 805570882_fear_55610
2868
+ 805570882_angry_66011
2869
+ 805570882_angry_61056
2870
+ 805570882_happy_54161
2871
+ 805570882_angry_46856
2872
+ 805570882_surprise_73883
2873
+ 805570882_neutral_65086
2874
+ 805570882_sad_44218
2875
+ 805570882_neutral_23036
2876
+ 805570882_surprise_37596
2877
+ 805570882_happy_16205
2878
+ 805570882_angry_55711
2879
+ 805570882_sad_35891
2880
+ 805570882_fear_49161
2881
+ 805570882_surprise_74043
2882
+ 805570882_angry_41026
2883
+ 805570882_sad_21372
2884
+ 805570882_angry_48428
2885
+ 805570882_angry_65008
2886
+ 805570882_happy_56627
2887
+ 805570882_angry_56682
2888
+ 805570882_surprise_41343
2889
+ 805570882_neutral_35752
2890
+ 805570882_angry_21416
2891
+ 805570882_fear_15232
2892
+ 805570882_fear_18329
2893
+ 805570882_surprise_34852
2894
+ 805570882_angry_75029
2895
+ 805570882_surprise_37853
2896
+ 805570882_happy_29492
2897
+ 805570882_angry_37338
2898
+ 805570882_surprise_74333
2899
+ 805570882_fear_42912
2900
+ 805570882_fear_23077
2901
+ 805570882_fear_29652
2902
+ 805570882_fear_44313
2903
+ 805570882_neutral_74210
2904
+ 805570882_angry_46183
2905
+ 805570882_angry_34822
2906
+ 805570882_surprise_41500
2907
+ 805570882_sad_43356
2908
+ 805570882_happy_43524
2909
+ 805570882_neutral_44151
2910
+ 805570882_sad_54921
2911
+ 805570882_neutral_23191
2912
+ 805570882_fear_26895
2913
+ 805570882_happy_42836
2914
+ 805570882_angry_33244
2915
+ 805570882_surprise_55169
2916
+ 805570882_angry_74535
2917
+ 805570882_happy_65904
2918
+ 805570882_happy_61098
2919
+ 805570882_sad_43537
2920
+ 805570882_angry_20270
2921
+ 805570882_angry_29645
2922
+ 805570882_fear_57474
2923
+ 805570882_neutral_24535
2924
+ 805570882_fear_54218
2925
+ 805570882_happy_24996
2926
+ 805570882_happy_48313
2927
+ 805570882_surprise_44357
2928
+ 805570882_surprise_45547
2929
+ 805570882_neutral_44223
2930
+ 805570882_sad_36924
2931
+ 805570882_angry_63212
2932
+ 805570882_happy_34551
2933
+ 805570882_happy_24002
2934
+ 805570882_fear_63804
2935
+ 805570882_happy_46403
2936
+ 805570882_surprise_31755
2937
+ 805570882_neutral_41566
2938
+ 805570882_neutral_44401
2939
+ 805570882_neutral_55269
2940
+ 805570882_sad_43432
2941
+ 805570882_neutral_26459
2942
+ 805570882_fear_15910
2943
+ 805570882_sad_34699
2944
+ 805570882_neutral_48805
2945
+ 805570882_surprise_15003
2946
+ 805570882_sad_66030
2947
+ 805570882_angry_31742
2948
+ 805570882_neutral_54239
2949
+ 805570882_surprise_45007
2950
+ 805570882_surprise_44400
2951
+ 805570882_angry_31748
2952
+ 805570882_neutral_14732
2953
+ 805570882_neutral_29601
2954
+ 805570882_happy_43634
2955
+ 805570882_angry_55744
2956
+ 805570882_fear_34835
2957
+ 805570882_happy_24280
2958
+ 805570882_fear_15881
2959
+ 805570882_sad_15829
2960
+ 805570882_surprise_36403
2961
+ 805570882_angry_16341
2962
+ 805570882_angry_70658
2963
+ 805570882_neutral_57355
2964
+ 805570882_neutral_23973
2965
+ 805570882_angry_22996
2966
+ 805570882_angry_42843
2967
+ 805570882_fear_16641
2968
+ 805570882_sad_54889
2969
+ 805570882_surprise_57291
2970
+ 805570882_surprise_28900
2971
+ 805570882_happy_37635
2972
+ 805570882_fear_46304
2973
+ 805570882_surprise_19466
2974
+ 805570882_angry_73857
2975
+ 805570882_surprise_46611
2976
+ 805570882_surprise_56538
2977
+ 805570882_neutral_21202
2978
+ 805570882_angry_27445
2979
+ 805570882_happy_29613
2980
+ 805570882_angry_19294
2981
+ 805570882_angry_18344
2982
+ 805570882_angry_35037
2983
+ 805570882_surprise_36661
2984
+ 805570882_happy_56537
2985
+ 805570882_surprise_40461
2986
+ 805570882_happy_46608
2987
+ 805570882_happy_18218
2988
+ 805570882_angry_34937
2989
+ 805570882_sad_38101
2990
+ 805570882_surprise_45796
2991
+ 805570882_angry_65886
2992
+ 805570882_fear_55795
2993
+ 805570882_fear_57597
2994
+ 805570882_happy_33126
2995
+ 805570882_angry_24308
2996
+ 805570882_angry_27183
2997
+ 805570882_sad_37126
2998
+ 805570882_sad_44661
2999
+ 805570882_angry_31784
3000
+ 805570882_happy_35084
3001
+ 805570882_fear_61001
3002
+ 805570882_sad_57482
3003
+ 805570882_angry_41932
3004
+ 805570882_angry_34867
3005
+ 805570882_neutral_44677
3006
+ 805570882_neutral_27202
3007
+ 805570882_angry_33349
3008
+ 805570882_angry_37831
3009
+ 805570882_neutral_14872
3010
+ 805570882_surprise_41884
3011
+ 805570882_neutral_41604
3012
+ 805570882_neutral_36220
3013
+ 805570882_neutral_64840
3014
+ 805570882_neutral_15484
3015
+ 805570882_happy_41543
3016
+ 805570882_neutral_57352
3017
+ 805570882_happy_28619
3018
+ 805570882_neutral_21264
3019
+ 805570882_angry_37528
3020
+ 805570882_surprise_36441
3021
+ 805570882_fear_64032
3022
+ 805570882_angry_36195
3023
+ 805570882_surprise_44933
3024
+ 805570882_neutral_58399
3025
+ 805570882_angry_71018
3026
+ 805570882_fear_44279
3027
+ 805570882_neutral_17777
3028
+ 805570882_sad_46527
3029
+ 805570882_neutral_42989
3030
+ 805570882_angry_26289
3031
+ 805570882_neutral_24748
3032
+ 805570882_surprise_57708
3033
+ 805570882_angry_23914
3034
+ 805570882_angry_31722
3035
+ 805570882_sad_41586
3036
+ 805570882_happy_38086
3037
+ 805570882_neutral_15482
3038
+ 805570882_surprise_37567
3039
+ 805570882_angry_27571
3040
+ 805570882_sad_55283
3041
+ 805570882_angry_65859
3042
+ 805570882_fear_36102
3043
+ 805570882_angry_18575
3044
+ 805570882_fear_57386
3045
+ 805570882_surprise_23110
3046
+ 805570882_happy_48919
3047
+ 805570882_angry_34847
3048
+ 805570882_neutral_18816
3049
+ 805570882_fear_44028
3050
+ 805570882_surprise_21450
3051
+ 805570882_sad_63198
3052
+ 805570882_sad_43168
3053
+ 805570882_surprise_45029
3054
+ 805570882_fear_37279
3055
+ 805570882_angry_58484
3056
+ 805570882_surprise_34390
3057
+ 805570882_neutral_46615
3058
+ 805570882_neutral_36721
3059
+ 805570882_angry_21224
3060
+ 805570882_surprise_38309
3061
+ 805570882_happy_22725
3062
+ 805570882_angry_33085
3063
+ 805570882_sad_38002
3064
+ 805570882_neutral_65777
3065
+ 805570882_neutral_44761
3066
+ 805570882_angry_16361
3067
+ 805570882_fear_14419
3068
+ 805570882_angry_63751
3069
+ 805570882_neutral_28417
3070
+ 805570882_surprise_17733
3071
+ 805570882_sad_61230
3072
+ 805570882_fear_14667
3073
+ 805570882_fear_61475
3074
+ 805570882_fear_35042
3075
+ 805570882_surprise_46657
3076
+ 805570882_surprise_23909
3077
+ 805570882_happy_64883
3078
+ 805570882_fear_48163
3079
+ 805570882_sad_37067
3080
+ 805570882_angry_17874
3081
+ 805570882_sad_45773
3082
+ 805570882_happy_37534
3083
+ 805570882_surprise_46548
3084
+ 805570882_fear_20357
3085
+ 805570882_sad_34756
3086
+ 805570882_sad_56618
3087
+ 805570882_neutral_54393
3088
+ 805570882_fear_41922
3089
+ 805570882_neutral_24475
3090
+ 805570882_neutral_17968
3091
+ 805570882_angry_24430
3092
+ 805570882_angry_31865
3093
+ 805570882_fear_57559
3094
+ 805570882_angry_73889
3095
+ 805570882_surprise_48905
3096
+ 805570882_surprise_42945
3097
+ 805570882_neutral_23222
3098
+ 805570882_surprise_14994
3099
+ 805570882_surprise_35761
3100
+ 805570882_angry_36555
3101
+ 805570882_sad_46816
3102
+ 805570882_angry_46201
3103
+ 805570882_neutral_43220
3104
+ 805570882_surprise_22791
3105
+ 805570882_neutral_46613
3106
+ 805570882_neutral_15514
3107
+ 805570882_fear_40961
3108
+ 805570882_sad_54391
3109
+ 805570882_angry_55678
3110
+ 805570882_surprise_58499
3111
+ 805570882_angry_36830
3112
+ 805570882_sad_63285
3113
+ 805570882_sad_70919
3114
+ 805570882_neutral_20281
3115
+ 805570882_sad_16273
3116
+ 805570882_angry_36468
3117
+ 805570882_fear_36052
3118
+ 805570882_sad_34726
3119
+ 805570882_neutral_16286
3120
+ 805570882_angry_61534
3121
+ 805570882_angry_37370
3122
+ 805570882_fear_54201
3123
+ 805570882_surprise_34766
3124
+ 805570882_surprise_14520
3125
+ 805570882_neutral_19001
3126
+ 805570882_neutral_41029
3127
+ 805570882_fear_63216
3128
+ 805570882_sad_54923
3129
+ 805570882_fear_19386
3130
+ 805570882_surprise_70901
3131
+ 805570882_sad_31854
3132
+ 805570882_neutral_46944
3133
+ 805570882_neutral_56605
3134
+ 805570882_surprise_24261
3135
+ 805570882_surprise_43416
3136
+ 805570882_happy_45947
3137
+ 805570882_angry_70792
3138
+ 805570882_neutral_24454
3139
+ 805570882_fear_44478
3140
+ 805570882_happy_63103
3141
+ 805570882_sad_23234
3142
+ 805570882_fear_37072
3143
+ 805570882_neutral_45795
3144
+ 805570882_surprise_14840
3145
+ 805570882_sad_36831
3146
+ 805570882_fear_19237
3147
+ 805570882_neutral_17300
3148
+ 805570882_angry_34850
3149
+ 805570882_happy_31917
3150
+ 805570882_neutral_29629
3151
+ 805570882_surprise_42085
3152
+ 805570882_sad_70640
3153
+ 805570882_sad_37579
3154
+ 805570882_angry_41944
3155
+ 805570882_sad_23976
3156
+ 805570882_happy_55242
3157
+ 805570882_fear_14737
3158
+ 805570882_happy_42941
3159
+ 805570882_surprise_55054
3160
+ 805570882_angry_19978
3161
+ 805570882_angry_74198
3162
+ 805570882_fear_40589
3163
+ 805570882_neutral_70750
3164
+ 805570882_happy_40868
3165
+ 805570882_happy_38233
3166
+ 805570882_angry_41263
3167
+ 805570882_angry_14716
3168
+ 805570882_angry_28329
3169
+ 805570882_angry_20465
3170
+ 805570882_angry_34734
3171
+ 805570882_neutral_64860
3172
+ 805570882_angry_63956
3173
+ 805570882_happy_48241
3174
+ 805570882_surprise_26994
3175
+ 805570882_fear_37057
3176
+ 805570882_fear_17743
3177
+ 805570882_angry_24985
3178
+ 805570882_fear_29556
3179
+ 805570882_surprise_46957
3180
+ 805570882_surprise_57833
3181
+ 805570882_neutral_19305
3182
+ 805570882_angry_35169
3183
+ 805570882_fear_57811
3184
+ 805570882_happy_26985
3185
+ 805570882_fear_29427
3186
+ 805570882_fear_64017
3187
+ 805570882_neutral_35156
3188
+ 805570882_happy_34826
3189
+ 805570882_neutral_43228
3190
+ 805570882_surprise_36526
3191
+ 805570882_sad_27636
3192
+ 805570882_neutral_61766
3193
+ 805570882_surprise_16371
3194
+ 805570882_neutral_26414
3195
+ 805570882_neutral_37225
3196
+ 805570882_sad_19864
3197
+ 805570882_surprise_63820
3198
+ 805570882_angry_23922
3199
+ 805570882_surprise_75124
3200
+ 805570882_sad_54268
3201
+ 805570882_neutral_24572
3202
+ 805570882_happy_37331
3203
+ 805570882_neutral_27325
3204
+ 805570882_neutral_61596
3205
+ 805570882_sad_19371
3206
+ 805570882_neutral_28440
3207
+ 805570882_neutral_58398
3208
+ 805570882_angry_17845
3209
+ 805570882_angry_38054
3210
+ 805570882_sad_64756
3211
+ 805570882_angry_27606
3212
+ 805570882_fear_64785
3213
+ 805570882_angry_17876
3214
+ 805570882_angry_40775
3215
+ 399172782_happy_73593
3216
+ 399172782_sad_65138
3217
+ 399172782_surprise_36154
3218
+ 399172782_angry_26197
3219
+ 399172782_angry_26851
3220
+ 399172782_surprise_69860
3221
+ 399172782_fear_45465
3222
+ 399172782_sad_61916
3223
+ 399172782_angry_57855
3224
+ 399172782_sad_64636
3225
+ 399172782_neutral_19617
3226
+ 399172782_surprise_23787
3227
+ 399172782_happy_64346
3228
+ 399172782_sad_19777
3229
+ 399172782_surprise_73551
3230
+ 399172782_neutral_70247
3231
+ 399172782_surprise_54644
3232
+ 399172782_neutral_58107
3233
+ 399172782_angry_61939
3234
+ 399172782_angry_64651
3235
+ 399172782_fear_61898
3236
+ 399172782_surprise_58296
3237
+ 399172782_angry_69702
3238
+ 399172782_surprise_26223
3239
+ 399172782_fear_49197
3240
+ 399172782_surprise_62017
3241
+ 399172782_neutral_65346
3242
+ 399172782_sad_25408
3243
+ 399172782_angry_55097
3244
+ 399172782_sad_69732
3245
+ 399172782_angry_72399
3246
+ 399172782_neutral_68243
3247
+ 399172782_happy_20516
3248
+ 399172782_fear_57709
3249
+ 399172782_fear_21586
3250
+ 399172782_angry_56016
3251
+ 399172782_angry_26699
3252
+ 399172782_fear_49581
3253
+ 399172782_angry_61956
3254
+ 399172782_neutral_26843
3255
+ 399172782_surprise_21652
3256
+ 399172782_fear_55502
3257
+ 399172782_surprise_56362
3258
+ 399172782_sad_61865
3259
+ 399172782_angry_70319
3260
+ 399172782_happy_72504
3261
+ 399172782_angry_61831
3262
+ 399172782_surprise_45284
3263
+ 399172782_sad_69893
3264
+ 399172782_neutral_57605
3265
+ 399172782_fear_69365
3266
+ 399172782_angry_61977
3267
+ 399172782_surprise_65525
3268
+ 399172782_neutral_61966
3269
+ 399172782_neutral_70302
3270
+ 399172782_sad_62656
3271
+ 399172782_fear_23803
3272
+ 399172782_neutral_69630
3273
+ 399172782_sad_57222
3274
+ 399172782_neutral_70094
3275
+ 399172782_fear_19666
3276
+ 399172782_fear_45311
3277
+ 399172782_angry_49086
3278
+ 399172782_surprise_45091
3279
+ 399172782_angry_72356
3280
+ 399172782_sad_57654
3281
+ 399172782_surprise_58135
3282
+ 399172782_surprise_20873
3283
+ 399172782_happy_21508
3284
+ 399172782_sad_70375
3285
+ 399172782_fear_20849
3286
+ 399172782_happy_70008
3287
+ 399172782_happy_62668
3288
+ 399172782_angry_62562
3289
+ 399172782_fear_65254
3290
+ 399172782_sad_45494
3291
+ 399172782_angry_49376
3292
+ 399172782_sad_61995
3293
+ 399172782_fear_57961
3294
+ 399172782_angry_55973
3295
+ 399172782_happy_21648
3296
+ 399172782_fear_45233
3297
+ 399172782_angry_54654
3298
+ 399172782_surprise_45298
3299
+ 399172782_neutral_45106
3300
+ 399172782_angry_19718
3301
+ 399172782_fear_64261
3302
+ 399172782_surprise_49604
3303
+ 399172782_happy_54795
3304
+ 399172782_sad_58128
3305
+ 399172782_surprise_20700
3306
+ 399172782_sad_57085
3307
+ 399172782_angry_49554
3308
+ 399172782_happy_55984
3309
+ 399172782_neutral_72596
3310
+ 399172782_surprise_69452
3311
+ 399172782_neutral_54493
3312
+ 399172782_angry_73534
3313
+ 399172782_angry_35202
3314
+ 399172782_angry_69823
3315
+ 399172782_sad_69738
3316
+ 399172782_happy_58136
3317
+ 399172782_angry_57797
3318
+ 399172782_neutral_19623
3319
+ 399172782_neutral_20687
3320
+ 399172782_happy_70406
3321
+ 399172782_sad_55955
3322
+ 399172782_surprise_49485
3323
+ 399172782_sad_45117
3324
+ 399172782_neutral_54738
3325
+ 399172782_sad_61948
3326
+ 399172782_neutral_43709
3327
+ 399172782_fear_20788
3328
+ 399172782_fear_62229
3329
+ 399172782_happy_45297
3330
+ 399172782_neutral_70567
3331
+ 399172782_angry_20717
3332
+ 399172782_neutral_54562
3333
+ 399172782_surprise_57301
3334
+ 399172782_happy_62596
3335
+ 399172782_angry_65222
3336
+ 399172782_neutral_58183
3337
+ 399172782_happy_65511
3338
+ 399172782_fear_58200
3339
+ 399172782_surprise_72340
3340
+ 399172782_angry_70450
3341
+ 399172782_neutral_64284
3342
+ 399172782_neutral_55276
3343
+ 399172782_sad_65483
3344
+ 399172782_surprise_45995
3345
+ 399172782_happy_26805
3346
+ 399172782_angry_54767
3347
+ 399172782_happy_57841
3348
+ 399172782_happy_65471
3349
+ 399172782_surprise_26205
3350
+ 399172782_fear_23989
3351
+ 399172782_fear_64125
3352
+ 399172782_surprise_64387
3353
+ 399172782_angry_25699
3354
+ 399172782_sad_56032
3355
+ 399172782_fear_72685
3356
+ 399172782_fear_56136
3357
+ 399172782_angry_23745
3358
+ 399172782_sad_19672
3359
+ 399172782_neutral_43702
3360
+ 399172782_happy_56984
3361
+ 399172782_sad_64441
3362
+ 399172782_neutral_57984
3363
+ 399172782_angry_72647
3364
+ 399172782_happy_69716
3365
+ 399172782_happy_61798
3366
+ 399172782_neutral_21611
3367
+ 399172782_fear_64444
3368
+ 399172782_sad_73707
3369
+ 399172782_surprise_56501
3370
+ 399172782_sad_70316
3371
+ 399172782_neutral_54814
3372
+ 399172782_angry_57019
3373
+ 399172782_sad_49516
3374
+ 399172782_neutral_57730
3375
+ 399172782_surprise_70545
3376
+ 399172782_angry_70337
3377
+ 399172782_neutral_24117
3378
+ 399172782_surprise_64492
3379
+ 399172782_surprise_62687
3380
+ 399172782_surprise_25680
3381
+ 399172782_neutral_23862
3382
+ 399172782_sad_23730
3383
+ 399172782_angry_58262
3384
+ 399172782_fear_36136
3385
+ 399172782_neutral_57827
3386
+ 399172782_surprise_70106
3387
+ 399172782_sad_36132
3388
+ 399172782_fear_48850
3389
+ 399172782_sad_49523
3390
+ 399172782_neutral_26841
3391
+ 399172782_sad_25325
3392
+ 399172782_angry_19781
3393
+ 399172782_surprise_64191
3394
+ 399172782_angry_19701
3395
+ 399172782_neutral_49496
3396
+ 399172782_surprise_65506
3397
+ 399172782_sad_19749
3398
+ 399172782_fear_62114
3399
+ 399172782_surprise_54692
3400
+ 399172782_angry_22164
3401
+ 399172782_neutral_19825
3402
+ 399172782_surprise_62009
3403
+ 399172782_fear_64619
3404
+ 399172782_surprise_26867
3405
+ 399172782_fear_57983
3406
+ 399172782_sad_73823
3407
+ 399172782_fear_61942
3408
+ 399172782_neutral_25697
3409
+ 399172782_happy_57743
3410
+ 399172782_angry_72544
3411
+ 399172782_surprise_20674
3412
+ 399172782_neutral_54770
3413
+ 399172782_angry_21543
3414
+ 399172782_neutral_23794
3415
+ 399172782_fear_56067
3416
+ 399172782_surprise_55373
3417
+ 399172782_fear_70120
3418
+ 399172782_surprise_57773
3419
+ 399172782_fear_58091
3420
+ 399172782_angry_65282
3421
+ 399172782_sad_57150
3422
+ 399172782_angry_25047
3423
+ 399172782_fear_69676
3424
+ 399172782_surprise_57687
3425
+ 399172782_happy_19679
3426
+ 399172782_happy_45073
3427
+ 399172782_neutral_26758
3428
+ 399172782_surprise_56147
3429
+ 399172782_angry_70544
3430
+ 399172782_angry_19757
3431
+ 399172782_sad_62200
3432
+ 399172782_sad_64687
3433
+ 399172782_happy_57582
3434
+ 399172782_neutral_73465
3435
+ 399172782_sad_26216
3436
+ 399172782_happy_65332
3437
+ 399172782_happy_20590
3438
+ 399172782_neutral_23699
3439
+ 399172782_happy_54656
3440
+ 399172782_sad_62267
3441
+ 399172782_sad_19808
3442
+ 399172782_fear_65481
3443
+ 399172782_happy_65541
3444
+ 399172782_angry_69551
3445
+ 399172782_surprise_69638
3446
+ 399172782_sad_70529
3447
+ 399172782_angry_60840
3448
+ 399172782_happy_64415
3449
+ 399172782_happy_49477
3450
+ 399172782_surprise_61858
3451
+ 399172782_happy_60845
3452
+ 399172782_surprise_20809
3453
+ 399172782_angry_19756
3454
+ 399172782_fear_73716
3455
+ 399172782_sad_72252
3456
+ 399172782_angry_70250
3457
+ 399172782_surprise_23749
3458
+ 399172782_sad_73591
3459
+ 399172782_fear_64128
3460
+ 399172782_angry_55360
3461
+ 399172782_sad_55378
3462
+ 399172782_sad_45279
3463
+ 399172782_angry_68043
3464
+ 399172782_surprise_20671
3465
+ 399172782_sad_14234
3466
+ 399172782_fear_56291
3467
+ 399172782_surprise_70334
3468
+ 399172782_happy_24003
3469
+ 399172782_sad_35213
3470
+ 399172782_surprise_54533
3471
+ 399172782_happy_68115
3472
+ 399172782_angry_69890
3473
+ 399172782_neutral_69332
3474
+ 399172782_angry_61832
3475
+ 399172782_sad_26752
3476
+ 399172782_fear_26731
3477
+ 399172782_sad_54667
3478
+ 399172782_sad_64313
3479
+ 399172782_sad_54448
3480
+ 399172782_angry_25358
3481
+ 399172782_sad_56109
3482
+ 399172782_neutral_62536
3483
+ 399172782_surprise_61971
3484
+ 399172782_neutral_72282
3485
+ 399172782_sad_49469
3486
+ 399172782_happy_65243
3487
+ 399172782_sad_14264
3488
+ 399172782_neutral_54621
3489
+ 399172782_surprise_19836
3490
+ 399172782_fear_62150
3491
+ 399172782_surprise_54616
3492
+ 399172782_surprise_48863
3493
+ 399172782_angry_56047
3494
+ 399172782_neutral_64420
3495
+ 399172782_surprise_26744
3496
+ 399172782_neutral_21549
3497
+ 399172782_happy_60853
3498
+ 399172782_angry_45156
3499
+ 399172782_fear_62658
3500
+ 399172782_happy_70574
3501
+ 399172782_surprise_56450
3502
+ 399172782_happy_69540
3503
+ 399172782_surprise_61946
3504
+ 399172782_fear_73547
3505
+ 399172782_happy_62465
3506
+ 399172782_neutral_70362
3507
+ 399172782_fear_70611
3508
+ 399172782_sad_57871
3509
+ 399172782_neutral_62432
3510
+ 399172782_angry_70569
3511
+ 399172782_fear_20799
3512
+ 399172782_happy_49600
3513
+ 399172782_neutral_23784
3514
+ 399172782_neutral_25689
3515
+ 399172782_angry_56197
3516
+ 399172782_sad_72379
3517
+ 399172782_neutral_56364
3518
+ 399172782_angry_14251
3519
+ 399172782_happy_55490
3520
+ 399172782_sad_70257
3521
+ 399172782_fear_56282
3522
+ 399172782_neutral_25062
3523
+ 399172782_fear_73679
3524
+ 399172782_fear_58105
3525
+ 399172782_happy_62431
3526
+ 399172782_happy_65329
3527
+ 399172782_happy_54573
3528
+ 399172782_angry_58223
3529
+ 399172782_fear_56074
3530
+ 399172782_neutral_72468
3531
+ 399172782_fear_65432
3532
+ 399172782_happy_57903
3533
+ 399172782_fear_64632
3534
+ 399172782_happy_19762
3535
+ 399172782_happy_45098
3536
+ 399172782_sad_65542
3537
+ 399172782_sad_56286
3538
+ 399172782_surprise_72346
3539
+ 399172782_angry_23788
3540
+ 399172782_angry_45490
3541
+ 399172782_sad_58019
3542
+ 399172782_neutral_45249
3543
+ 399172782_happy_64342
3544
+ 399172782_surprise_72519
3545
+ 399172782_surprise_21576
3546
+ 399172782_happy_54648
3547
+ 399172782_fear_45092
3548
+ 399172782_angry_45210
3549
+ 399172782_angry_62195
3550
+ 399172782_sad_25350
3551
+ 399172782_neutral_68298
3552
+ 399172782_angry_68016
3553
+ 399172782_fear_45341
3554
+ 399172782_sad_20519
3555
+ 399172782_surprise_60852
3556
+ 399172782_sad_57219
3557
+ 399172782_surprise_45128
3558
+ 399172782_surprise_62509
3559
+ 399172782_happy_64396
3560
+ 399172782_happy_68160
3561
+ 399172782_fear_72320
3562
+ 399172782_neutral_35203
3563
+ 399172782_neutral_43719
3564
+ 399172782_neutral_65545
3565
+ 399172782_happy_62254
3566
+ 399172782_happy_23816
3567
+ 399172782_neutral_54657
3568
+ 399172782_fear_56049
3569
+ 399172782_fear_25028
3570
+ 399172782_sad_64246
3571
+ 399172782_angry_26682
3572
+ 399172782_angry_69563
3573
+ 399172782_happy_20800
3574
+ 399172782_sad_70057
3575
+ 399172782_surprise_19726
3576
+ 399172782_happy_58143
3577
+ 399172782_sad_56205
3578
+ 399172782_sad_57024
3579
+ 399172782_neutral_69857
3580
+ 399172782_fear_56108
3581
+ 399172782_happy_20555
3582
+ 399172782_fear_20874
3583
+ 399172782_surprise_58203
3584
+ 399172782_fear_73643
3585
+ 399172782_sad_57919
3586
+ 399172782_angry_70563
3587
+ 399172782_happy_70329
3588
+ 399172782_angry_64162
3589
+ 399172782_fear_70385
3590
+ 399172782_neutral_61856
3591
+ 399172782_surprise_65458
3592
+ 399172782_angry_54709
3593
+ 399172782_angry_69462
3594
+ 399172782_sad_73548
3595
+ 399172782_angry_64540
3596
+ 399172782_happy_45309
3597
+ 399172782_neutral_68215
3598
+ 399172782_angry_19753
3599
+ 399172782_fear_73472
3600
+ 399172782_angry_57962
3601
+ 399172782_angry_55458
3602
+ 399172782_neutral_70528
3603
+ 399172782_angry_26692
3604
+ 399172782_sad_45348
3605
+ 399172782_surprise_72308
3606
+ 399172782_surprise_58150
3607
+ 399172782_sad_64616
3608
+ 399172782_fear_25035
3609
+ 399172782_fear_61905
3610
+ 399172782_surprise_45324
3611
+ 399172782_surprise_57293
3612
+ 399172782_angry_58022
3613
+ 399172782_neutral_73785
3614
+ 399172782_sad_57152
3615
+ 399172782_happy_25023
3616
+ 399172782_neutral_64157
3617
+ 399172782_neutral_73526
3618
+ 399172782_fear_25285
3619
+ 399172782_fear_70277
3620
+ 399172782_happy_69573
3621
+ 399172782_happy_54471
3622
+ 399172782_angry_55032
3623
+ 399172782_angry_23731
3624
+ 399172782_happy_70515
3625
+ 399172782_fear_22110
3626
+ 399172782_happy_55181
3627
+ 399172782_fear_58175
3628
+ 399172782_neutral_43668
3629
+ 399172782_happy_45470
3630
+ 399172782_fear_69814
3631
+ 399172782_neutral_43690
3632
+ 399172782_fear_26789
3633
+ 399172782_fear_56457
3634
+ 399172782_neutral_25294
3635
+ 399172782_happy_49468
3636
+ 399172782_surprise_68075
3637
+ 399172782_surprise_61930
3638
+ 399172782_surprise_26735
3639
+ 399172782_angry_57071
3640
+ 399172782_surprise_62039
3641
+ 399172782_neutral_21633
3642
+ 399172782_sad_58147
3643
+ 399172782_sad_21510
3644
+ 399172782_fear_72348
3645
+ 399172782_angry_62032
3646
+ 399172782_sad_62138
3647
+ 399172782_surprise_70193
3648
+ 399172782_neutral_25163
3649
+ 399172782_surprise_25194
3650
+ 399172782_happy_55314
3651
+ 399172782_fear_65318
3652
+ 399172782_angry_70615
3653
+ 399172782_neutral_73586
3654
+ 399172782_surprise_19791
3655
+ 399172782_neutral_45528
3656
+ 399172782_fear_45099
3657
+ 399172782_angry_60851
3658
+ 399172782_angry_62474
3659
+ 399172782_happy_62258
3660
+ 399172782_neutral_62214
3661
+ 399172782_angry_64608
3662
+ 399172782_fear_54558
3663
+ 399172782_fear_69839
3664
+ 399172782_happy_69915
3665
+ 399172782_happy_69804
3666
+ 399172782_angry_69507
3667
+ 399172782_fear_61825
3668
+ 399172782_neutral_56175
3669
+ 399172782_fear_68100
3670
+ 399172782_surprise_55510
3671
+ 399172782_happy_25459
3672
+ 399172782_happy_14235
3673
+ 399172782_fear_61833
3674
+ 399172782_neutral_62464
3675
+ 399172782_sad_64354
3676
+ 399172782_happy_46007
3677
+ 399172782_angry_73710
3678
+ 399172782_neutral_73605
3679
+ 399172782_fear_19759
3680
+ 399172782_angry_19761
3681
+ 399172782_fear_61923
3682
+ 399172782_fear_69677
3683
+ 399172782_fear_20812
3684
+ 399172782_angry_43732
3685
+ 399172782_sad_65451
3686
+ 399172782_surprise_64546
3687
+ 399172782_happy_57145
3688
+ 399172782_fear_26727
3689
+ 399172782_angry_45545
3690
+ 399172782_surprise_61852
3691
+ 399172782_surprise_56123
3692
+ 399172782_surprise_26742
3693
+ 399172782_surprise_69557
3694
+ 399172782_neutral_62411
3695
+ 399172782_neutral_20567
3696
+ 399172782_neutral_56266
3697
+ 399172782_happy_43712
3698
+ 399172782_sad_70463
3699
+ 399172782_neutral_70050
3700
+ 399172782_neutral_48922
3701
+ 399172782_neutral_54561
3702
+ 399172782_neutral_64273
3703
+ 399172782_sad_45256
3704
+ 399172782_neutral_21535
3705
+ 399172782_neutral_20834
3706
+ 399172782_neutral_72611
3707
+ 399172782_happy_46022
3708
+ 399172782_fear_22058
3709
+ 399172782_happy_26768
3710
+ 399172782_angry_60864
3711
+ 399172782_neutral_60901
3712
+ 399172782_angry_69363
3713
+ 399172782_neutral_57108
3714
+ 399172782_sad_26770
3715
+ 399172782_neutral_69867
3716
+ 399172782_fear_61801
3717
+ 399172782_happy_62557
3718
+ 399172782_sad_64168
3719
+ 399172782_angry_57380
3720
+ 399172782_angry_60874
3721
+ 399172782_fear_56488
3722
+ 399172782_happy_58011
3723
+ 399172782_angry_18425
3724
+ 399172782_neutral_68188
3725
+ 399172782_sad_45267
3726
+ 399172782_sad_64506
3727
+ 399172782_happy_72711
3728
+ 399172782_happy_69516
3729
+ 399172782_fear_62293
3730
+ 399172782_fear_69979
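Note on the filelists above: every utterance ID follows a <speaker>_<emotion>_<index> pattern (e.g. 399172782_fear_69979). A throwaway sketch of how such an ID can be split back into its parts; the helper below is illustrative and not part of this repo:

# Hypothetical helper: split "<speaker>_<emotion>_<index>" into fields.
def parse_utt_id(utt_id: str) -> dict:
    speaker, emotion, index = utt_id.split("_")
    return {"speaker": speaker, "emotion": emotion, "index": int(index)}

print(parse_utt_id("399172782_fear_69979"))
# {'speaker': '399172782', 'emotion': 'fear', 'index': 69979}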
filelists/all_spks/feats.ark ADDED
Binary file (449 kB). View file
 
filelists/all_spks/feats.scp ADDED
@@ -0,0 +1,3 @@
1
+ utt1 /Users/Desktop/code/GradTTS-emo/filelists/example/feats.ark:5
2
+ utt2 /Users/Desktop/code/GradTTS-emo/filelists/example/feats.ark:78745
3
+ utt3 /Users/Desktop/code/GradTTS-emo/filelists/example/feats.ark:370605
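Each feats.scp entry is a Kaldi-style pointer of the form <utt-id> <path to feats.ark>:<byte offset>. Since the repo already depends on kaldiio (inference_EMA.py below imports WriteHelper), the stored arrays can be read back roughly as follows; the path and the meaning of the features are assumptions here, not something stated in this commit:

# Sketch: iterate over the features referenced by a Kaldi .scp file with kaldiio.
from kaldiio import ReadHelper

with ReadHelper("scp:filelists/all_spks/feats.scp") as reader:
    for utt_id, feats in reader:   # feats comes back as a NumPy array
        print(utt_id, feats.shape)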
filelists/all_spks/text ADDED
The diff for this file is too large to render. See raw diff
 
filelists/all_spks/train_utts.txt ADDED
The diff for this file is too large to render. See raw diff
 
filelists/all_spks/utt2emo.json ADDED
The diff for this file is too large to render. See raw diff
 
filelists/all_spks/utt2spk.json ADDED
The diff for this file is too large to render. See raw diff
 
filelists/inference_generated.txt ADDED
@@ -0,0 +1,2 @@
1
+ Августың аяқ жағына мүсінші тәңірия Венераның баласы Амур бейнесін орналастырған.|0|0
2
+ Қарғыс айтқалы жатыр ғой, өз балаларына!– десіп үркіп үн салды.|1|1
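The two lines above use the <text>|<emotion id>|<speaker id> layout that inference_EMA.py (added below) parses; the integer ids index into its sorted emotion list and its speaker list. A minimal parse of one such line, with a placeholder English sentence standing in for the Kazakh ones above:

# Sketch of the "<text>|<emotion id>|<speaker id>" line format used for inference.
line = "A placeholder sentence to synthesize.|0|1"
text, emo_id, spk_id = line.split("|")
emo_id, spk_id = int(emo_id), int(spk_id)
print(text, emo_id, spk_id)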
g_01720000 ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa0a76a49573909b968708717138849d68d9627cdf03307d00c7bd49278dc573
3
+ size 55824433
grad_uncond.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:228736e3807fcd84030ec03daf1124094e6388e7a08ccabd87354d93a2e7fe0c
3
+ size 69784515
grad_uncond_10k_conf.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbc5ffac171e269fd08bb0313022cb2c3c35e9a9cc4620ef4992fd79cf6e61a8
3
+ size 69787322
grad_uncond_cnn_001.pt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bc2ba56e0be17fa5d95c55b1c9bedfcd45a45edf865d1cb3f30a1405e645f67
3
+ size 69787003
inference_EMA.py ADDED
@@ -0,0 +1,89 @@
1
+ import argparse
2
+ import json
3
+ import datetime as dt
4
+ import numpy as np
5
+ from scipy.io.wavfile import write
6
+ import IPython.display as ipd
7
+ import glob
8
+ import torch
9
+ from pydub import AudioSegment
10
+ from torch.utils.data import DataLoader
11
+ from text import text_to_sequence, cmudict
12
+ from text.symbols import symbols
13
+ import utils_data
14
+ import re
15
+ from num2words import num2words
16
+ from kaldiio import WriteHelper
17
+ import os
18
+ from tqdm import tqdm
19
+ from text import text_to_sequence, convert_text
20
+ from model import GradTTSWithEmo
21
+ import utils_data as utils
22
+ from attrdict import AttrDict
23
+ from models import Generator as HiFiGAN
24
+
25
+
26
+ HIFIGAN_CONFIG = './configs/hifigan-config.json'
27
+ HIFIGAN_CHECKPT = './checkpts/hifigan.pt'
28
+
29
+
30
+ if __name__ == '__main__':
31
+ hps, args = utils.get_hparams_decode()
32
+ device = torch.device('cpu' if not torch.cuda.is_available() else "cuda")
33
+ ckpt = utils_data.latest_checkpoint_path(hps.model_dir, "EMA_grad_*.pt")
34
+ print(ckpt)
35
+ model = GradTTSWithEmo(**hps.model).to(device)
36
+ logger = utils_data.get_logger(hps.model_dir, "inference.log")
37
+ utils_data.load_checkpoint(ckpt, model, None)
38
+ _ = model.cuda().eval()
39
+
40
+ print('Initializing HiFi-GAN...')
41
+ with open(HIFIGAN_CONFIG) as f:
42
+ h = AttrDict(json.load(f))
43
+ vocoder = HiFiGAN(h)
44
+ vocoder.load_state_dict(torch.load(HIFIGAN_CHECKPT, map_location=lambda storage, loc: storage)['generator'])
45
+ _ = vocoder.cuda().eval()
46
+ vocoder.remove_weight_norm()
47
+
48
+ emos = sorted(["angry", "surprise", "fear", "happy", "neutral", "sad"])
49
+ speakers = ['M1', 'F1', 'M2']
50
+
51
+ with open(args.file, 'r', encoding='utf-8') as f:
52
+ texts = [line.strip() for line in f.readlines()]
53
+
54
+ replace_nums = []
55
+ for i in texts:
56
+ replace_nums.append(i.split('|', 1))
57
+
58
+ nums2word = [re.sub(r'(\d+)', lambda m: num2words(m.group(), lang='kz'), sentence) for sentence in np.array(replace_nums)[:, 0]]
59
+ # Speakers id.
60
+ # M1 = 0
61
+ # F1 = 1
62
+ # M2 = 2
63
+ text2speech = []
64
+ for i, j in zip(nums2word, np.array(replace_nums)[:, 1]):
65
+ text2speech.append(f'{i}|{j}')
66
+
67
+ for i, line in enumerate(text2speech):
68
+ emo_i = int(line.split('|')[1])
69
+ control_spk_id = int(line.split('|')[2])
70
+ control_emo_id = emos.index(emos[emo_i])
71
+ text = line.split('|')[0]
72
+ with torch.no_grad():
73
+ ### define emotion
74
+ emo = torch.LongTensor([control_emo_id]).to(device)
75
+ sid = torch.LongTensor([control_spk_id]).to(device)
76
+ text_padded, text_len = convert_text(text)
77
+ y_enc, y_dec, attn = model.forward(text_padded, text_len,
78
+ n_timesteps=args.timesteps,
79
+ temperature=args.noise,
80
+ stoc=args.stoc, spk=sid, emo=emo, length_scale=1.,
81
+ classifier_free_guidance=args.guidance)
82
+ res = y_dec.squeeze().cpu().numpy()
83
+ x = torch.from_numpy(res).cuda().unsqueeze(0)
84
+ y_g_hat = vocoder(x)
85
+ audio = y_g_hat.squeeze()
86
+ audio = audio * 32768.0
87
+ audio = audio.detach().cpu().numpy().astype('int16')
88
+ audio = AudioSegment(audio.data, frame_rate=22050, sample_width=2, channels=1)
89
+ audio.export(f'{args.generated_path}/{emos[emo_i]}_{speakers[int(line.split("|")[2])]}.wav', format="wav")
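One preprocessing step in the script above is easy to miss: digit groups in the input text are expanded into Kazakh number words before synthesis. A standalone sketch of just that step (the sample sentence is made up, and the exact spelling depends on the installed num2words version):

# Digit-expansion step in isolation; requires the num2words package.
import re
from num2words import num2words

sentence = "Sample text with the number 25 in it."
normalized = re.sub(r"(\d+)", lambda m: num2words(m.group(), lang="kz"), sentence)
print(normalized)  # the digit group "25" is replaced by its Kazakh spelling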
inference_intensity_control.ipynb ADDED
File without changes
melspec.py ADDED
@@ -0,0 +1,40 @@
1
+ import torch
2
+ import torchaudio
3
+ import librosa
4
+
5
+ mel_basis = {}
6
+ hann_window = {}
7
+
8
+ def dynamic_range_compression_torch(x, C=1, clip_val=1e-5):
9
+ return torch.log(torch.clamp(x, min=clip_val) * C)
10
+
11
+ def spectral_normalize_torch(magnitudes):
12
+ output = dynamic_range_compression_torch(magnitudes)
13
+ return output
14
+
15
+
16
+
17
+ def mel_spectrogram(y, n_fft, num_mels, sampling_rate, hop_size, win_size, fmin, fmax, center=False):
18
+ if torch.min(y) < -1.:
19
+ print('min value is ', torch.min(y))
20
+ if torch.max(y) > 1.:
21
+ print('max value is ', torch.max(y))
22
+
23
+ global mel_basis, hann_window
24
+ if fmax not in mel_basis:
25
+ mel = librosa.filters.mel(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax)
26
+ mel_basis[str(fmax)+'_'+str(y.device)] = torch.from_numpy(mel).float().to(y.device)
27
+ hann_window[str(y.device)] = torch.hann_window(win_size).to(y.device)
28
+
29
+ y = torch.nn.functional.pad(y.unsqueeze(1), (int((n_fft-hop_size)/2), int((n_fft-hop_size)/2)), mode='reflect')
30
+ y = y.squeeze(1)
31
+
32
+ spec = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[str(y.device)],
33
+ center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=False)
34
+
35
+ spec = torch.sqrt(spec.pow(2).sum(-1)+(1e-9))
36
+
37
+ spec = torch.matmul(mel_basis[str(fmax)+'_'+str(y.device)], spec)
38
+ spec = spectral_normalize_torch(spec)
39
+
40
+ return spec.numpy()
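A quick way to smoke-test mel_spectrogram is to feed it a dummy waveform; the STFT/mel settings below are common 22.05 kHz HiFi-GAN values and are placeholders, not the values from this repo's configs:

# Illustrative call only; parameter values are placeholders, not the project's configs.
import torch
from melspec import mel_spectrogram

y = torch.rand(1, 22050) * 2 - 1   # one second of fake audio in [-1, 1), shape [B, T]
mel = mel_spectrogram(y, n_fft=1024, num_mels=80, sampling_rate=22050,
                      hop_size=256, win_size=1024, fmin=0, fmax=8000)
print(mel.shape)                   # (1, 80, n_frames)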
model/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+
2
+ from .tts import GradTTSWithEmo, GradTTSXvector
model/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (240 Bytes). View file
 
model/__pycache__/tts.cpython-39.pyc ADDED
Binary file (16.7 kB). View file
 
model/base.py ADDED
@@ -0,0 +1,28 @@
1
+ import numpy as np
2
+ import torch
3
+
4
+
5
+ class BaseModule(torch.nn.Module):
6
+ def __init__(self):
7
+ super(BaseModule, self).__init__()
8
+
9
+ @property
10
+ def nparams(self):
11
+ """
12
+ Returns number of trainable parameters of the module.
13
+ """
14
+ num_params = 0
15
+ for name, param in self.named_parameters():
16
+ if param.requires_grad:
17
+ num_params += np.prod(param.detach().cpu().numpy().shape)
18
+ return num_params
19
+
20
+ def relocate_input(self, x: list):
21
+ """
22
+ Relocates provided tensors to the same device set for the module.
23
+ """
24
+ device = next(self.parameters()).device
25
+ for i in range(len(x)):
26
+ if isinstance(x[i], torch.Tensor) and x[i].device != device:
27
+ x[i] = x[i].to(device)
28
+ return x
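nparams above simply sums the shapes of all trainable parameters; a toy check (the Linear layer is only an example, not one of this repo's models):

# Toy subclass to sanity-check BaseModule.nparams.
import torch
from model.base import BaseModule

class Toy(BaseModule):
    def __init__(self):
        super().__init__()
        self.proj = torch.nn.Linear(10, 4)   # 40 weights + 4 biases

print(Toy().nparams)  # 44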
model/classifier.py ADDED
@@ -0,0 +1,690 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ from torch import Tensor, BoolTensor
4
+
5
+ from typing import Optional, Tuple, Iterable
6
+ from model.diffusion import SinusoidalPosEmb
7
+ from torch.nn.functional import pad
8
+
9
+
10
+ import math
11
+
12
+ def silu(input):
13
+ '''
14
+ Applies the Sigmoid Linear Unit (SiLU) function element-wise:
15
+ SiLU(x) = x * sigmoid(x)
16
+ '''
17
+ return input * torch.sigmoid(input) # torch.sigmoid gives the most efficient built-in PyTorch implementation
18
+
19
+
20
+ class RelPositionMultiHeadedAttention(nn.Module):
21
+ """Multi-Head Self-Attention layer with relative position encoding.
22
+ Paper: https://arxiv.org/abs/1901.02860
23
+ Args:
24
+ n_head: The number of heads.
25
+ d: The number of features.
26
+ dropout: Dropout rate.
27
+ zero_triu: Whether to zero the upper triangular part of attention matrix.
28
+ """
29
+
30
+ def __init__(
31
+ self, d: int, n_head: int, dropout: float
32
+ ):
33
+ super().__init__()
34
+ assert d % n_head == 0
35
+ self.c = d // n_head
36
+ self.h = n_head
37
+
38
+ self.linear_q = nn.Linear(d, d)
39
+ self.linear_k = nn.Linear(d, d)
40
+ self.linear_v = nn.Linear(d, d)
41
+ self.linear_out = nn.Linear(d, d)
42
+
43
+ self.p_attn = None
44
+ self.dropout = nn.Dropout(p=dropout)
45
+
46
+ # linear transformation for positional encoding
47
+ self.linear_pos = nn.Linear(d, d, bias=False)
48
+
49
+ # these two learnable bias are used in matrix c and matrix d
50
+ # as described in https://arxiv.org/abs/1901.02860 Section 3.3
51
+ self.u = nn.Parameter(torch.Tensor(self.h, self.c))
52
+ self.v = nn.Parameter(torch.Tensor(self.h, self.c))
53
+ # [H, C]
54
+ torch.nn.init.xavier_uniform_(self.u)
55
+ torch.nn.init.xavier_uniform_(self.v)
56
+
57
+ def forward_qkv(self, query, key, value) -> Tuple[Tensor, ...]:
58
+ """Transform query, key and value.
59
+ Args:
60
+ query (Tensor): [B, S, D].
61
+ key (Tensor): [B, T, D].
62
+ value (Tensor): [B, T, D].
63
+ Returns:
64
+ q (Tensor): [B, H, S, C].
65
+ k (Tensor): [B, H, T, C].
66
+ v (Tensor): [B, H, T, C].
67
+ """
68
+ n_batch = query.size(0)
69
+ q = self.linear_q(query).view(n_batch, -1, self.h, self.c)
70
+ k = self.linear_k(key).view(n_batch, -1, self.h, self.c)
71
+ v = self.linear_v(value).view(n_batch, -1, self.h, self.c)
72
+ q = q.transpose(1, 2)
73
+ k = k.transpose(1, 2)
74
+ v = v.transpose(1, 2)
75
+ return q, k, v
76
+
77
+ def forward_attention(self, v, scores, mask, causal=False) -> Tensor:
78
+ """Compute attention context vector.
79
+ Args:
80
+ v (Tensor): [B, H, T, C].
81
+ scores (Tensor): [B, H, S, T].
82
+ mask (BoolTensor): [B, T], True values are masked from scores.
83
+ Returns:
84
+ result (Tensor): [B, S, D]. Attention result weighted by the score.
85
+ """
86
+ n_batch, H, S, T = scores.shape
87
+ if mask is not None:
88
+ scores = scores.masked_fill(
89
+ mask.unsqueeze(1).unsqueeze(2).to(bool),
90
+ float("-inf"), # [B, H, S, T]
91
+ )
92
+ if causal:
93
+ k_grid = torch.arange(0, S, dtype=torch.int32, device=scores.device)
94
+ v_grid = torch.arange(0, T, dtype=torch.int32, device=scores.device)
95
+ kk, vv = torch.meshgrid(k_grid, v_grid, indexing="ij")
96
+ causal_mask = vv > kk
97
+ scores = scores.masked_fill(
98
+ causal_mask.view(1, 1, S, T), float("-inf")
99
+ )
100
+
101
+ p_attn = self.p_attn = torch.softmax(scores, dim=-1) # [B, H, S, T]
102
+ p_attn = self.dropout(p_attn) # [B, H, S, T]
103
+
104
+ x = torch.matmul(p_attn, v) # [B, H, S, C]
105
+ x = (
106
+ x.transpose(1, 2).contiguous().view(n_batch, -1, self.h * self.c)
107
+ ) # [B, S, D]
108
+
109
+ return self.linear_out(x) # [B, S, D]
110
+
111
+ def rel_shift(self, x):
112
+ """Converting (..., i, i - j) matrix into (..., i, j) matrix.
113
+ Args:
114
+ x (Tensor): [B, H, S, 2S-1].
115
+ Returns:
116
+ x (Tensor): [B, H, S, S].
117
+ Example: Take S = 2 for example, larger values work similarly.
118
+ x = [
119
+ [(0, -1), (0, 0), (0, 1)],
120
+ [(1, 0), (1, 1), (1, 2)]
121
+ ]
122
+ x_padded = [
123
+ [(x, x), (0, -1), (0, 0), (0, 1)],
124
+ [(x, x), (1, 0), (1, 1), (1, 2)]
125
+ ]
126
+ x_padded = [
127
+ [(x, x), (0, -1)],
128
+ [(0, 0), (0, 1)],
129
+ [(x, x), (1, 0)],
130
+ [(1, 1), (1, 2)]
131
+ ]
132
+ x = [
133
+ [(0, 0), (0, 1)],
134
+ [(1, 0), (1, 1)]
135
+ ]
136
+ """
137
+ B, H, S, _ = x.shape
138
+ zero_pad = torch.zeros((B, H, S, 1), device=x.device, dtype=x.dtype)
139
+ # [B, H, S, 1]
140
+ x_padded = torch.cat([zero_pad, x], dim=-1)
141
+ # [B, H, S, 2S]
142
+ x_padded = x_padded.view(B, H, 2 * S, S)
143
+ # [B, H, 2S, S]
144
+ x = x_padded[:, :, 1:].view_as(x)[:, :, :, :S]
145
+ # only keep the positions from 0 to S
146
+ # [B, H, 2S-1, S] <view> [B, H, S, 2S - 1] <truncate in dim -1> [B, H, S, S]
147
+ return x
148
+
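For anyone verifying the index gymnastics in rel_shift, here is a throwaway numeric trace of the same tensor operations for S = 2 (the values are arbitrary):

# Standalone check of the shifting trick for S = 2, B = H = 1.
import torch

x = torch.tensor([[[[1., 2., 3.],
                    [4., 5., 6.]]]])                    # [B, H, S, 2S-1]
B, H, S, _ = x.shape
x_padded = torch.cat([torch.zeros(B, H, S, 1), x], dim=-1).view(B, H, 2 * S, S)
out = x_padded[:, :, 1:].view_as(x)[:, :, :, :S]
print(out)  # rows [2., 3.] and [4., 5.], wrapped in the B and H dims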
149
+ def forward(
150
+ self, query, key, value, pos_emb, mask=None, causal=False):
151
+ """Compute self-attention with relative positional embedding.
152
+ Args:
153
+ query (Tensor): [B, S, D].
154
+ key (Tensor): [B, S, D].
155
+ value (Tensor): [B, S, D].
156
+ pos_emb (Tensor): [1/B, 2S-1, D]. Positional embedding.
157
+ mask (BoolTensor): [B, S], True for masked.
158
+ causal (bool): True for applying causal mask.
159
+ Returns:
160
+ output (Tensor): [B, S, D].
161
+ """
162
+ # Splitting Q, K, V:
163
+ q, k, v = self.forward_qkv(query, key, value)
164
+ # [B, H, S, C], [B, H, S, C], [B, H, S, C]
165
+
166
+ # Adding per head & channel biases to the query vectors:
167
+ q_u = q + self.u.unsqueeze(1)
168
+ q_v = q + self.v.unsqueeze(1)
169
+ # [B, H, S, C]
170
+
171
+ # Splitting relative positional coding:
172
+ n_batch_pos = pos_emb.size(0) # [1/B, 2S-1, D]
173
+ p = self.linear_pos(pos_emb).view(n_batch_pos, -1, self.h, self.c)
174
+ # [1/B, 2S-1, H, C]
175
+ p = p.transpose(1, 2) # [1/B, H, 2S-1, C].
176
+
177
+ # Compute query, key similarity:
178
+ matrix_ac = torch.matmul(q_u, k.transpose(-2, -1))
179
+ # [B, H, S, C] x [B, H, C, S] -> [B, H, S, S]
180
+
181
+ matrix_bd = torch.matmul(q_v, p.transpose(-2, -1))
182
+ # [B, H, S, C] x [1/B, H, C, 2S-1] -> [B, H, S, 2S-1]
183
+ matrix_bd = self.rel_shift(matrix_bd)
184
+
185
+ scores = (matrix_ac + matrix_bd) / math.sqrt(self.c)
186
+ # [B, H, S, S]
187
+
188
+ return self.forward_attention(v, scores, mask, causal) # [B, S, D]
189
+
190
+
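Written out, the score computed by the forward pass above for query position i and key position j is, with r_{i-j} the linearly projected relative positional code, u and v the learned biases, and c the per-head dimension:

\mathrm{score}_{i,j} = \frac{(q_i + u)^\top k_j + (q_i + v)^\top r_{i-j}}{\sqrt{c}}

i.e. the (a)+(b)+(c)+(d) decomposition of Transformer-XL (arXiv:1901.02860), where matrix_ac carries the content terms and matrix_bd, after rel_shift, carries the positional terms.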
191
+ class ConditionalBiasScale(nn.Module):
192
+ def __init__(self, channels: int, cond_channels: int):
193
+ super().__init__()
194
+ self.scale_transform = nn.Linear(
195
+ cond_channels, channels, bias=True
196
+ )
197
+ self.bias_transform = nn.Linear(
198
+ cond_channels, channels, bias=True
199
+ )
200
+ self.init_parameters()
201
+
202
+ def init_parameters(self):
203
+ torch.nn.init.constant_(self.scale_transform.weight, 0.0)
204
+ torch.nn.init.constant_(self.scale_transform.bias, 1.0)
205
+ torch.nn.init.constant_(self.bias_transform.weight, 0.0)
206
+ torch.nn.init.constant_(self.bias_transform.bias, 0.0)
207
+
208
+ def forward(self, x: Tensor, cond: Tensor) -> Tensor:
209
+ """Applying conditional bias and scale.
210
+ Args:
211
+ x (Tensor): [..., channels].
212
+ cond (Tensor): [..., cond_channels].
213
+ Returns:
214
+ y (Tensor): [..., channels].
215
+ """
216
+ a = self.scale_transform.forward(cond)
217
+ b = self.bias_transform.forward(cond)
218
+ return x * a + b
219
+
220
+
221
+ class FeedForwardModule(torch.nn.Module):
222
+ """Positionwise feed forward layer used in conformer"""
223
+
224
+ def __init__(
225
+ self, d_in: int, d_hidden: int,
226
+ dropout: float, bias: bool = True, d_cond: int = 0
227
+ ):
228
+ """
229
+ Args:
230
+ d_in (int): Input feature dimension.
231
+ d_hidden (int): Hidden unit dimension.
232
+ dropout (float): dropout value for first Linear Layer.
233
+ bias (bool): If linear layers should have bias.
234
+ d_cond (int, optional): The channels of conditional tensor.
235
+ """
236
+ super(FeedForwardModule, self).__init__()
237
+ self.layer_norm = torch.nn.LayerNorm(d_in)
238
+
239
+ if d_cond > 0:
240
+ self.cond_layer = ConditionalBiasScale(d_in, d_cond)
241
+
242
+ self.w_1 = torch.nn.Linear(d_in, d_hidden, bias=bias)
243
+ self.w_2 = torch.nn.Linear(d_hidden, d_in, bias=bias)
244
+ self.dropout = torch.nn.Dropout(dropout)
245
+
246
+ def forward(self, x: Tensor, cond: Optional[Tensor] = None) -> Tensor:
247
+ """
248
+ Args:
249
+ x (Tensor): [..., D].
250
+ cond (Tensor, optional): [..., D_cond].
251
+ Returns:
252
+ y (Tensor): [..., D].
253
+ """
254
+ x = self.layer_norm(x)
255
+
256
+ if cond is not None:
257
+ x = self.cond_layer.forward(x, cond)
258
+
259
+ x = self.w_1(x)
260
+ x = silu(x)
261
+ x = self.dropout(x)
262
+ x = self.w_2(x)
263
+ return self.dropout(x)
264
+
265
+
266
+ class RelPositionalEncoding(nn.Module):
267
+ """Relative positional encoding cache.
268
+
269
+ Args:
270
+ d_model: Embedding dimension.
271
+ dropout_rate: Dropout rate.
272
+ max_len: Default maximum input length.
273
+ """
274
+
275
+ def __init__(self, max_len: int, d_model: int):
276
+ super().__init__()
277
+ self.d_model = d_model
278
+ self.cached_code = None
279
+ self.l = 0
280
+ self.gen_code(torch.tensor(0.0).expand(1, max_len))
281
+
282
+ def gen_code(self, x: Tensor):
283
+ """Generate positional encoding with a reference tensor x.
284
+ Args:
285
+ x (Tensor): [B, L, ...], we extract the device, length, and dtype from it.
286
+ Effects:
287
+ self.cached_code (Tensor): [1, >=(2L-1), D].
288
+ """
289
+ l = x.size(1)
290
+ if self.l >= l:
291
+ if self.cached_code.dtype != x.dtype or self.cached_code.device != x.device:
292
+ self.cached_code = self.cached_code.to(dtype=x.dtype, device=x.device)
293
+ return
294
+ # Suppose `i` is the position of the query vector and `j` is the
295
+ # position of the key vector. We use positive relative positions when keys
296
+ # are to the left (i>j) and negative relative positions otherwise (i<j).
297
+ code_pos = torch.zeros(l, self.d_model) # [L, D]
298
+ code_neg = torch.zeros(l, self.d_model) # [L, D]
299
+ pos = torch.arange(0, l, dtype=torch.float32).unsqueeze(1) # [L, 1]
300
+ decay = torch.exp(
301
+ torch.arange(0, self.d_model, 2, dtype=torch.float32)
302
+ * -(math.log(10000.0) / self.d_model)
303
+ ) # [D // 2]
304
+ code_pos[:, 0::2] = torch.sin(pos * decay)
305
+ code_pos[:, 1::2] = torch.cos(pos * decay)
306
+ code_neg[:, 0::2] = torch.sin(-1 * pos * decay)
307
+ code_neg[:, 1::2] = torch.cos(-1 * pos * decay)
308
+
309
+ # Reverse the order of positive indices and concat both positive and
310
+ # negative indices. This is used to support the shifting trick
311
+ # as in https://arxiv.org/abs/1901.02860
312
+ code_pos = torch.flip(code_pos, [0]).unsqueeze(0) # [1, L, D]
313
+ code_neg = code_neg[1:].unsqueeze(0) # [1, L - 1, D]
314
+ code = torch.cat([code_pos, code_neg], dim=1) # [1, 2L - 1, D]
315
+ self.cached_code = code.to(device=x.device, dtype=x.dtype)
316
+ self.l = l
317
+
318
+ def forward(self, x: Tensor) -> Tensor:
319
+ """Get positional encoding of appropriate shape given a reference Tensor.
320
+ Args:
321
+ x (Tensor): [B, L, ...].
322
+ Returns:
323
+ y (Tensor): [1, 2L-1, D].
324
+ """
325
+ self.gen_code(x)
326
+ l = x.size(1)
327
+ pos_emb = self.cached_code[
328
+ :, self.l - l: self.l + l - 1,
329
+ ]
330
+ return pos_emb
331
+
332
+
333
+ class ConformerBlock(torch.nn.Module):
334
+ """Conformer block based on https://arxiv.org/abs/2005.08100."""
335
+
336
+ def __init__(
337
+ self, d: int, d_hidden: int,
338
+ attention_heads: int, dropout: float,
339
+ depthwise_conv_kernel_size: int = 7,
340
+ causal: bool = False, d_cond: int = 0
341
+ ):
342
+ """
343
+ Args:
344
+ d (int): Block input output channel number.
345
+ d_hidden (int): FFN layer dimension.
346
+ attention_heads (int): Number of attention heads.
347
+ dropout (float): dropout value.
348
+ depthwise_conv_kernel_size (int): Size of kernel in depthwise conv.
349
+ d_cond (int, optional): The channels of conditional tensor.
350
+ """
351
+ super(ConformerBlock, self).__init__()
352
+ self.causal = causal
353
+ self.ffn1 = FeedForwardModule(
354
+ d, d_hidden, dropout, bias=True, d_cond=d_cond
355
+ )
356
+
357
+ self.self_attn_layer_norm = torch.nn.LayerNorm(d)
358
+
359
+ if d_cond > 0:
360
+ self.cond_layer = ConditionalBiasScale(d, d_cond)
361
+
362
+ self.self_attn = RelPositionMultiHeadedAttention(
363
+ d, attention_heads, dropout=dropout
364
+ )
365
+ self.self_attn_dropout = torch.nn.Dropout(dropout)
366
+
367
+ self.conv_module = ConvolutionModule(
368
+ d_in=d, d_hidden=d,
369
+ depthwise_kernel_size=depthwise_conv_kernel_size,
370
+ dropout=dropout, d_cond=d_cond
371
+ )
372
+
373
+ self.ffn2 = FeedForwardModule(
374
+ d, d_hidden, dropout, bias=True, d_cond=d_cond
375
+ )
376
+
377
+ self.final_layer_norm = torch.nn.LayerNorm(d)
378
+
379
+ def forward(
380
+ self, x: Tensor, mask: BoolTensor, pos_emb: Tensor,
381
+ cond: Optional[Tensor] = None
382
+ ) -> Tensor:
383
+ """
384
+ Args:
385
+ x (Tensor): [B, T, D_in].
386
+ mask (BoolTensor): [B, T], True for masked.
387
+ pos_emb (Tensor): [1 or B, 2T-1, D].
388
+ cond (Tensor, optional): [B, ?, D_cond].
389
+ Returns:
390
+ y (Tensor): [B, T, D_in].
391
+ """
392
+ y = x
393
+
394
+ x = self.ffn1(x) * 0.5 + y
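+ # Macaron-style half-step residual: each feed-forward module contributes with a
+ # 0.5-weighted residual connection, as in the Conformer paper cited above.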
395
+ y = x
396
+ # [B, T, D_in]
397
+
398
+ x = self.self_attn_layer_norm(x)
399
+
400
+ if cond is not None:
401
+ x = self.cond_layer.forward(x, cond)
402
+
403
+ x = self.self_attn.forward(
404
+ query=x, key=x, value=x,
405
+ pos_emb=pos_emb,
406
+ mask=mask, causal=self.causal
407
+ )
408
+ x = self.self_attn_dropout(x) + y
409
+ y = x
410
+ # [B, T, D_in]
411
+
412
+ x = self.conv_module.forward(x, mask) + y
413
+ y = x
414
+ # [B, T, D_in]
415
+
416
+ x = self.ffn2(x) * 0.5 + y
417
+
418
+ x = self.final_layer_norm(x)
419
+
420
+ x = x.masked_fill(mask.unsqueeze(-1), 0.0) # masked_fill is out-of-place; keep the result
421
+
422
+ return x
423
+
424
+
425
+ class ConvolutionModule(torch.nn.Module):
426
+ """Convolution Block inside a Conformer Block."""
427
+
428
+ def __init__(
429
+ self, d_in: int, d_hidden: int,
430
+ depthwise_kernel_size: int,
431
+ dropout: float, bias: bool = False,
432
+ causal: bool = False, d_cond: int = 0
433
+ ):
434
+ """
435
+ Args:
436
+ d_in (int): Embedding dimension.
437
+ d_hidden (int): Number of channels in depthwise conv layers.
438
+ depthwise_kernel_size (int): Depthwise conv layer kernel size.
439
+ dropout (float): dropout value.
440
+ bias (bool): If bias should be added to conv layers.
441
+ d_cond (int, optional): Channels of the conditioning tensor (0 disables the conditional bias/scale).
442
+ """
443
+ super(ConvolutionModule, self).__init__()
444
+ assert (depthwise_kernel_size - 1) % 2 == 0, "kernel_size should be odd"
445
+ self.causal = causal
446
+ self.causal_padding = (depthwise_kernel_size - 1, 0)
447
+ self.layer_norm = torch.nn.LayerNorm(d_in)
448
+
449
+ # Optional conditional LayerNorm:
450
+ self.d_cond = d_cond
451
+ if d_cond > 0:
452
+ self.cond_layer = ConditionalBiasScale(d_in, d_cond)
453
+
454
+ self.pointwise_conv1 = torch.nn.Conv1d(
455
+ d_in, 2 * d_hidden,
456
+ kernel_size=1,
457
+ stride=1, padding=0,
458
+ bias=bias
459
+ )
460
+ self.glu = torch.nn.GLU(dim=1)
461
+ self.depthwise_conv = torch.nn.Conv1d(
462
+ d_hidden, d_hidden,
463
+ kernel_size=depthwise_kernel_size,
464
+ stride=1,
465
+ padding=(depthwise_kernel_size - 1) // 2 if not causal else 0,
466
+ groups=d_hidden, bias=bias
467
+ )
468
+ self.pointwise_conv2 = torch.nn.Conv1d(
469
+ d_hidden, d_in,
470
+ kernel_size=1,
471
+ stride=1, padding=0,
472
+ bias=bias,
473
+ )
474
+ self.dropout = torch.nn.Dropout(dropout)
475
+
476
+ def forward(self, x: Tensor, mask: BoolTensor, cond: Optional[Tensor] = None) -> Tensor:
477
+ """
478
+ Args:
479
+ x (Tensor): [B, T, D_in].
480
+ mask (BoolTensor): [B, T], True for masked.
481
+ cond (Tensor): [B, T, D_cond].
482
+ Returns:
483
+ y (Tensor): [B, T, D_in].
484
+ """
485
+ x = self.layer_norm(x)
486
+
487
+ if cond is not None:
488
+ x = self.cond_layer.forward(x, cond)
489
+
490
+ x = x.transpose(-1, -2) # [B, D_in, T]
491
+
492
+ x = self.pointwise_conv1(x) # [B, 2C, T]
493
+ x = self.glu(x) # [B, C, T]
494
+
495
+ # Take care of masking the input tensor:
496
+ if mask is not None:
497
+ x = x.masked_fill(mask.unsqueeze(1), 0.0)
498
+
499
+ # 1D Depthwise Conv
500
+ if self.causal: # Causal padding
501
+ x = pad(x, self.causal_padding)
502
+ x = self.depthwise_conv(x)
503
+ # FIXME: BatchNorm should not be used in variable length training.
504
+ x = silu(x) # [B, C, T]
505
+
506
+ if mask is not None:
507
+ x = x.masked_fill(mask.unsqueeze(1), 0.0)
508
+
509
+ x = self.pointwise_conv2(x)
510
+ x = self.dropout(x)
511
+ return x.transpose(-1, -2) # [B, T, D_in]
512
+
513
+
514
+ class Conformer(torch.nn.Module):
515
+ def __init__(
516
+ self,
517
+ d: int,
518
+ d_hidden: int,
519
+ n_heads: int,
520
+ n_layers: int,
521
+ dropout: float,
522
+ depthwise_conv_kernel_size: int,
523
+ causal: bool = False,
524
+ d_cond: int = 0
525
+ ):
526
+ super().__init__()
527
+ self.pos_encoding = RelPositionalEncoding(1024, d)
528
+ self.causal = causal
529
+
530
+ self.blocks = torch.nn.ModuleList(
531
+ [
532
+ ConformerBlock(
533
+ d=d,
534
+ d_hidden=d_hidden,
535
+ attention_heads=n_heads,
536
+ dropout=dropout,
537
+ depthwise_conv_kernel_size=depthwise_conv_kernel_size,
538
+ causal=causal,
539
+ d_cond=d_cond
540
+ )
541
+ for _ in range(n_layers)
542
+ ]
543
+ ) # type: Iterable[ConformerBlock]
544
+
545
+ def forward(
546
+ self, x: Tensor, mask: BoolTensor, cond: Optional[Tensor] = None
547
+ ) -> Tensor:
548
+ """Conformer forwarding.
549
+ Args:
550
+ x (Tensor): [B, T, D].
551
+ mask (BoolTensor): [B, T], with True for masked.
552
+ cond (Tensor, optional): [B, T, D_cond].
553
+ Returns:
554
+ y (Tensor): [B, T, D]
555
+ """
556
+ pos_emb = self.pos_encoding(x) # [1, 2T-1, D]
557
+
558
+ for block in self.blocks:
559
+ x = block.forward(x, mask, pos_emb, cond)
560
+
561
+ return x
562
+
563
+
564
+ class CNNBlock(nn.Module):
565
+ def __init__(self, in_dim, out_dim, dropout, cond_dim, kernel_size, stride):
566
+ super(CNNBlock, self).__init__()
567
+ self.layers = nn.Sequential(
568
+ nn.Conv1d(in_dim, out_dim, kernel_size, stride),
569
+ nn.ReLU(),
570
+ nn.BatchNorm1d(out_dim,),
571
+ nn.Dropout(p=dropout)
572
+ )
573
+
574
+ def forward(self, inp):
575
+ out = self.layers(inp)
576
+ return out
577
+
578
+
579
+ class CNNClassifier(nn.Module):
580
+ def __init__(self, in_dim, d_decoder, decoder_dropout, cond_dim):
581
+ super(CNNClassifier, self).__init__()
582
+ self.cnn = nn.Sequential(
583
+ CNNBlock(in_dim, d_decoder, decoder_dropout, cond_dim, 8, 4),
584
+ CNNBlock(d_decoder, d_decoder, decoder_dropout, cond_dim, 8, 4),
585
+ CNNBlock(d_decoder, d_decoder, decoder_dropout, cond_dim, 4, 2),
586
+ CNNBlock(d_decoder, d_decoder, decoder_dropout, cond_dim, 4, 2),
587
+ ) # receptive field is 180, frame shift is 64
588
+ self.cond_layer = nn.Sequential(
589
+ nn.Linear(cond_dim, in_dim),
590
+ nn.LeakyReLU(),
591
+ nn.Linear(in_dim, in_dim)
592
+ )
593
+
594
+ def forward(self, inp, mask, cond):
595
+ inp = inp.transpose(-1, -2)
596
+ cond = cond.transpose(-1, -2)
597
+ inp.masked_fill_(mask.unsqueeze(1), 0.0)
598
+ cond = self.cond_layer(cond.transpose(-1, -2)).transpose(-1, -2)
599
+ cond.masked_fill_(mask.unsqueeze(1), 0.0)
600
+ inp = inp + cond
601
+ return self.cnn(inp)
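+ # The four conv blocks use strides 4, 4, 2, 2, so the output is downsampled by a
+ # factor of 64 in time, matching the "frame shift is 64" note above.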
602
+
603
+
604
+ class CNNClassifierWithTime(nn.Module):
605
+ def __init__(self, in_dim, d_decoder, decoder_dropout, cond_dim, time_emb_dim=512):
606
+ super(CNNClassifierWithTime, self).__init__()
607
+ self.cnn = nn.Sequential(
608
+ CNNBlock(in_dim, d_decoder, decoder_dropout, cond_dim, 8, 4),
609
+ CNNBlock(d_decoder, d_decoder, decoder_dropout, cond_dim, 8, 4),
610
+ CNNBlock(d_decoder, d_decoder, decoder_dropout, cond_dim, 4, 2),
611
+ CNNBlock(d_decoder, d_decoder, decoder_dropout, cond_dim, 4, 2),
612
+ ) # receptive field is 180, frame shift is 64
613
+ self.cond_layer = nn.Sequential(
614
+ nn.Linear(cond_dim, in_dim),
615
+ nn.LeakyReLU(),
616
+ nn.Linear(in_dim, in_dim)
617
+ )
618
+ self.time_emb = SinusoidalPosEmb(time_emb_dim)
619
+ self.time_layer = nn.Sequential(
620
+ nn.Linear(time_emb_dim, in_dim),
621
+ nn.LeakyReLU(),
622
+ nn.Linear(in_dim, in_dim)
623
+ )
624
+
625
+ def forward(self, inp, mask, cond, t):
626
+ time_emb = self.time_emb(t) # [B, time_emb_dim]
627
+ time_emb = self.time_layer(time_emb.unsqueeze(1)).transpose(-1, -2)
628
+ inp = inp.transpose(-1, -2)
629
+ cond = cond.transpose(-1, -2)
630
+ inp.masked_fill_(mask.unsqueeze(1), 0.0)
631
+ cond = self.cond_layer(cond.transpose(-1, -2)).transpose(-1, -2)
632
+ cond.masked_fill_(mask.unsqueeze(1), 0.0)
633
+ inp = inp + cond + time_emb
634
+ return self.cnn(inp)
635
+
636
+
637
+ class SpecClassifier(nn.Module):
638
+ def __init__(self, in_dim, d_decoder, h_decoder,
639
+ l_decoder, decoder_dropout,
640
+ k_decoder, n_class, cond_dim, model_type='conformer'):
641
+ super(SpecClassifier, self).__init__()
642
+ self.model_type = model_type
643
+ self.prenet = nn.Sequential(
644
+ nn.Linear(in_features=in_dim, out_features=d_decoder)
645
+ )
646
+ if model_type == 'conformer':
647
+ self.conformer = Conformer(d=d_decoder, d_hidden=d_decoder, n_heads=h_decoder,
648
+ n_layers=l_decoder, dropout=decoder_dropout,
649
+ depthwise_conv_kernel_size=k_decoder, d_cond=cond_dim)
650
+ elif model_type == 'CNN':
651
+ self.conformer = CNNClassifier(in_dim=d_decoder, d_decoder=d_decoder,
652
+ decoder_dropout=decoder_dropout, cond_dim=cond_dim)
653
+ elif model_type == 'CNN-with-time':
654
+ self.conformer = CNNClassifierWithTime(in_dim=d_decoder, d_decoder=d_decoder,
655
+ decoder_dropout=decoder_dropout, cond_dim=cond_dim, time_emb_dim=256)
656
+ self.classifier = nn.Linear(d_decoder, n_class)
657
+
658
+ def forward(self, noisy_mel, condition, mask, **kwargs):
659
+ """
660
+ Args:
661
+ noisy_mel: [B, T, D]
662
+ condition: [B, T, D]
663
+ mask: [B, T], True for un-masked (real) frames
664
+
665
+ Returns:
666
+ classification logits (un-softmaxed)
667
+ """
668
+ # print(noisy_mel.shape)
669
+ noisy_mel = noisy_mel.masked_fill(~mask.unsqueeze(-1), 0.0)
670
+
671
+ # print(self.prenet, noisy_mel.shape)
672
+ hiddens = self.prenet(noisy_mel)
673
+
674
+ if self.model_type == 'CNN-with-time':
675
+ hiddens = self.conformer.forward(hiddens, ~mask, condition, kwargs['t'])
676
+ else:
677
+ hiddens = self.conformer.forward(hiddens, ~mask, condition) # [B, T, D]
678
+
679
+ if self.model_type == 'conformer':
680
+ averaged_hiddens = torch.mean(hiddens, dim=1) # [B, D]
681
+ logits = self.classifier(averaged_hiddens)
682
+ return logits
683
+ elif self.model_type == 'CNN' or self.model_type == 'CNN-with-time':
684
+ hiddens = hiddens.transpose(-1, -2)
685
+ return self.classifier(hiddens) # [B, T', C]
686
+
687
+ @property
688
+ def nparams(self):
689
+ return sum([p.numel() for p in self.parameters()])
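+ # Minimal usage sketch (hypothetical dimensions, not taken from the training config):
+ # clf = SpecClassifier(in_dim=80, d_decoder=256, h_decoder=4, l_decoder=4,
+ #                      decoder_dropout=0.1, k_decoder=7, n_class=5, cond_dim=80,
+ #                      model_type='CNN')
+ # logits = clf(noisy_mel, condition, mask)  # noisy_mel/condition: [B, T, 80]; mask: [B, T], True = real frame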
690
+
model/diffusion.py ADDED
@@ -0,0 +1,513 @@
1
+ import math
2
+ import torch
3
+ from einops import rearrange
4
+
5
+ from model.base import BaseModule
6
+
7
+
8
+ class Mish(BaseModule):
9
+ def forward(self, x):
10
+ return x * torch.tanh(torch.nn.functional.softplus(x))
11
+
12
+
13
+ class Upsample(BaseModule):
14
+ def __init__(self, dim):
15
+ super(Upsample, self).__init__()
16
+ self.conv = torch.nn.ConvTranspose2d(dim, dim, 4, 2, 1)
17
+
18
+ def forward(self, x):
19
+ return self.conv(x)
20
+
21
+
22
+ class Downsample(BaseModule):
23
+ def __init__(self, dim):
24
+ super(Downsample, self).__init__()
25
+ self.conv = torch.nn.Conv2d(dim, dim, 3, 2, 1) # kernel=3, stride=2, padding=1.
26
+
27
+ def forward(self, x):
28
+ return self.conv(x)
29
+
30
+
31
+ class Rezero(BaseModule):
32
+ def __init__(self, fn):
33
+ super(Rezero, self).__init__()
34
+ self.fn = fn
35
+ self.g = torch.nn.Parameter(torch.zeros(1))
36
+
37
+ def forward(self, x):
38
+ return self.fn(x) * self.g
39
+
40
+
41
+ class Block(BaseModule):
42
+ def __init__(self, dim, dim_out, groups=8):
43
+ super(Block, self).__init__()
44
+ self.block = torch.nn.Sequential(torch.nn.Conv2d(dim, dim_out, 3,
45
+ padding=1), torch.nn.GroupNorm(
46
+ groups, dim_out), Mish())
47
+
48
+ def forward(self, x, mask):
49
+ output = self.block(x * mask)
50
+ return output * mask
51
+
52
+
53
+ class ResnetBlock(BaseModule):
54
+ def __init__(self, dim, dim_out, time_emb_dim, groups=8):
55
+ super(ResnetBlock, self).__init__()
56
+ self.mlp = torch.nn.Sequential(Mish(), torch.nn.Linear(time_emb_dim,
57
+ dim_out))
58
+
59
+ self.block1 = Block(dim, dim_out, groups=groups)
60
+ self.block2 = Block(dim_out, dim_out, groups=groups)
61
+ if dim != dim_out:
62
+ self.res_conv = torch.nn.Conv2d(dim, dim_out, 1)
63
+ else:
64
+ self.res_conv = torch.nn.Identity()
65
+
66
+ def forward(self, x, mask, time_emb):
67
+ h = self.block1(x, mask)
68
+ h += self.mlp(time_emb).unsqueeze(-1).unsqueeze(-1)
69
+ h = self.block2(h, mask)
70
+ output = h + self.res_conv(x * mask)
71
+ return output
72
+
73
+
74
+ class LinearAttention(BaseModule):
75
+ def __init__(self, dim, heads=4, dim_head=32):
76
+ super(LinearAttention, self).__init__()
77
+ self.heads = heads
78
+ hidden_dim = dim_head * heads
79
+ self.to_qkv = torch.nn.Conv2d(dim, hidden_dim * 3, 1, bias=False) # NOTE: 1x1 conv
80
+ self.to_out = torch.nn.Conv2d(hidden_dim, dim, 1)
81
+
82
+ def forward(self, x):
83
+ b, c, h, w = x.shape
84
+ qkv = self.to_qkv(x)
85
+ q, k, v = rearrange(qkv, 'b (qkv heads c) h w -> qkv b heads c (h w)', heads=self.heads, qkv=3)
86
+ k = k.softmax(dim=-1)
87
+ context = torch.einsum('bhdn,bhen->bhde', k, v)
88
+ out = torch.einsum('bhde,bhdn->bhen', context, q)
89
+ out = rearrange(out, 'b heads c (h w) -> b (heads c) h w',
90
+ heads=self.heads, h=h, w=w)
91
+ return self.to_out(out)
92
+
93
+
94
+ class Residual(BaseModule):
95
+ def __init__(self, fn):
96
+ super(Residual, self).__init__()
97
+ self.fn = fn
98
+
99
+ def forward(self, x, *args, **kwargs):
100
+ output = self.fn(x, *args, **kwargs) + x
101
+ return output
102
+
103
+
104
+ class SinusoidalPosEmb(BaseModule):
105
+ def __init__(self, dim):
106
+ super(SinusoidalPosEmb, self).__init__()
107
+ self.dim = dim
108
+
109
+ def forward(self, x, scale=1000):
110
+ device = x.device
111
+ half_dim = self.dim // 2
112
+ emb = math.log(10000) / (half_dim - 1)
113
+ emb = torch.exp(torch.arange(half_dim, device=device).float() * -emb)
114
+ emb = scale * x.unsqueeze(1) * emb.unsqueeze(0)
115
+ emb = torch.cat((emb.sin(), emb.cos()), dim=-1)
116
+ return emb
117
+
118
+
119
+ class GradLogPEstimator2d(BaseModule):
120
+ def __init__(self, dim, dim_mults=(1, 2, 4), groups=8, spk_emb_dim=64, n_feats=80, pe_scale=1000):
121
+ super(GradLogPEstimator2d, self).__init__()
122
+ self.dim = dim
123
+ self.dim_mults = dim_mults
124
+ self.groups = groups
125
+ self.spk_emb_dim = spk_emb_dim
126
+ self.pe_scale = pe_scale
127
+
128
+ self.spk_mlp = torch.nn.Sequential(torch.nn.Linear(spk_emb_dim, spk_emb_dim * 4), Mish(),
129
+ torch.nn.Linear(spk_emb_dim * 4, n_feats))
130
+ self.time_pos_emb = SinusoidalPosEmb(dim)
131
+ self.mlp = torch.nn.Sequential(torch.nn.Linear(dim, dim * 4), Mish(),
132
+ torch.nn.Linear(dim * 4, dim))
133
+
134
+ dims = [3, *map(lambda m: dim * m, dim_mults)]
135
+ in_out = list(zip(dims[:-1], dims[1:]))
136
+ self.downs = torch.nn.ModuleList([])
137
+ self.ups = torch.nn.ModuleList([])
138
+ num_resolutions = len(in_out)
139
+
140
+ for ind, (dim_in, dim_out) in enumerate(in_out):
141
+ is_last = ind >= (num_resolutions - 1)
142
+ self.downs.append(torch.nn.ModuleList([
143
+ ResnetBlock(dim_in, dim_out, time_emb_dim=dim),
144
+ ResnetBlock(dim_out, dim_out, time_emb_dim=dim),
145
+ Residual(Rezero(LinearAttention(dim_out))),
146
+ Downsample(dim_out) if not is_last else torch.nn.Identity()]))
147
+
148
+ mid_dim = dims[-1]
149
+ self.mid_block1 = ResnetBlock(mid_dim, mid_dim, time_emb_dim=dim)
150
+ self.mid_attn = Residual(Rezero(LinearAttention(mid_dim)))
151
+ self.mid_block2 = ResnetBlock(mid_dim, mid_dim, time_emb_dim=dim)
152
+
153
+ for ind, (dim_in, dim_out) in enumerate(reversed(in_out[1:])):
154
+ self.ups.append(torch.nn.ModuleList([
155
+ ResnetBlock(dim_out * 2, dim_in, time_emb_dim=dim),
156
+ ResnetBlock(dim_in, dim_in, time_emb_dim=dim),
157
+ Residual(Rezero(LinearAttention(dim_in))),
158
+ Upsample(dim_in)]))
159
+ self.final_block = Block(dim, dim)
160
+ self.final_conv = torch.nn.Conv2d(dim, 1, 1)
161
+
162
+ def forward(self, x, mask, mu, t, spk=None):
163
+ # x, mu: [B, 80, L], t: [B, ], mask: [B, 1, L]
164
+ if spk is not None:
165
+ s = self.spk_mlp(spk)
166
+
167
+ t = self.time_pos_emb(t, scale=self.pe_scale)
168
+ t = self.mlp(t) # [B, 64]
169
+
170
+ s = s.unsqueeze(-1).repeat(1, 1, x.shape[-1])
171
+ x = torch.stack([mu, x, s], 1) # [B, 3, 80, L]
172
+ mask = mask.unsqueeze(1) # [B, 1, 1, L]
173
+
174
+ hiddens = []
175
+ masks = [mask]
176
+ for resnet1, resnet2, attn, downsample in self.downs:
177
+ mask_down = masks[-1]
178
+ x = resnet1(x, mask_down, t) # [B, 64, 80, L]
179
+ x = resnet2(x, mask_down, t)
180
+ x = attn(x)
181
+ hiddens.append(x)
182
+ x = downsample(x * mask_down)
183
+ masks.append(mask_down[:, :, :, ::2])
184
+
185
+ masks = masks[:-1]
186
+ mask_mid = masks[-1]
187
+ x = self.mid_block1(x, mask_mid, t)
188
+ x = self.mid_attn(x)
189
+ x = self.mid_block2(x, mask_mid, t)
190
+
191
+ for resnet1, resnet2, attn, upsample in self.ups:
192
+ mask_up = masks.pop()
193
+ x = torch.cat((x, hiddens.pop()), dim=1)
194
+ x = resnet1(x, mask_up, t)
195
+ x = resnet2(x, mask_up, t)
196
+ x = attn(x)
197
+ x = upsample(x * mask_up)
198
+
199
+ x = self.final_block(x, mask)
200
+ output = self.final_conv(x * mask)
201
+
202
+ return (output * mask).squeeze(1)
203
+
204
+
205
+ def get_noise(t, beta_init, beta_term, cumulative=False):
206
+ if cumulative:
207
+ noise = beta_init*t + 0.5*(beta_term - beta_init)*(t**2)
208
+ else:
209
+ noise = beta_init + (beta_term - beta_init)*t
210
+ return noise
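+ # For the linear schedule beta_s = beta_init + (beta_term - beta_init) * s, the
+ # cumulative=True branch returns its integral over [0, t]:
+ # beta_init * t + 0.5 * (beta_term - beta_init) * t**2.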
211
+
212
+
213
+ class Diffusion(BaseModule):
214
+ def __init__(self, n_feats, dim, spk_emb_dim=64,
215
+ beta_min=0.05, beta_max=20, pe_scale=1000):
216
+ super(Diffusion, self).__init__()
217
+ self.n_feats = n_feats
218
+ self.dim = dim
219
+ # self.n_spks = n_spks
220
+ self.spk_emb_dim = spk_emb_dim
221
+ self.beta_min = beta_min
222
+ self.beta_max = beta_max
223
+ self.pe_scale = pe_scale
224
+
225
+ self.estimator = GradLogPEstimator2d(dim,
226
+ spk_emb_dim=spk_emb_dim,
227
+ pe_scale=pe_scale,
228
+ n_feats=n_feats)
229
+
230
+ def forward_diffusion(self, x0, mask, mu, t):
231
+ time = t.unsqueeze(-1).unsqueeze(-1)
232
+ cum_noise = get_noise(time, self.beta_min, self.beta_max, cumulative=True) # it is actually the integral of beta
233
+ mean = x0*torch.exp(-0.5*cum_noise) + mu*(1.0 - torch.exp(-0.5*cum_noise))
234
+ variance = 1.0 - torch.exp(-cum_noise)
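+ # Closed-form moments of the forward SDE with drift 0.5 * (mu - x) * beta_t:
+ # x_t | x_0 ~ N(x_0 * exp(-0.5*I(t)) + mu * (1 - exp(-0.5*I(t))), (1 - exp(-I(t))) * I),
+ # where I(t) is the cumulative noise computed above.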
235
+ z = torch.randn(x0.shape, dtype=x0.dtype, device=x0.device,
236
+ requires_grad=False)
237
+ xt = mean + z * torch.sqrt(variance)
238
+ return xt * mask, z * mask
239
+
240
+ @torch.no_grad()
241
+ def reverse_diffusion(self, z, mask, mu, n_timesteps, stoc=False, spk=None,
242
+ use_classifier_free=False,
243
+ classifier_free_guidance=3.0,
244
+ dummy_spk=None): # emo need to be merged by spk
245
+
246
+ # looks like a plain Euler-Maruyama method
247
+ h = 1.0 / n_timesteps
248
+ xt = z * mask
249
+ for i in range(n_timesteps):
250
+ t = (1.0 - (i + 0.5)*h) * torch.ones(z.shape[0], dtype=z.dtype,
251
+ device=z.device)
252
+ time = t.unsqueeze(-1).unsqueeze(-1)
253
+ noise_t = get_noise(time, self.beta_min, self.beta_max,
254
+ cumulative=False)
255
+
256
+ if not use_classifier_free:
257
+ if stoc: # adds stochastic term
258
+ dxt_det = 0.5 * (mu - xt) - self.estimator(xt, mask, mu, t, spk)
259
+ dxt_det = dxt_det * noise_t * h
260
+ dxt_stoc = torch.randn(z.shape, dtype=z.dtype, device=z.device,
261
+ requires_grad=False)
262
+ dxt_stoc = dxt_stoc * torch.sqrt(noise_t * h)
263
+ dxt = dxt_det + dxt_stoc
264
+ else:
265
+ dxt = 0.5 * (mu - xt - self.estimator(xt, mask, mu, t, spk))
266
+ dxt = dxt * noise_t * h
267
+ xt = (xt - dxt) * mask
268
+ else:
269
+ if stoc: # adds stochastic term
270
+ score_estimate = (1 + classifier_free_guidance) * self.estimator(xt, mask, mu, t, spk) \
271
+ - classifier_free_guidance * self.estimator(xt, mask, mu, t, dummy_spk)
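+ # Classifier-free guidance: extrapolate away from the "dummy" (unconditional)
+ # score towards the conditional one with weight classifier_free_guidance.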
272
+ dxt_det = 0.5 * (mu - xt) - score_estimate
273
+ dxt_det = dxt_det * noise_t * h
274
+ dxt_stoc = torch.randn(z.shape, dtype=z.dtype, device=z.device,
275
+ requires_grad=False)
276
+ dxt_stoc = dxt_stoc * torch.sqrt(noise_t * h)
277
+ dxt = dxt_det + dxt_stoc
278
+ else:
279
+ score_estimate = (1 + classifier_free_guidance) * self.estimator(xt, mask, mu, t, spk) \
280
+ - classifier_free_guidance * self.estimator(xt, mask, mu, t, dummy_spk)
281
+ dxt = 0.5 * (mu - xt - score_estimate)
282
+ dxt = dxt * noise_t * h
283
+ xt = (xt - dxt) * mask
284
+ return xt
285
+
286
+ @torch.no_grad()
287
+ def forward(self, z, mask, mu, n_timesteps, stoc=False, spk=None,
288
+ use_classifier_free=False,
289
+ classifier_free_guidance=3.0,
290
+ dummy_spk=None
291
+ ):
292
+ return self.reverse_diffusion(z, mask, mu, n_timesteps, stoc, spk, use_classifier_free, classifier_free_guidance, dummy_spk)
293
+
294
+ def loss_t(self, x0, mask, mu, t, spk=None):
295
+ xt, z = self.forward_diffusion(x0, mask, mu, t) # z is sampled from N(0, I)
296
+ time = t.unsqueeze(-1).unsqueeze(-1)
297
+ cum_noise = get_noise(time, self.beta_min, self.beta_max, cumulative=True)
298
+ noise_estimation = self.estimator(xt, mask, mu, t, spk)
299
+ noise_estimation *= torch.sqrt(1.0 - torch.exp(-cum_noise)) # multiply by lambda which is set to be variance
300
+ # (strictly, this multiplies by sqrt(lambda), not lambda itself)
301
+ # NOTE: folding sqrt(lambda) into the squared error avoids dividing z by the noise std.
302
+ loss = torch.sum((noise_estimation + z)**2) / (torch.sum(mask)*self.n_feats)
303
+ return loss, xt
304
+
305
+ def compute_loss(self, x0, mask, mu, spk=None, offset=1e-5):
306
+ t = torch.rand(x0.shape[0], dtype=x0.dtype, device=x0.device,
307
+ requires_grad=False)
308
+ t = torch.clamp(t, offset, 1.0 - offset)
309
+ return self.loss_t(x0, mask, mu, t, spk)
310
+
311
+ def classifier_decode(self, z, mask, mu, n_timesteps, stoc=False, spk=None, classifier_func=None, guidance=1.0, control_emo=None, classifier_type="conformer"):
312
+ # control_emo should be [B, ] tensor
313
+ h = 1.0 / n_timesteps
314
+ xt = z * mask
315
+ for i in range(n_timesteps):
316
+ t = (1.0 - (i + 0.5) * h) * torch.ones(z.shape[0], dtype=z.dtype,
317
+ device=z.device)
318
+ time = t.unsqueeze(-1).unsqueeze(-1)
319
+ noise_t = get_noise(time, self.beta_min, self.beta_max,
320
+ cumulative=False)
321
+ # =========== classifier part ==============
322
+ xt = xt.detach()
323
+ xt.requires_grad_(True)
324
+ if classifier_type == 'CNN-with-time':
325
+ logits = classifier_func(xt.transpose(1, 2), mu.transpose(1, 2), (mask == 1.0).squeeze(1), t=t)
326
+ else:
327
+ logits = classifier_func(xt.transpose(1, 2), mu.transpose(1, 2), (mask == 1.0).squeeze(1))
328
+
329
+ if classifier_type == 'conformer': # [B, C]
330
+ probs = torch.log_softmax(logits, dim=-1) # [B, C]
331
+ elif classifier_type == 'CNN' or classifier_type == 'CNN-with-time' :
332
+ probs_every_place = torch.softmax(logits, dim=-1) # [B, T', C]
333
+ probs_mean = torch.mean(probs_every_place, dim=1) # [B, C]
334
+ probs = torch.log(probs_mean)
335
+ else:
336
+ raise NotImplementedError
337
+
338
+ control_emo_probs = probs[torch.arange(len(control_emo)).to(control_emo.device), control_emo]
339
+ control_emo_probs.sum().backward(retain_graph=True)
340
+ # NOTE: summing gives every batch element equal weight in the gradient.
341
+ xt_grad = xt.grad
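+ # Classifier guidance: xt_grad is the gradient of log p(target emotion | x_t) w.r.t.
+ # x_t; it is added to the score term in the update below, scaled by `guidance`.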
342
+ # ==========================================
343
+
344
+ if stoc: # adds stochastic term
345
+ dxt_det = 0.5 * (mu - xt) - self.estimator(xt, mask, mu, t, spk) - guidance * xt_grad
346
+ dxt_det = dxt_det * noise_t * h
347
+ dxt_stoc = torch.randn(z.shape, dtype=z.dtype, device=z.device,
348
+ requires_grad=False)
349
+ dxt_stoc = dxt_stoc * torch.sqrt(noise_t * h)
350
+ dxt = dxt_det + dxt_stoc
351
+ else:
352
+ dxt = 0.5 * (mu - xt - self.estimator(xt, mask, mu, t, spk) - guidance * xt_grad)
353
+ dxt = dxt * noise_t * h
354
+ xt = (xt - dxt) * mask
355
+ return xt
356
+
357
+ def classifier_decode_DPS(self, z, mask, mu, n_timesteps, stoc=False, spk=None, classifier_func=None, guidance=1.0, control_emo=None, classifier_type="conformer"):
358
+ # control_emo should be [B, ] tensor
359
+ h = 1.0 / n_timesteps
360
+ xt = z * mask
361
+ for i in range(n_timesteps):
362
+ t = (1.0 - (i + 0.5) * h) * torch.ones(z.shape[0], dtype=z.dtype, device=z.device)
363
+ time = t.unsqueeze(-1).unsqueeze(-1)
364
+ noise_t = get_noise(time, self.beta_min, self.beta_max, cumulative=False)
365
+ beta_integral_t = get_noise(time, self.beta_min, self.beta_max, cumulative=True)
366
+ bar_alpha_t = torch.exp(-beta_integral_t) # torch.exp: beta_integral_t is a [B, 1, 1] tensor
367
+
368
+ # =========== classifier part ==============
369
+ xt = xt.detach()
370
+ xt.requires_grad_(True)
371
+ score_estimate = self.estimator(xt, mask, mu, t, spk)
372
+ x0_hat = (xt + (1 - bar_alpha_t) * score_estimate) / torch.sqrt(bar_alpha_t)
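+ # DPS-style guidance: the classifier is evaluated on a Tweedie-style estimate of
+ # x_0 reconstructed from x_t and the score, rather than on the noisy x_t itself.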
373
+
374
+ if classifier_type == 'CNN-with-time':
375
+ raise NotImplementedError
376
+ else:
377
+ logits = classifier_func(x0_hat.transpose(1, 2), mu.transpose(1, 2), (mask == 1.0).squeeze(1))
378
+ if classifier_type == 'conformer': # [B, C]
379
+ probs = torch.log_softmax(logits, dim=-1) # [B, C]
380
+ elif classifier_type == 'CNN':
381
+ probs_every_place = torch.softmax(logits, dim=-1) # [B, T', C]
382
+ probs_mean = torch.mean(probs_every_place, dim=1) # [B, C]
383
+
384
+ probs_mean = probs_mean + 10E-10
385
+ # NOTE: at the first few steps, x0 may be very large. Then the classifier output logits will also have extreme value range.
386
+ #
387
+
388
+ probs = torch.log(probs_mean)
389
+ else:
390
+ raise NotImplementedError
391
+
392
+ control_emo_probs = probs[torch.arange(len(control_emo)).to(control_emo.device), control_emo]
393
+ control_emo_probs.sum().backward(retain_graph=True)
394
+ # NOTE: summing gives every batch element equal weight in the gradient.
395
+ xt_grad = xt.grad
396
+ # ==========================================
397
+
398
+ if stoc: # adds stochastic term
399
+ dxt_det = 0.5 * (mu - xt) - score_estimate - guidance * xt_grad
400
+ dxt_det = dxt_det * noise_t * h
401
+ dxt_stoc = torch.randn(z.shape, dtype=z.dtype, device=z.device, requires_grad=False)
402
+ dxt_stoc = dxt_stoc * torch.sqrt(noise_t * h)
403
+ dxt = dxt_det + dxt_stoc
404
+ else:
405
+ dxt = 0.5 * (mu - xt - score_estimate - guidance * xt_grad)
406
+ dxt = dxt * noise_t * h
407
+ xt = (xt - dxt) * mask
408
+ return xt
409
+
410
+ def classifier_decode_mixture(self, z, mask, mu, n_timesteps, stoc=False, spk=None, classifier_func=None, guidance=1.0, control_emo1=None,control_emo2=None, emo1_weight=None, classifier_type="conformer"):
411
+ # control_emo should be [B, ] tensor
412
+ h = 1.0 / n_timesteps
413
+ xt = z * mask
414
+ for i in range(n_timesteps):
415
+ t = (1.0 - (i + 0.5) * h) * torch.ones(z.shape[0], dtype=z.dtype,
416
+ device=z.device)
417
+ time = t.unsqueeze(-1).unsqueeze(-1)
418
+ noise_t = get_noise(time, self.beta_min, self.beta_max,
419
+ cumulative=False)
420
+ # =========== classifier part ==============
421
+ xt = xt.detach()
422
+ xt.requires_grad_(True)
423
+ if classifier_type == 'CNN-with-time':
424
+ logits = classifier_func(xt.transpose(1, 2), mu.transpose(1, 2), (mask == 1.0).squeeze(1), t=t)
425
+ else:
426
+ logits = classifier_func(xt.transpose(1, 2), mu.transpose(1, 2), (mask == 1.0).squeeze(1))
427
+
428
+ if classifier_type == 'conformer': # [B, C]
429
+ probs = torch.log_softmax(logits, dim=-1) # [B, C]
430
+ elif classifier_type == 'CNN' or classifier_type == 'CNN-with-time' :
431
+ probs_every_place = torch.softmax(logits, dim=-1) # [B, T', C]
432
+ probs_mean = torch.mean(probs_every_place, dim=1) # [B, C]
433
+ probs = torch.log(probs_mean)
434
+ else:
435
+ raise NotImplementedError
436
+
437
+ control_emo_probs1 = probs[torch.arange(len(control_emo1)).to(control_emo1.device), control_emo1]
438
+ control_emo_probs2 = probs[torch.arange(len(control_emo2)).to(control_emo2.device), control_emo2]
439
+ control_emo_probs = control_emo_probs1 * emo1_weight + control_emo_probs2 * (1-emo1_weight) # interpolate
440
+
441
+ control_emo_probs.sum().backward(retain_graph=True)
442
+ # NOTE: summing gives every batch element equal weight in the gradient.
443
+ xt_grad = xt.grad
444
+ # ==========================================
445
+
446
+ if stoc: # adds stochastic term
447
+ dxt_det = 0.5 * (mu - xt) - self.estimator(xt, mask, mu, t, spk) - guidance * xt_grad
448
+ dxt_det = dxt_det * noise_t * h
449
+ dxt_stoc = torch.randn(z.shape, dtype=z.dtype, device=z.device,
450
+ requires_grad=False)
451
+ dxt_stoc = dxt_stoc * torch.sqrt(noise_t * h)
452
+ dxt = dxt_det + dxt_stoc
453
+ else:
454
+ dxt = 0.5 * (mu - xt - self.estimator(xt, mask, mu, t, spk) - guidance * xt_grad)
455
+ dxt = dxt * noise_t * h
456
+ xt = (xt - dxt) * mask
457
+ return xt
458
+
459
+ def classifier_decode_mixture_DPS(self, z, mask, mu, n_timesteps, stoc=False, spk=None, classifier_func=None, guidance=1.0, control_emo1=None,control_emo2=None, emo1_weight=None, classifier_type="conformer"):
460
+ # control_emo should be [B, ] tensor
461
+ h = 1.0 / n_timesteps
462
+ xt = z * mask
463
+ for i in range(n_timesteps):
464
+ t = (1.0 - (i + 0.5) * h) * torch.ones(z.shape[0], dtype=z.dtype,
465
+ device=z.device)
466
+ time = t.unsqueeze(-1).unsqueeze(-1)
467
+ noise_t = get_noise(time, self.beta_min, self.beta_max,
468
+ cumulative=False)
469
+ beta_integral_t = get_noise(time, self.beta_min, self.beta_max, cumulative=True)
470
+ bar_alpha_t = torch.exp(-beta_integral_t) # torch.exp: beta_integral_t is a [B, 1, 1] tensor
471
+ # =========== classifier part ==============
472
+ xt = xt.detach()
473
+ xt.requires_grad_(True)
474
+ score_estimate = self.estimator(xt, mask, mu, t, spk)
475
+ x0_hat = (xt + (1 - bar_alpha_t) * score_estimate) / torch.sqrt(bar_alpha_t)
476
+
477
+ if classifier_type == 'CNN-with-time':
478
+ raise NotImplementedError
479
+ else:
480
+ logits = classifier_func(x0_hat.transpose(1, 2), mu.transpose(1, 2), (mask == 1.0).squeeze(1))
481
+
482
+ if classifier_type == 'conformer': # [B, C]
483
+ probs = torch.log_softmax(logits, dim=-1) # [B, C]
484
+ elif classifier_type == 'CNN' or classifier_type == 'CNN-with-time' :
485
+ probs_every_place = torch.softmax(logits, dim=-1) # [B, T', C]
486
+ probs_mean = torch.mean(probs_every_place, dim=1) # [B, C]
487
+ probs_mean = probs_mean + 10E-10
488
+
489
+ probs = torch.log(probs_mean)
490
+ else:
491
+ raise NotImplementedError
492
+
493
+ control_emo_probs1 = probs[torch.arange(len(control_emo1)).to(control_emo1.device), control_emo1]
494
+ control_emo_probs2 = probs[torch.arange(len(control_emo2)).to(control_emo2.device), control_emo2]
495
+ control_emo_probs = control_emo_probs1 * emo1_weight + control_emo_probs2 * (1-emo1_weight) # interpolate
496
+
497
+ control_emo_probs.sum().backward(retain_graph=True)
498
+ # NOTE: summing gives every batch element equal weight in the gradient.
499
+ xt_grad = xt.grad
500
+ # ==========================================
501
+
502
+ if stoc: # adds stochastic term
503
+ dxt_det = 0.5 * (mu - xt) - score_estimate - guidance * xt_grad
504
+ dxt_det = dxt_det * noise_t * h
505
+ dxt_stoc = torch.randn(z.shape, dtype=z.dtype, device=z.device,
506
+ requires_grad=False)
507
+ dxt_stoc = dxt_stoc * torch.sqrt(noise_t * h)
508
+ dxt = dxt_det + dxt_stoc
509
+ else:
510
+ dxt = 0.5 * (mu - xt - score_estimate - guidance * xt_grad)
511
+ dxt = dxt * noise_t * h
512
+ xt = (xt - dxt) * mask
513
+ return xt
model/monotonic_align/LICENCE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2020 Jaehyeon Kim
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
model/monotonic_align/__init__.py ADDED
@@ -0,0 +1,23 @@
1
+ """ from https://github.com/jaywalnut310/glow-tts """
2
+
3
+ import numpy as np
4
+ import torch
5
+ from .model.monotonic_align.core import maximum_path_c
6
+
7
+
8
+ def maximum_path(value, mask):
9
+ """ Cython optimised version.
10
+ value: [b, t_x, t_y]
11
+ mask: [b, t_x, t_y]
12
+ """
13
+ value = value * mask
14
+ device = value.device
15
+ dtype = value.dtype
16
+ value = value.data.cpu().numpy().astype(np.float32)
17
+ path = np.zeros_like(value).astype(np.int32)
18
+ mask = mask.data.cpu().numpy()
19
+
20
+ t_x_max = mask.sum(1)[:, 0].astype(np.int32)
21
+ t_y_max = mask.sum(2)[:, 0].astype(np.int32)
22
+ maximum_path_c(path, value, t_x_max, t_y_max)
23
+ return torch.from_numpy(path).to(device=device, dtype=dtype)
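+ # Example (shapes only): value and mask are [B, t_x, t_y]; the returned path has the
+ # same shape and marks a hard monotonic alignment with one active text index per frame.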
model/monotonic_align/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (903 Bytes). View file
 
model/monotonic_align/build/lib.macosx-11.1-arm64-cpython-310/model/monotonic_align/core.cpython-310-darwin.so ADDED
Binary file (162 kB). View file
 
model/monotonic_align/build/temp.linux-x86_64-3.6/core.o ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b523df88cfc72d08a590c92052df8d4e252bfec3edc67981099a163f5a112ed
3
+ size 2200712
model/monotonic_align/build/temp.macosx-10.9-x86_64-3.6/core.o ADDED
Binary file (616 kB). View file
 
model/monotonic_align/build/temp.macosx-11.1-arm64-cpython-310/core.o ADDED
Binary file (173 kB). View file
 
model/monotonic_align/core.c ADDED
The diff for this file is too large to render. See raw diff
 
model/monotonic_align/core.pyx ADDED
@@ -0,0 +1,45 @@
1
+ import numpy as np
2
+ cimport numpy as np
3
+ cimport cython
4
+ from cython.parallel import prange
5
+
6
+
7
+ @cython.boundscheck(False)
8
+ @cython.wraparound(False)
9
+ cdef void maximum_path_each(int[:,::1] path, float[:,::1] value, int t_x, int t_y, float max_neg_val) nogil:
10
+ cdef int x
11
+ cdef int y
12
+ cdef float v_prev
13
+ cdef float v_cur
14
+ cdef float tmp
15
+ cdef int index = t_x - 1
16
+
17
+ for y in range(t_y):
18
+ for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)):
19
+ if x == y:
20
+ v_cur = max_neg_val
21
+ else:
22
+ v_cur = value[x, y-1]
23
+ if x == 0:
24
+ if y == 0:
25
+ v_prev = 0.
26
+ else:
27
+ v_prev = max_neg_val
28
+ else:
29
+ v_prev = value[x-1, y-1]
30
+ value[x, y] = max(v_cur, v_prev) + value[x, y]
31
+
32
+ for y in range(t_y - 1, -1, -1):
33
+ path[index, y] = 1
34
+ if index != 0 and (index == y or value[index, y-1] < value[index-1, y-1]):
35
+ index = index - 1
36
+
37
+
38
+ @cython.boundscheck(False)
39
+ @cython.wraparound(False)
40
+ cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_xs, int[::1] t_ys, float max_neg_val=-1e9) nogil:
41
+ cdef int b = values.shape[0]
42
+
43
+ cdef int i
44
+ for i in prange(b, nogil=True):
45
+ maximum_path_each(paths[i], values[i], t_xs[i], t_ys[i], max_neg_val)
model/monotonic_align/model/monotonic_align/core.cpython-310-darwin.so ADDED
Binary file (162 kB). View file
 
model/monotonic_align/setup.py ADDED
@@ -0,0 +1,11 @@
1
+ """ from https://github.com/jaywalnut310/glow-tts """
2
+
3
+ from distutils.core import setup
4
+ from Cython.Build import cythonize
5
+ import numpy
6
+
7
+ setup(
8
+ name = 'monotonic_align',
9
+ ext_modules = cythonize("core.pyx"),
10
+ include_dirs=[numpy.get_include()]
11
+ )
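+ # Typical build invocation (run from model/monotonic_align/), assuming Cython and
+ # numpy are installed:
+ # python setup.py build_ext --inplace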
model/text_encoder.py ADDED
@@ -0,0 +1,326 @@
1
+ """ from https://github.com/jaywalnut310/glow-tts """
2
+
3
+ import math
4
+
5
+ import torch
6
+
7
+ from model.base import BaseModule
8
+ from model.utils import sequence_mask, convert_pad_shape
9
+
10
+
11
+ class LayerNorm(BaseModule):
12
+ def __init__(self, channels, eps=1e-4):
13
+ super(LayerNorm, self).__init__()
14
+ self.channels = channels
15
+ self.eps = eps
16
+
17
+ self.gamma = torch.nn.Parameter(torch.ones(channels))
18
+ self.beta = torch.nn.Parameter(torch.zeros(channels))
19
+
20
+ def forward(self, x):
21
+ n_dims = len(x.shape)
22
+ mean = torch.mean(x, 1, keepdim=True)
23
+ variance = torch.mean((x - mean)**2, 1, keepdim=True)
24
+
25
+ x = (x - mean) * torch.rsqrt(variance + self.eps)
26
+
27
+ shape = [1, -1] + [1] * (n_dims - 2)
28
+ x = x * self.gamma.view(*shape) + self.beta.view(*shape)
29
+ return x
30
+
31
+
32
+ class ConvReluNorm(BaseModule):
33
+ def __init__(self, in_channels, hidden_channels, out_channels, kernel_size,
34
+ n_layers, p_dropout):
35
+ super(ConvReluNorm, self).__init__()
36
+ self.in_channels = in_channels
37
+ self.hidden_channels = hidden_channels
38
+ self.out_channels = out_channels
39
+ self.kernel_size = kernel_size
40
+ self.n_layers = n_layers
41
+ self.p_dropout = p_dropout
42
+
43
+ self.conv_layers = torch.nn.ModuleList()
44
+ self.norm_layers = torch.nn.ModuleList()
45
+ self.conv_layers.append(torch.nn.Conv1d(in_channels, hidden_channels,
46
+ kernel_size, padding=kernel_size//2))
47
+ self.norm_layers.append(LayerNorm(hidden_channels))
48
+ self.relu_drop = torch.nn.Sequential(torch.nn.ReLU(), torch.nn.Dropout(p_dropout))
49
+ for _ in range(n_layers - 1):
50
+ self.conv_layers.append(torch.nn.Conv1d(hidden_channels, hidden_channels,
51
+ kernel_size, padding=kernel_size//2))
52
+ self.norm_layers.append(LayerNorm(hidden_channels))
53
+ self.proj = torch.nn.Conv1d(hidden_channels, out_channels, 1)
54
+ self.proj.weight.data.zero_()
55
+ self.proj.bias.data.zero_()
56
+
57
+ def forward(self, x, x_mask):
58
+ x_org = x
59
+ for i in range(self.n_layers):
60
+ x = self.conv_layers[i](x * x_mask)
61
+ x = self.norm_layers[i](x)
62
+ x = self.relu_drop(x)
63
+ x = x_org + self.proj(x)
64
+ return x * x_mask
65
+
66
+
67
+ class DurationPredictor(BaseModule):
68
+ def __init__(self, in_channels, filter_channels, kernel_size, p_dropout):
69
+ super(DurationPredictor, self).__init__()
70
+ self.in_channels = in_channels
71
+ self.filter_channels = filter_channels
72
+ self.p_dropout = p_dropout
73
+
74
+ self.drop = torch.nn.Dropout(p_dropout)
75
+ self.conv_1 = torch.nn.Conv1d(in_channels, filter_channels,
76
+ kernel_size, padding=kernel_size//2)
77
+ self.norm_1 = LayerNorm(filter_channels)
78
+ self.conv_2 = torch.nn.Conv1d(filter_channels, filter_channels,
79
+ kernel_size, padding=kernel_size//2)
80
+ self.norm_2 = LayerNorm(filter_channels)
81
+ self.proj = torch.nn.Conv1d(filter_channels, 1, 1)
82
+
83
+ def forward(self, x, x_mask):
84
+ x = self.conv_1(x * x_mask)
85
+ x = torch.relu(x)
86
+ x = self.norm_1(x)
87
+ x = self.drop(x)
88
+ x = self.conv_2(x * x_mask)
89
+ x = torch.relu(x)
90
+ x = self.norm_2(x)
91
+ x = self.drop(x)
92
+ x = self.proj(x * x_mask)
93
+ return x * x_mask
94
+
95
+
96
+ class MultiHeadAttention(BaseModule):
97
+ def __init__(self, channels, out_channels, n_heads, window_size=None,
98
+ heads_share=True, p_dropout=0.0, proximal_bias=False,
99
+ proximal_init=False):
100
+ super(MultiHeadAttention, self).__init__()
101
+ assert channels % n_heads == 0
102
+
103
+ self.channels = channels
104
+ self.out_channels = out_channels
105
+ self.n_heads = n_heads
106
+ self.window_size = window_size
107
+ self.heads_share = heads_share
108
+ self.proximal_bias = proximal_bias
109
+ self.p_dropout = p_dropout
110
+ self.attn = None
111
+
112
+ self.k_channels = channels // n_heads
113
+ self.conv_q = torch.nn.Conv1d(channels, channels, 1)
114
+ self.conv_k = torch.nn.Conv1d(channels, channels, 1)
115
+ self.conv_v = torch.nn.Conv1d(channels, channels, 1)
116
+ if window_size is not None:
117
+ n_heads_rel = 1 if heads_share else n_heads
118
+ rel_stddev = self.k_channels**-0.5
119
+ self.emb_rel_k = torch.nn.Parameter(torch.randn(n_heads_rel,
120
+ window_size * 2 + 1, self.k_channels) * rel_stddev)
121
+ self.emb_rel_v = torch.nn.Parameter(torch.randn(n_heads_rel,
122
+ window_size * 2 + 1, self.k_channels) * rel_stddev)
123
+ self.conv_o = torch.nn.Conv1d(channels, out_channels, 1)
124
+ self.drop = torch.nn.Dropout(p_dropout)
125
+
126
+ torch.nn.init.xavier_uniform_(self.conv_q.weight)
127
+ torch.nn.init.xavier_uniform_(self.conv_k.weight)
128
+ if proximal_init:
129
+ self.conv_k.weight.data.copy_(self.conv_q.weight.data)
130
+ self.conv_k.bias.data.copy_(self.conv_q.bias.data)
131
+ torch.nn.init.xavier_uniform_(self.conv_v.weight)
132
+
133
+ def forward(self, x, c, attn_mask=None):
134
+ q = self.conv_q(x)
135
+ k = self.conv_k(c)
136
+ v = self.conv_v(c)
137
+
138
+ x, self.attn = self.attention(q, k, v, mask=attn_mask)
139
+
140
+ x = self.conv_o(x)
141
+ return x
142
+
143
+ def attention(self, query, key, value, mask=None):
144
+ b, d, t_s, t_t = (*key.size(), query.size(2))
145
+ query = query.view(b, self.n_heads, self.k_channels, t_t).transpose(2, 3)
146
+ key = key.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3)
147
+ value = value.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3)
148
+
149
+ scores = torch.matmul(query, key.transpose(-2, -1)) / math.sqrt(self.k_channels)
150
+ if self.window_size is not None:
151
+ assert t_s == t_t, "Relative attention is only available for self-attention."
152
+ key_relative_embeddings = self._get_relative_embeddings(self.emb_rel_k, t_s)
153
+ rel_logits = self._matmul_with_relative_keys(query, key_relative_embeddings)
154
+ rel_logits = self._relative_position_to_absolute_position(rel_logits)
155
+ scores_local = rel_logits / math.sqrt(self.k_channels)
156
+ scores = scores + scores_local
157
+ if self.proximal_bias:
158
+ assert t_s == t_t, "Proximal bias is only available for self-attention."
159
+ scores = scores + self._attention_bias_proximal(t_s).to(device=scores.device,
160
+ dtype=scores.dtype)
161
+ if mask is not None:
162
+ scores = scores.masked_fill(mask == 0, -1e4)
163
+ p_attn = torch.nn.functional.softmax(scores, dim=-1)
164
+ p_attn = self.drop(p_attn)
165
+ output = torch.matmul(p_attn, value)
166
+ if self.window_size is not None:
167
+ relative_weights = self._absolute_position_to_relative_position(p_attn)
168
+ value_relative_embeddings = self._get_relative_embeddings(self.emb_rel_v, t_s)
169
+ output = output + self._matmul_with_relative_values(relative_weights,
170
+ value_relative_embeddings)
171
+ output = output.transpose(2, 3).contiguous().view(b, d, t_t)
172
+ return output, p_attn
173
+
174
+ def _matmul_with_relative_values(self, x, y):
175
+ ret = torch.matmul(x, y.unsqueeze(0))
176
+ return ret
177
+
178
+ def _matmul_with_relative_keys(self, x, y):
179
+ ret = torch.matmul(x, y.unsqueeze(0).transpose(-2, -1))
180
+ return ret
181
+
182
+ def _get_relative_embeddings(self, relative_embeddings, length):
183
+ pad_length = max(length - (self.window_size + 1), 0)
184
+ slice_start_position = max((self.window_size + 1) - length, 0)
185
+ slice_end_position = slice_start_position + 2 * length - 1
186
+ if pad_length > 0:
187
+ padded_relative_embeddings = torch.nn.functional.pad(
188
+ relative_embeddings, convert_pad_shape([[0, 0],
189
+ [pad_length, pad_length], [0, 0]]))
190
+ else:
191
+ padded_relative_embeddings = relative_embeddings
192
+ used_relative_embeddings = padded_relative_embeddings[:,
193
+ slice_start_position:slice_end_position]
194
+ return used_relative_embeddings
195
+
196
+ def _relative_position_to_absolute_position(self, x):
197
+ batch, heads, length, _ = x.size()
198
+ x = torch.nn.functional.pad(x, convert_pad_shape([[0,0],[0,0],[0,0],[0,1]]))
199
+ x_flat = x.view([batch, heads, length * 2 * length])
200
+ x_flat = torch.nn.functional.pad(x_flat, convert_pad_shape([[0,0],[0,0],[0,length-1]]))
201
+ x_final = x_flat.view([batch, heads, length+1, 2*length-1])[:, :, :length, length-1:]
202
+ return x_final
203
+
204
+ def _absolute_position_to_relative_position(self, x):
205
+ batch, heads, length, _ = x.size()
206
+ x = torch.nn.functional.pad(x, convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, length-1]]))
207
+ x_flat = x.view([batch, heads, length**2 + length*(length - 1)])
208
+ x_flat = torch.nn.functional.pad(x_flat, convert_pad_shape([[0, 0], [0, 0], [length, 0]]))
209
+ x_final = x_flat.view([batch, heads, length, 2*length])[:,:,:,1:]
210
+ return x_final
211
+
212
+ def _attention_bias_proximal(self, length):
213
+ r = torch.arange(length, dtype=torch.float32)
214
+ diff = torch.unsqueeze(r, 0) - torch.unsqueeze(r, 1)
215
+ return torch.unsqueeze(torch.unsqueeze(-torch.log1p(torch.abs(diff)), 0), 0)
216
+
217
+
218
+ class FFN(BaseModule):
219
+ def __init__(self, in_channels, out_channels, filter_channels, kernel_size,
220
+ p_dropout=0.0):
221
+ super(FFN, self).__init__()
222
+ self.in_channels = in_channels
223
+ self.out_channels = out_channels
224
+ self.filter_channels = filter_channels
225
+ self.kernel_size = kernel_size
226
+ self.p_dropout = p_dropout
227
+
228
+ self.conv_1 = torch.nn.Conv1d(in_channels, filter_channels, kernel_size,
229
+ padding=kernel_size//2)
230
+ self.conv_2 = torch.nn.Conv1d(filter_channels, out_channels, kernel_size,
231
+ padding=kernel_size//2)
232
+ self.drop = torch.nn.Dropout(p_dropout)
233
+
234
+ def forward(self, x, x_mask):
235
+ x = self.conv_1(x * x_mask)
236
+ x = torch.relu(x)
237
+ x = self.drop(x)
238
+ x = self.conv_2(x * x_mask)
239
+ return x * x_mask
240
+
241
+
242
+ class Encoder(BaseModule):
243
+ def __init__(self, hidden_channels, filter_channels, n_heads, n_layers,
244
+ kernel_size=1, p_dropout=0.0, window_size=None, **kwargs):
245
+ super(Encoder, self).__init__()
246
+ self.hidden_channels = hidden_channels
247
+ self.filter_channels = filter_channels
248
+ self.n_heads = n_heads
249
+ self.n_layers = n_layers
250
+ self.kernel_size = kernel_size
251
+ self.p_dropout = p_dropout
252
+ self.window_size = window_size
253
+
254
+ self.drop = torch.nn.Dropout(p_dropout)
255
+ self.attn_layers = torch.nn.ModuleList()
256
+ self.norm_layers_1 = torch.nn.ModuleList()
257
+ self.ffn_layers = torch.nn.ModuleList()
258
+ self.norm_layers_2 = torch.nn.ModuleList()
259
+ for _ in range(self.n_layers):
260
+ self.attn_layers.append(MultiHeadAttention(hidden_channels, hidden_channels,
261
+ n_heads, window_size=window_size, p_dropout=p_dropout))
262
+ self.norm_layers_1.append(LayerNorm(hidden_channels))
263
+ self.ffn_layers.append(FFN(hidden_channels, hidden_channels,
264
+ filter_channels, kernel_size, p_dropout=p_dropout))
265
+ self.norm_layers_2.append(LayerNorm(hidden_channels))
266
+
267
+ def forward(self, x, x_mask):
268
+ attn_mask = x_mask.unsqueeze(2) * x_mask.unsqueeze(-1)
269
+ for i in range(self.n_layers):
270
+ x = x * x_mask
271
+ y = self.attn_layers[i](x, x, attn_mask)
272
+ y = self.drop(y)
273
+ x = self.norm_layers_1[i](x + y)
274
+ y = self.ffn_layers[i](x, x_mask)
275
+ y = self.drop(y)
276
+ x = self.norm_layers_2[i](x + y)
277
+ x = x * x_mask
278
+ return x
279
+
280
+
281
+ class TextEncoder(BaseModule):
282
+ def __init__(self, n_vocab, n_feats, n_channels, filter_channels,
283
+ filter_channels_dp, n_heads, n_layers, kernel_size,
284
+ p_dropout, window_size=None, spk_emb_dim=64, n_spks=1):
285
+ super(TextEncoder, self).__init__()
286
+ self.n_vocab = n_vocab
287
+ self.n_feats = n_feats
288
+ self.n_channels = n_channels
289
+ self.filter_channels = filter_channels
290
+ self.filter_channels_dp = filter_channels_dp
291
+ self.n_heads = n_heads
292
+ self.n_layers = n_layers
293
+ self.kernel_size = kernel_size
294
+ self.p_dropout = p_dropout
295
+ self.window_size = window_size
296
+ self.spk_emb_dim = spk_emb_dim
297
+ self.n_spks = n_spks
298
+
299
+ self.emb = torch.nn.Embedding(n_vocab, n_channels)
300
+ torch.nn.init.normal_(self.emb.weight, 0.0, n_channels**-0.5)
301
+
302
+ self.prenet = ConvReluNorm(n_channels, n_channels, n_channels,
303
+ kernel_size=5, n_layers=3, p_dropout=0.5)
304
+
305
+ self.encoder = Encoder(n_channels + (spk_emb_dim if n_spks > 1 else 0), filter_channels, n_heads, n_layers,
306
+ kernel_size, p_dropout, window_size=window_size)
307
+
308
+ self.proj_m = torch.nn.Conv1d(n_channels + (spk_emb_dim if n_spks > 1 else 0), n_feats, 1)
309
+ self.proj_w = DurationPredictor(n_channels + (spk_emb_dim if n_spks > 1 else 0), filter_channels_dp,
310
+ kernel_size, p_dropout)
311
+
312
+ def forward(self, x, x_lengths, spk=None):
313
+ x = self.emb(x) * math.sqrt(self.n_channels)
314
+ x = torch.transpose(x, 1, -1)
315
+ x_mask = torch.unsqueeze(sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype)
316
+
317
+ x = self.prenet(x, x_mask)
318
+ if self.n_spks > 1:
319
+ x = torch.cat([x, spk.unsqueeze(-1).repeat(1, 1, x.shape[-1])], dim=1)
320
+ x = self.encoder(x, x_mask)
321
+ mu = self.proj_m(x) * x_mask
322
+
323
+ x_dp = torch.detach(x)
324
+ logw = self.proj_w(x_dp, x_mask)
325
+
326
+ return mu, logw, x_mask
model/tts.py ADDED
@@ -0,0 +1,558 @@
1
+ import math
2
+ import random
3
+
4
+ import torch
5
+
6
+ from model import monotonic_align
7
+ from model.base import BaseModule
8
+ from model.text_encoder import TextEncoder
9
+ from model.diffusion import Diffusion
10
+ from model.utils import sequence_mask, generate_path, duration_loss, fix_len_compatibility
11
+
12
+
13
+ class GradTTSWithEmo(BaseModule):
14
+ def __init__(self, n_vocab=148, n_spks=1,n_emos=5, spk_emb_dim=64,
15
+ n_enc_channels=192, filter_channels=768, filter_channels_dp=256,
16
+ n_heads=2, n_enc_layers=6, enc_kernel=3, enc_dropout=0.1, window_size=4,
17
+ n_feats=80, dec_dim=64, beta_min=0.05, beta_max=20.0, pe_scale=1000,
18
+ use_classifier_free=False, dummy_spk_rate=0.5,
19
+ **kwargs):
20
+ super(GradTTSWithEmo, self).__init__()
21
+ self.n_vocab = n_vocab
22
+ self.n_spks = n_spks
23
+ self.n_emos = n_emos
24
+ self.spk_emb_dim = spk_emb_dim
25
+ self.n_enc_channels = n_enc_channels
26
+ self.filter_channels = filter_channels
27
+ self.filter_channels_dp = filter_channels_dp
28
+ self.n_heads = n_heads
29
+ self.n_enc_layers = n_enc_layers
30
+ self.enc_kernel = enc_kernel
31
+ self.enc_dropout = enc_dropout
32
+ self.window_size = window_size
33
+ self.n_feats = n_feats
34
+ self.dec_dim = dec_dim
35
+ self.beta_min = beta_min
36
+ self.beta_max = beta_max
37
+ self.pe_scale = pe_scale
38
+ self.use_classifier_free = use_classifier_free
39
+
40
+ # if n_spks > 1:
41
+ self.spk_emb = torch.nn.Embedding(n_spks, spk_emb_dim)
42
+ self.emo_emb = torch.nn.Embedding(n_emos, spk_emb_dim)
43
+ self.merge_spk_emo = torch.nn.Sequential(
44
+ torch.nn.Linear(spk_emb_dim*2, spk_emb_dim),
45
+ torch.nn.ReLU(),
46
+ torch.nn.Linear(spk_emb_dim, spk_emb_dim)
47
+ )
48
+ self.encoder = TextEncoder(n_vocab, n_feats, n_enc_channels,
49
+ filter_channels, filter_channels_dp, n_heads,
50
+ n_enc_layers, enc_kernel, enc_dropout, window_size,
51
+ spk_emb_dim=spk_emb_dim, n_spks=n_spks)
52
+ self.decoder = Diffusion(n_feats, dec_dim, spk_emb_dim, beta_min, beta_max, pe_scale)
53
+
54
+ if self.use_classifier_free:
55
+ self.dummy_xv = torch.nn.Parameter(torch.randn(size=(spk_emb_dim, )))
56
+ self.dummy_rate = dummy_spk_rate
57
+ print(f"Using classifier free with rate {self.dummy_rate}")
58
+
59
+ @torch.no_grad()
60
+ def forward(self, x, x_lengths, n_timesteps, temperature=1.0, stoc=False, spk=None, emo=None,
61
+ length_scale=1.0, classifier_free_guidance=1., force_dur=None):
62
+ """
63
+ Generates mel-spectrogram from text. Returns:
64
+ 1. encoder outputs
65
+ 2. decoder outputs
66
+ 3. generated alignment
67
+
68
+ Args:
69
+ x (torch.Tensor): batch of texts, converted to a tensor with phoneme embedding ids.
70
+ x_lengths (torch.Tensor): lengths of texts in batch.
71
+ n_timesteps (int): number of steps to use for reverse diffusion in decoder.
72
+ temperature (float, optional): controls variance of terminal distribution.
73
+ stoc (bool, optional): flag that adds stochastic term to the decoder sampler.
74
+ Usually, does not provide synthesis improvements.
75
+ length_scale (float, optional): controls speech pace.
76
+ Increase value to slow down generated speech and vice versa.
77
+ """
78
+ x, x_lengths = self.relocate_input([x, x_lengths])
79
+
80
+ # Get speaker embedding
81
+ spk = self.spk_emb(spk)
82
+ emo = self.emo_emb(emo)
83
+
84
+ if self.use_classifier_free:
85
+ emo = emo / torch.sqrt(torch.sum(emo**2, dim=1, keepdim=True)) # unit norm
86
+
87
+ spk_merged = self.merge_spk_emo(torch.cat([spk, emo], dim=-1))
88
+
89
+ # Get encoder_outputs `mu_x` and log-scaled token durations `logw`
90
+ mu_x, logw, x_mask = self.encoder(x, x_lengths, spk_merged)
91
+
92
+ w = torch.exp(logw) * x_mask
93
+ w_ceil = torch.ceil(w) * length_scale
94
+ if force_dur is not None:
95
+ w_ceil = force_dur.unsqueeze(1) # [1, 1, Ltext]
96
+ y_lengths = torch.clamp_min(torch.sum(w_ceil, [1, 2]), 1).long()
97
+ y_max_length = int(y_lengths.max())
98
+ y_max_length_ = fix_len_compatibility(y_max_length)
99
+
100
+ # Using obtained durations `w` construct alignment map `attn`
101
+ y_mask = sequence_mask(y_lengths, y_max_length_).unsqueeze(1).to(x_mask.dtype)
102
+ attn_mask = x_mask.unsqueeze(-1) * y_mask.unsqueeze(2)
103
+ attn = generate_path(w_ceil.squeeze(1), attn_mask.squeeze(1)).unsqueeze(1)
104
+
105
+ # Align encoded text and get mu_y
106
+ mu_y = torch.matmul(attn.squeeze(1).transpose(1, 2), mu_x.transpose(1, 2))
107
+ mu_y = mu_y.transpose(1, 2)
108
+ encoder_outputs = mu_y[:, :, :y_max_length]
109
+
110
+ # Sample latent representation from terminal distribution N(mu_y, I)
111
+ z = mu_y + torch.randn_like(mu_y, device=mu_y.device) / temperature
112
+ # print(z)
113
+ # Generate sample by performing reverse dynamics
114
+
115
+ unit_dummy_emo = self.dummy_xv / torch.sqrt(torch.sum(self.dummy_xv**2)) if self.use_classifier_free else None
116
+ dummy_spk = self.merge_spk_emo(torch.cat([spk, unit_dummy_emo.unsqueeze(0).repeat(len(spk), 1)], dim=-1)) if self.use_classifier_free else None
117
+
118
+ decoder_outputs = self.decoder(z, y_mask, mu_y, n_timesteps, stoc, spk_merged,
119
+ use_classifier_free=self.use_classifier_free,
120
+ classifier_free_guidance=classifier_free_guidance,
121
+ dummy_spk=dummy_spk)
122
+ decoder_outputs = decoder_outputs[:, :, :y_max_length]
123
+
124
+ return encoder_outputs, decoder_outputs, attn[:, :, :y_max_length]
125
+
126
+ def classifier_guidance_decode(self, x, x_lengths, n_timesteps, temperature=1.0, stoc=False, spk=None, emo=None,
127
+ length_scale=1.0, classifier_func=None, guidance=1.0, classifier_type='conformer'):
128
+ x, x_lengths = self.relocate_input([x, x_lengths])
129
+
130
+ # Get speaker embedding
131
+ spk = self.spk_emb(spk)
132
+ dummy_emo = self.emo_emb(torch.zeros_like(emo).long()) # this is for feeding the text encoder.
133
+
134
+ spk_merged = self.merge_spk_emo(torch.cat([spk, dummy_emo], dim=-1))
135
+
136
+ # Get encoder_outputs `mu_x` and log-scaled token durations `logw`
137
+ mu_x, logw, x_mask = self.encoder(x, x_lengths, spk_merged)
138
+
139
+ w = torch.exp(logw) * x_mask
140
+ # print("w shape is ", w.shape)
141
+ w_ceil = torch.ceil(w) * length_scale
142
+ y_lengths = torch.clamp_min(torch.sum(w_ceil, [1, 2]), 1).long()
143
+ y_max_length = int(y_lengths.max())
144
+ if classifier_type in ('CNN', 'CNN-with-time'):
145
+ y_max_length = max(y_max_length, 180) # NOTE: added for CNN classifier
146
+ y_max_length_ = fix_len_compatibility(y_max_length)
147
+
148
+ # Using obtained durations `w` construct alignment map `attn`
149
+ y_mask = sequence_mask(y_lengths, y_max_length_).unsqueeze(1).to(x_mask.dtype)
150
+ attn_mask = x_mask.unsqueeze(-1) * y_mask.unsqueeze(2)
151
+ attn = generate_path(w_ceil.squeeze(1), attn_mask.squeeze(1)).unsqueeze(1)
152
+
153
+ # Align encoded text and get mu_y
154
+ mu_y = torch.matmul(attn.squeeze(1).transpose(1, 2), mu_x.transpose(1, 2))
155
+ mu_y = mu_y.transpose(1, 2)
156
+ encoder_outputs = mu_y[:, :, :y_max_length]
157
+
158
+ # Sample latent representation from terminal distribution N(mu_y, I)
159
+ z = mu_y + torch.randn_like(mu_y, device=mu_y.device) / temperature
160
+ # Generate sample by performing reverse dynamics
161
+
162
+ decoder_outputs = self.decoder.classifier_decode(z, y_mask, mu_y, n_timesteps, stoc, spk_merged,
163
+ classifier_func, guidance,
164
+ control_emo=emo, classifier_type=classifier_type)
165
+ decoder_outputs = decoder_outputs[:, :, :y_max_length]
166
+ return encoder_outputs, decoder_outputs, attn[:, :, :y_max_length]
167
+
168
+ def classifier_guidance_decode_DPS(self, x, x_lengths, n_timesteps, temperature=1.0, stoc=False, spk=None, emo=None,
169
+ length_scale=1.0, classifier_func=None, guidance=1.0, classifier_type='conformer'):
170
+ x, x_lengths = self.relocate_input([x, x_lengths])
171
+
172
+ # Get speaker embedding
173
+ spk = self.spk_emb(spk)
174
+ dummy_emo = self.emo_emb(torch.zeros_like(emo).long()) # this is for feeding the text encoder.
175
+
176
+ spk_merged = self.merge_spk_emo(torch.cat([spk, dummy_emo], dim=-1))
177
+
178
+ # Get encoder_outputs `mu_x` and log-scaled token durations `logw`
179
+ mu_x, logw, x_mask = self.encoder(x, x_lengths, spk_merged)
180
+
181
+ w = torch.exp(logw) * x_mask
182
+ w_ceil = torch.ceil(w) * length_scale
183
+ y_lengths = torch.clamp_min(torch.sum(w_ceil, [1, 2]), 1).long()
184
+ y_max_length = int(y_lengths.max())
185
+ if classifier_type in ('CNN', 'CNN-with-time'):
186
+ y_max_length = max(y_max_length, 180) # NOTE: added for CNN classifier
187
+ y_max_length_ = fix_len_compatibility(y_max_length)
188
+
189
+ # Using obtained durations `w` construct alignment map `attn`
190
+ y_mask = sequence_mask(y_lengths, y_max_length_).unsqueeze(1).to(x_mask.dtype)
191
+ attn_mask = x_mask.unsqueeze(-1) * y_mask.unsqueeze(2)
192
+ attn = generate_path(w_ceil.squeeze(1), attn_mask.squeeze(1)).unsqueeze(1)
193
+
194
+ # Align encoded text and get mu_y
195
+ mu_y = torch.matmul(attn.squeeze(1).transpose(1, 2), mu_x.transpose(1, 2))
196
+ mu_y = mu_y.transpose(1, 2)
197
+ encoder_outputs = mu_y[:, :, :y_max_length]
198
+
199
+ # Sample latent representation from terminal distribution N(mu_y, I)
200
+ z = mu_y + torch.randn_like(mu_y, device=mu_y.device) / temperature
201
+ # Generate sample by performing reverse dynamics
202
+
203
+ decoder_outputs = self.decoder.classifier_decode_DPS(z, y_mask, mu_y, n_timesteps, stoc, spk_merged,
204
+ classifier_func, guidance,
205
+ control_emo=emo, classifier_type=classifier_type)
206
+ decoder_outputs = decoder_outputs[:, :, :y_max_length]
207
+ return encoder_outputs, decoder_outputs, attn[:, :, :y_max_length]
208
+
209
+ def classifier_guidance_decode_two_mixture(self, x, x_lengths, n_timesteps, temperature=1.0, stoc=False, spk=None, emo1=None, emo2=None, emo1_weight=None,
210
+ length_scale=1.0, classifier_func=None, guidance=1.0, classifier_type='conformer'):
211
+ x, x_lengths = self.relocate_input([x, x_lengths])
212
+
213
+ # Get speaker embedding
214
+ spk = self.spk_emb(spk)
215
+ dummy_emo = self.emo_emb(torch.zeros_like(emo1).long()) # this is for feeding the text encoder.
216
+
217
+ spk_merged = self.merge_spk_emo(torch.cat([spk, dummy_emo], dim=-1))
218
+
219
+ # Get encoder_outputs `mu_x` and log-scaled token durations `logw`
220
+ mu_x, logw, x_mask = self.encoder(x, x_lengths, spk_merged)
221
+
222
+ w = torch.exp(logw) * x_mask
223
+ w_ceil = torch.ceil(w) * length_scale
224
+ y_lengths = torch.clamp_min(torch.sum(w_ceil, [1, 2]), 1).long()
225
+ y_max_length = int(y_lengths.max())
226
+ if classifier_type in ('CNN', 'CNN-with-time'):
227
+ y_max_length = max(y_max_length, 180) # NOTE: added for CNN classifier
228
+ y_max_length_ = fix_len_compatibility(y_max_length)
229
+
230
+ # Using obtained durations `w` construct alignment map `attn`
231
+ y_mask = sequence_mask(y_lengths, y_max_length_).unsqueeze(1).to(x_mask.dtype)
232
+ attn_mask = x_mask.unsqueeze(-1) * y_mask.unsqueeze(2)
233
+ attn = generate_path(w_ceil.squeeze(1), attn_mask.squeeze(1)).unsqueeze(1)
234
+
235
+ # Align encoded text and get mu_y
236
+ mu_y = torch.matmul(attn.squeeze(1).transpose(1, 2), mu_x.transpose(1, 2))
237
+ mu_y = mu_y.transpose(1, 2)
238
+ encoder_outputs = mu_y[:, :, :y_max_length]
239
+
240
+ # Sample latent representation from terminal distribution N(mu_y, I)
241
+ z = mu_y + torch.randn_like(mu_y, device=mu_y.device) / temperature
242
+ # Generate sample by performing reverse dynamics
243
+
244
+ decoder_outputs = self.decoder.classifier_decode_mixture(z, y_mask, mu_y, n_timesteps, stoc, spk_merged,
245
+ classifier_func, guidance,
246
+ control_emo1=emo1, control_emo2=emo2, emo1_weight=emo1_weight, classifier_type=classifier_type)
247
+ decoder_outputs = decoder_outputs[:, :, :y_max_length]
248
+ return encoder_outputs, decoder_outputs, attn[:, :, :y_max_length]
249
+
250
+ def classifier_guidance_decode_two_mixture_DPS(self, x, x_lengths, n_timesteps, temperature=1.0, stoc=False, spk=None, emo1=None, emo2=None, emo1_weight=None,
251
+ length_scale=1.0, classifier_func=None, guidance=1.0, classifier_type='conformer'):
252
+ x, x_lengths = self.relocate_input([x, x_lengths])
253
+
254
+ # Get speaker embedding
255
+ spk = self.spk_emb(spk)
256
+ dummy_emo = self.emo_emb(torch.zeros_like(emo1).long()) # this is for feeding the text encoder.
257
+
258
+ spk_merged = self.merge_spk_emo(torch.cat([spk, dummy_emo], dim=-1))
259
+
260
+ # Get encoder_outputs `mu_x` and log-scaled token durations `logw`
261
+ mu_x, logw, x_mask = self.encoder(x, x_lengths, spk_merged)
262
+
263
+ w = torch.exp(logw) * x_mask
264
+ w_ceil = torch.ceil(w) * length_scale
265
+ y_lengths = torch.clamp_min(torch.sum(w_ceil, [1, 2]), 1).long()
266
+ y_max_length = int(y_lengths.max())
267
+ if classifier_type in ('CNN', 'CNN-with-time'):
268
+ y_max_length = max(y_max_length, 180) # NOTE: added for CNN classifier
269
+ y_max_length_ = fix_len_compatibility(y_max_length)
270
+
271
+ # Using obtained durations `w` construct alignment map `attn`
272
+ y_mask = sequence_mask(y_lengths, y_max_length_).unsqueeze(1).to(x_mask.dtype)
273
+ attn_mask = x_mask.unsqueeze(-1) * y_mask.unsqueeze(2)
274
+ attn = generate_path(w_ceil.squeeze(1), attn_mask.squeeze(1)).unsqueeze(1)
275
+
276
+ # Align encoded text and get mu_y
277
+ mu_y = torch.matmul(attn.squeeze(1).transpose(1, 2), mu_x.transpose(1, 2))
278
+ mu_y = mu_y.transpose(1, 2)
279
+ encoder_outputs = mu_y[:, :, :y_max_length]
280
+
281
+ # Sample latent representation from terminal distribution N(mu_y, I)
282
+ z = mu_y + torch.randn_like(mu_y, device=mu_y.device) / temperature
283
+ # Generate sample by performing reverse dynamics
284
+
285
+ decoder_outputs = self.decoder.classifier_decode_mixture_DPS(z, y_mask, mu_y, n_timesteps, stoc, spk_merged,
286
+ classifier_func, guidance,
287
+ control_emo1=emo1, control_emo2=emo2, emo1_weight=emo1_weight, classifier_type=classifier_type)
288
+ decoder_outputs = decoder_outputs[:, :, :y_max_length]
289
+ return encoder_outputs, decoder_outputs, attn[:, :, :y_max_length]
290
+
291
+ def compute_loss(self, x, x_lengths, y, y_lengths, spk=None, emo=None, out_size=None, use_gt_dur=False, durs=None):
292
+ """
293
+ Computes 3 losses:
294
+ 1. duration loss: loss between predicted token durations and those extracted by Monotonic Alignment Search (MAS).
295
+ 2. prior loss: loss between mel-spectrogram and encoder outputs.
296
+ 3. diffusion loss: loss between gaussian noise and its reconstruction by diffusion-based decoder.
297
+
298
+ Args:
299
+ x (torch.Tensor): batch of texts, converted to a tensor with phoneme embedding ids.
300
+ x_lengths (torch.Tensor): lengths of texts in batch.
301
+ y (torch.Tensor): batch of corresponding mel-spectrograms.
302
+ y_lengths (torch.Tensor): lengths of mel-spectrograms in batch.
303
+ out_size (int, optional): length (in mel's sampling rate) of segment to cut, on which decoder will be trained.
304
+ Should be divisible by 2^{num of UNet downsamplings}. Needed to increase batch size.
305
+ use_gt_dur (bool): if True, use the ground-truth durations `durs` instead of the MAS alignment.
306
+ durs (torch.Tensor): ground-truth durations, required when `use_gt_dur` is True.
307
+ """
308
+ x, x_lengths, y, y_lengths = self.relocate_input([x, x_lengths, y, y_lengths]) # y: B, 80, L
309
+
310
+ spk = self.spk_emb(spk)
311
+ emo = self.emo_emb(emo) # [B, D]
312
+ if self.use_classifier_free:
313
+ emo = emo / torch.sqrt(torch.sum(emo ** 2, dim=1, keepdim=True)) # unit norm
314
+ use_dummy_per_sample = torch.distributions.Binomial(1, torch.tensor(
315
+ [self.dummy_rate] * len(emo))).sample().bool() # [b, ] True/False where True accords to rate
316
+ emo[use_dummy_per_sample] = (self.dummy_xv / torch.sqrt(
317
+ torch.sum(self.dummy_xv ** 2))) # substitute with dummy xv(unit norm too)
318
+
319
+ spk = self.merge_spk_emo(torch.cat([spk, emo], dim=-1)) # [B, D]
320
+
321
+ # Get encoder_outputs `mu_x` and log-scaled token durations `logw`
322
+ mu_x, logw, x_mask = self.encoder(x, x_lengths, spk)
323
+ y_max_length = y.shape[-1]
324
+
325
+ y_mask = sequence_mask(y_lengths, y_max_length).unsqueeze(1).to(x_mask)
326
+ attn_mask = x_mask.unsqueeze(-1) * y_mask.unsqueeze(2)
327
+
328
+ # Use MAS to find most likely alignment `attn` between text and mel-spectrogram
329
+ if use_gt_dur:
330
+ attn = generate_path(durs, attn_mask.squeeze(1)).detach()
331
+ else:
332
+ with torch.no_grad():
333
+ const = -0.5 * math.log(2 * math.pi) * self.n_feats
334
+ factor = -0.5 * torch.ones(mu_x.shape, dtype=mu_x.dtype, device=mu_x.device)
335
+ y_square = torch.matmul(factor.transpose(1, 2), y ** 2)
336
+ y_mu_double = torch.matmul(2.0 * (factor * mu_x).transpose(1, 2), y)
337
+ mu_square = torch.sum(factor * (mu_x ** 2), 1).unsqueeze(-1)
338
+ log_prior = y_square - y_mu_double + mu_square + const
339
+ # it's actually the log likelihood of y given the Gaussian with (mu_x, I)
340
+
341
+ attn = monotonic_align.maximum_path(log_prior, attn_mask.squeeze(1))
342
+ attn = attn.detach()
343
+
344
+ # Compute loss between predicted log-scaled durations and those obtained from MAS
345
+ logw_ = torch.log(1e-8 + torch.sum(attn.unsqueeze(1), -1)) * x_mask
346
+ dur_loss = duration_loss(logw, logw_, x_lengths)
347
+ # print(attn.shape)
348
+
349
+ # Cut a small segment of mel-spectrogram in order to increase batch size
350
+ if out_size is not None:
351
+ clip_size = min(out_size, y_max_length) # when out_size > max length, do not actually perform clipping
352
+ clip_size = -fix_len_compatibility(-clip_size) # this is to ensure dividable
353
+ max_offset = (y_lengths - clip_size).clamp(0)
354
+ offset_ranges = list(zip([0] * max_offset.shape[0], max_offset.cpu().numpy()))
355
+ out_offset = torch.LongTensor([
356
+ torch.tensor(random.choice(range(start, end)) if end > start else 0)
357
+ for start, end in offset_ranges
358
+ ]).to(y_lengths)
359
+
360
+ attn_cut = torch.zeros(attn.shape[0], attn.shape[1], clip_size, dtype=attn.dtype, device=attn.device)
361
+ y_cut = torch.zeros(y.shape[0], self.n_feats, clip_size, dtype=y.dtype, device=y.device)
362
+ y_cut_lengths = []
363
+ for i, (y_, out_offset_) in enumerate(zip(y, out_offset)):
364
+ y_cut_length = clip_size + (y_lengths[i] - clip_size).clamp(None, 0)
365
+ y_cut_lengths.append(y_cut_length)
366
+ cut_lower, cut_upper = out_offset_, out_offset_ + y_cut_length
367
+ y_cut[i, :, :y_cut_length] = y_[:, cut_lower:cut_upper]
368
+ attn_cut[i, :, :y_cut_length] = attn[i, :, cut_lower:cut_upper]
369
+ y_cut_lengths = torch.LongTensor(y_cut_lengths)
370
+ y_cut_mask = sequence_mask(y_cut_lengths).unsqueeze(1).to(y_mask)
371
+
372
+ attn = attn_cut # attn -> [B, text_length, cut_length]. It does not begin from top left corner
373
+ y = y_cut
374
+ y_mask = y_cut_mask
375
+
376
+ # Align encoded text with mel-spectrogram and get mu_y segment
377
+ mu_y = torch.matmul(attn.squeeze(1).transpose(1, 2), mu_x.transpose(1, 2)) # here mu_x is not cut.
378
+ mu_y = mu_y.transpose(1, 2) # B, 80, cut_length
379
+
380
+ # Compute loss of score-based decoder
381
+ # print(y.shape, y_mask.shape, mu_y.shape)
382
+ diff_loss, xt = self.decoder.compute_loss(y, y_mask, mu_y, spk)
383
+
384
+ # Compute loss between aligned encoder outputs and mel-spectrogram
385
+ prior_loss = torch.sum(0.5 * ((y - mu_y) ** 2 + math.log(2 * math.pi)) * y_mask)
386
+ prior_loss = prior_loss / (torch.sum(y_mask) * self.n_feats)
387
+
388
+ return dur_loss, prior_loss, diff_loss
389
+
390
+
391
+ class GradTTSXvector(BaseModule):
392
+ def __init__(self, n_vocab=148, spk_emb_dim=64,
393
+ n_enc_channels=192, filter_channels=768, filter_channels_dp=256,
394
+ n_heads=2, n_enc_layers=6, enc_kernel=3, enc_dropout=0.1, window_size=4,
395
+ n_feats=80, dec_dim=64, beta_min=0.05, beta_max=20.0, pe_scale=1000, xvector_dim=512, **kwargs):
396
+ super(GradTTSXvector, self).__init__()
397
+ self.n_vocab = n_vocab
398
+ # self.n_spks = n_spks
399
+ self.spk_emb_dim = spk_emb_dim
400
+ self.n_enc_channels = n_enc_channels
401
+ self.filter_channels = filter_channels
402
+ self.filter_channels_dp = filter_channels_dp
403
+ self.n_heads = n_heads
404
+ self.n_enc_layers = n_enc_layers
405
+ self.enc_kernel = enc_kernel
406
+ self.enc_dropout = enc_dropout
407
+ self.window_size = window_size
408
+ self.n_feats = n_feats
409
+ self.dec_dim = dec_dim
410
+ self.beta_min = beta_min
411
+ self.beta_max = beta_max
412
+ self.pe_scale = pe_scale
413
+
414
+ self.xvector_proj = torch.nn.Linear(xvector_dim, spk_emb_dim)
415
+ self.encoder = TextEncoder(n_vocab, n_feats, n_enc_channels,
416
+ filter_channels, filter_channels_dp, n_heads,
417
+ n_enc_layers, enc_kernel, enc_dropout, window_size,
418
+ spk_emb_dim=spk_emb_dim, n_spks=999) # NOTE: not important `n_spk`
419
+ self.decoder = Diffusion(n_feats, dec_dim, spk_emb_dim, beta_min, beta_max, pe_scale)
420
+
421
+ @torch.no_grad()
422
+ def forward(self, x, x_lengths, n_timesteps, temperature=1.0, stoc=False, spk=None, length_scale=1.0):
423
+ """
424
+ Generates mel-spectrogram from text. Returns:
425
+ 1. encoder outputs
426
+ 2. decoder outputs
427
+ 3. generated alignment
428
+
429
+ Args:
430
+ x (torch.Tensor): batch of texts, converted to a tensor with phoneme embedding ids.
431
+ x_lengths (torch.Tensor): lengths of texts in batch.
432
+ n_timesteps (int): number of steps to use for reverse diffusion in decoder.
433
+ temperature (float, optional): controls variance of terminal distribution.
434
+ stoc (bool, optional): flag that adds stochastic term to the decoder sampler.
435
+ Usually, does not provide synthesis improvements.
436
+ length_scale (float, optional): controls speech pace.
437
+ Increase value to slow down generated speech and vice versa.
438
+ spk (torch.Tensor): speaker x-vectors, projected to the speaker-embedding dimension instead of a lookup table.
439
+ """
440
+ x, x_lengths = self.relocate_input([x, x_lengths])
441
+
442
+ spk = self.xvector_proj(spk) # NOTE: use x-vectors instead of speaker embedding
443
+
444
+ # Get encoder_outputs `mu_x` and log-scaled token durations `logw`
445
+ mu_x, logw, x_mask = self.encoder(x, x_lengths, spk)
446
+
447
+ w = torch.exp(logw) * x_mask
448
+ w_ceil = torch.ceil(w) * length_scale
449
+ y_lengths = torch.clamp_min(torch.sum(w_ceil, [1, 2]), 1).long()
450
+ y_max_length = int(y_lengths.max())
451
+ y_max_length_ = fix_len_compatibility(y_max_length)
452
+
453
+ # Using obtained durations `w` construct alignment map `attn`
454
+ y_mask = sequence_mask(y_lengths, y_max_length_).unsqueeze(1).to(x_mask.dtype)
455
+ attn_mask = x_mask.unsqueeze(-1) * y_mask.unsqueeze(2)
456
+ attn = generate_path(w_ceil.squeeze(1), attn_mask.squeeze(1)).unsqueeze(1)
457
+
458
+ # Align encoded text and get mu_y
459
+ mu_y = torch.matmul(attn.squeeze(1).transpose(1, 2), mu_x.transpose(1, 2))
460
+ mu_y = mu_y.transpose(1, 2)
461
+ encoder_outputs = mu_y[:, :, :y_max_length]
462
+
463
+ # Sample latent representation from terminal distribution N(mu_y, I)
464
+ z = mu_y + torch.randn_like(mu_y, device=mu_y.device) / temperature
465
+ # Generate sample by performing reverse dynamics
466
+ decoder_outputs = self.decoder(z, y_mask, mu_y, n_timesteps, stoc, spk)
467
+ decoder_outputs = decoder_outputs[:, :, :y_max_length]
468
+
469
+ return encoder_outputs, decoder_outputs, attn[:, :, :y_max_length]
470
+
471
+ def compute_loss(self, x, x_lengths, y, y_lengths, spk=None, out_size=None, use_gt_dur=False, durs=None):
472
+ """
473
+ Computes 3 losses:
474
+ 1. duration loss: loss between predicted token durations and those extracted by Monotonic Alignment Search (MAS).
475
+ 2. prior loss: loss between mel-spectrogram and encoder outputs.
476
+ 3. diffusion loss: loss between gaussian noise and its reconstruction by diffusion-based decoder.
477
+
478
+ Args:
479
+ x (torch.Tensor): batch of texts, converted to a tensor with phoneme embedding ids.
480
+ x_lengths (torch.Tensor): lengths of texts in batch.
481
+ y (torch.Tensor): batch of corresponding mel-spectrograms.
482
+ y_lengths (torch.Tensor): lengths of mel-spectrograms in batch.
483
+ out_size (int, optional): length (in mel's sampling rate) of segment to cut, on which decoder will be trained.
484
+ Should be divisible by 2^{num of UNet downsamplings}. Needed to increase batch size.
485
+ spk (torch.Tensor): speaker x-vectors.
486
+ use_gt_dur (bool): if True, use the ground-truth durations `durs` instead of the MAS alignment.
487
+ durs (torch.Tensor): ground-truth durations, required when `use_gt_dur` is True.
488
+ """
489
+ x, x_lengths, y, y_lengths = self.relocate_input([x, x_lengths, y, y_lengths])
490
+
491
+ spk = self.xvector_proj(spk) # NOTE: use x-vectors instead of speaker embedding
492
+
493
+ # Get encoder_outputs `mu_x` and log-scaled token durations `logw`
494
+ mu_x, logw, x_mask = self.encoder(x, x_lengths, spk)
495
+ y_max_length = y.shape[-1]
496
+
497
+ y_mask = sequence_mask(y_lengths, y_max_length).unsqueeze(1).to(x_mask)
498
+ attn_mask = x_mask.unsqueeze(-1) * y_mask.unsqueeze(2)
499
+
500
+ # Use MAS to find most likely alignment `attn` between text and mel-spectrogram
501
+ if not use_gt_dur:
502
+ with torch.no_grad():
503
+ const = -0.5 * math.log(2 * math.pi) * self.n_feats
504
+ factor = -0.5 * torch.ones(mu_x.shape, dtype=mu_x.dtype, device=mu_x.device)
505
+ y_square = torch.matmul(factor.transpose(1, 2), y ** 2)
506
+ y_mu_double = torch.matmul(2.0 * (factor * mu_x).transpose(1, 2), y)
507
+ mu_square = torch.sum(factor * (mu_x ** 2), 1).unsqueeze(-1)
508
+ log_prior = y_square - y_mu_double + mu_square + const
509
+
510
+ attn = monotonic_align.maximum_path(log_prior, attn_mask.squeeze(1))
511
+ attn = attn.detach()
512
+ else:
513
+ with torch.no_grad():
514
+ attn = generate_path(durs, attn_mask.squeeze(1)).detach()
515
+
516
+ # Compute loss between predicted log-scaled durations and those obtained from MAS
517
+ logw_ = torch.log(1e-8 + torch.sum(attn.unsqueeze(1), -1)) * x_mask
518
+ dur_loss = duration_loss(logw, logw_, x_lengths)
519
+
520
+ # print(attn.shape)
521
+
522
+ # Cut a small segment of mel-spectrogram in order to increase batch size
523
+ if out_size is not None:
524
+ max_offset = (y_lengths - out_size).clamp(0)
525
+ offset_ranges = list(zip([0] * max_offset.shape[0], max_offset.cpu().numpy()))
526
+ out_offset = torch.LongTensor([
527
+ torch.tensor(random.choice(range(start, end)) if end > start else 0)
528
+ for start, end in offset_ranges
529
+ ]).to(y_lengths)
530
+
531
+ attn_cut = torch.zeros(attn.shape[0], attn.shape[1], out_size, dtype=attn.dtype, device=attn.device)
532
+ y_cut = torch.zeros(y.shape[0], self.n_feats, out_size, dtype=y.dtype, device=y.device)
533
+ y_cut_lengths = []
534
+ for i, (y_, out_offset_) in enumerate(zip(y, out_offset)):
535
+ y_cut_length = out_size + (y_lengths[i] - out_size).clamp(None, 0)
536
+ y_cut_lengths.append(y_cut_length)
537
+ cut_lower, cut_upper = out_offset_, out_offset_ + y_cut_length
538
+ y_cut[i, :, :y_cut_length] = y_[:, cut_lower:cut_upper]
539
+ attn_cut[i, :, :y_cut_length] = attn[i, :, cut_lower:cut_upper]
540
+ y_cut_lengths = torch.LongTensor(y_cut_lengths)
541
+ y_cut_mask = sequence_mask(y_cut_lengths).unsqueeze(1).to(y_mask)
542
+
543
+ attn = attn_cut
544
+ y = y_cut
545
+ y_mask = y_cut_mask
546
+
547
+ # Align encoded text with mel-spectrogram and get mu_y segment
548
+ mu_y = torch.matmul(attn.squeeze(1).transpose(1, 2), mu_x.transpose(1, 2))
549
+ mu_y = mu_y.transpose(1, 2)
550
+
551
+ # Compute loss of score-based decoder
552
+ diff_loss, xt = self.decoder.compute_loss(y, y_mask, mu_y, spk)
553
+
554
+ # Compute loss between aligned encoder outputs and mel-spectrogram
555
+ prior_loss = torch.sum(0.5 * ((y - mu_y) ** 2 + math.log(2 * math.pi)) * y_mask)
556
+ prior_loss = prior_loss / (torch.sum(y_mask) * self.n_feats)
557
+
558
+ return dur_loss, prior_loss, diff_loss
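A minimal end-to-end inference sketch for the GradTTSWithEmo class above (illustrative only, not a script shipped in this commit; the `model.tts` import path, the sample Kazakh text, the speaker/emotion ids and the randomly initialised weights are assumptions — a real run would load a trained checkpoint into `model` first and pick ids that exist in the training data):

    import torch
    from model.tts import GradTTSWithEmo
    from text import convert_text

    model = GradTTSWithEmo(n_vocab=148, n_spks=1, n_emos=5).eval()

    # phoneme/character ids padded to [1, T_text] plus the length tensor
    x, x_lengths = convert_text("Сәлем, әлем!")
    spk = torch.LongTensor([0])   # speaker id
    emo = torch.LongTensor([2])   # emotion id

    with torch.no_grad():
        mu_y, mel, attn = model(x, x_lengths, n_timesteps=10, temperature=1.5,
                                stoc=False, spk=spk, emo=emo, length_scale=1.0)
    print(mel.shape)   # [1, 80, T_mel] — the mel-spectrogram fed to the vocoder in models.py

The returned tuple matches the docstring of `forward`: the aligned encoder outputs, the decoded mel-spectrogram, and the hard alignment between text tokens and mel frames.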
model/utils.py ADDED
@@ -0,0 +1,44 @@
1
+ """ from https://github.com/jaywalnut310/glow-tts """
2
+
3
+ import torch
4
+
5
+
6
+ def sequence_mask(length, max_length=None):
7
+ if max_length is None:
8
+ max_length = length.max()
9
+ x = torch.arange(int(max_length), dtype=length.dtype, device=length.device)
10
+ return x.unsqueeze(0) < length.unsqueeze(1)
11
+
12
+
13
+ def fix_len_compatibility(length, num_downsamplings_in_unet=2):
14
+ while True:
15
+ if length % (2**num_downsamplings_in_unet) == 0:
16
+ return length
17
+ length += 1
18
+
19
+
20
+ def convert_pad_shape(pad_shape):
21
+ l = pad_shape[::-1]
22
+ pad_shape = [item for sublist in l for item in sublist]
23
+ return pad_shape
24
+
25
+
26
+ def generate_path(duration, mask):
27
+ device = duration.device
28
+
29
+ b, t_x, t_y = mask.shape
30
+ cum_duration = torch.cumsum(duration, 1)
31
+ path = torch.zeros(b, t_x, t_y, dtype=mask.dtype).to(device=device)
32
+
33
+ cum_duration_flat = cum_duration.view(b * t_x)
34
+ path = sequence_mask(cum_duration_flat, t_y).to(mask.dtype)
35
+ path = path.view(b, t_x, t_y)
36
+ path = path - torch.nn.functional.pad(path, convert_pad_shape([[0, 0],
37
+ [1, 0], [0, 0]]))[:, :-1]
38
+ path = path * mask
39
+ return path
40
+
41
+
42
+ def duration_loss(logw, logw_, lengths):
43
+ loss = torch.sum((logw - logw_)**2) / torch.sum(lengths)
44
+ return loss
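A small self-contained illustration of these helpers (toy tensors only): `sequence_mask` turns lengths into a boolean mask, `fix_len_compatibility` rounds a frame count up so it survives the two UNet downsamplings, and `generate_path` expands per-token durations into a hard monotonic alignment:

    import torch
    from model.utils import sequence_mask, fix_len_compatibility, generate_path

    lengths = torch.LongTensor([3, 5])
    print(sequence_mask(lengths).int())   # [[1, 1, 1, 0, 0], [1, 1, 1, 1, 1]]

    print(fix_len_compatibility(173))     # 176, the next multiple of 2**2

    # two tokens lasting 2 and 3 frames -> token 0 covers frames 0-1, token 1 covers frames 2-4
    dur = torch.FloatTensor([[2., 3.]])
    attn_mask = torch.ones(1, 2, 5)
    print(generate_path(dur, attn_mask))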
models.py ADDED
@@ -0,0 +1,283 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+ import torch.nn as nn
4
+ from torch.nn import Conv1d, ConvTranspose1d, AvgPool1d, Conv2d
5
+ from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
6
+ from xutils import init_weights, get_padding
7
+
8
+ LRELU_SLOPE = 0.1
9
+
10
+
11
+ class ResBlock1(torch.nn.Module):
12
+ def __init__(self, h, channels, kernel_size=3, dilation=(1, 3, 5)):
13
+ super(ResBlock1, self).__init__()
14
+ self.h = h
15
+ self.convs1 = nn.ModuleList([
16
+ weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
17
+ padding=get_padding(kernel_size, dilation[0]))),
18
+ weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
19
+ padding=get_padding(kernel_size, dilation[1]))),
20
+ weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[2],
21
+ padding=get_padding(kernel_size, dilation[2])))
22
+ ])
23
+ self.convs1.apply(init_weights)
24
+
25
+ self.convs2 = nn.ModuleList([
26
+ weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
27
+ padding=get_padding(kernel_size, 1))),
28
+ weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
29
+ padding=get_padding(kernel_size, 1))),
30
+ weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=1,
31
+ padding=get_padding(kernel_size, 1)))
32
+ ])
33
+ self.convs2.apply(init_weights)
34
+
35
+ def forward(self, x):
36
+ for c1, c2 in zip(self.convs1, self.convs2):
37
+ xt = F.leaky_relu(x, LRELU_SLOPE)
38
+ xt = c1(xt)
39
+ xt = F.leaky_relu(xt, LRELU_SLOPE)
40
+ xt = c2(xt)
41
+ x = xt + x
42
+ return x
43
+
44
+ def remove_weight_norm(self):
45
+ for l in self.convs1:
46
+ remove_weight_norm(l)
47
+ for l in self.convs2:
48
+ remove_weight_norm(l)
49
+
50
+
51
+ class ResBlock2(torch.nn.Module):
52
+ def __init__(self, h, channels, kernel_size=3, dilation=(1, 3)):
53
+ super(ResBlock2, self).__init__()
54
+ self.h = h
55
+ self.convs = nn.ModuleList([
56
+ weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[0],
57
+ padding=get_padding(kernel_size, dilation[0]))),
58
+ weight_norm(Conv1d(channels, channels, kernel_size, 1, dilation=dilation[1],
59
+ padding=get_padding(kernel_size, dilation[1])))
60
+ ])
61
+ self.convs.apply(init_weights)
62
+
63
+ def forward(self, x):
64
+ for c in self.convs:
65
+ xt = F.leaky_relu(x, LRELU_SLOPE)
66
+ xt = c(xt)
67
+ x = xt + x
68
+ return x
69
+
70
+ def remove_weight_norm(self):
71
+ for l in self.convs:
72
+ remove_weight_norm(l)
73
+
74
+
75
+ class Generator(torch.nn.Module):
76
+ def __init__(self, h):
77
+ super(Generator, self).__init__()
78
+ self.h = h
79
+ self.num_kernels = len(h.resblock_kernel_sizes)
80
+ self.num_upsamples = len(h.upsample_rates)
81
+ self.conv_pre = weight_norm(Conv1d(80, h.upsample_initial_channel, 7, 1, padding=3))
82
+ resblock = ResBlock1 if h.resblock == '1' else ResBlock2
83
+
84
+ self.ups = nn.ModuleList()
85
+ for i, (u, k) in enumerate(zip(h.upsample_rates, h.upsample_kernel_sizes)):
86
+ self.ups.append(weight_norm(
87
+ ConvTranspose1d(h.upsample_initial_channel//(2**i), h.upsample_initial_channel//(2**(i+1)),
88
+ k, u, padding=(k-u)//2)))
89
+
90
+ self.resblocks = nn.ModuleList()
91
+ for i in range(len(self.ups)):
92
+ ch = h.upsample_initial_channel//(2**(i+1))
93
+ for j, (k, d) in enumerate(zip(h.resblock_kernel_sizes, h.resblock_dilation_sizes)):
94
+ self.resblocks.append(resblock(h, ch, k, d))
95
+
96
+ self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3))
97
+ self.ups.apply(init_weights)
98
+ self.conv_post.apply(init_weights)
99
+
100
+ def forward(self, x):
101
+ x = self.conv_pre(x)
102
+ for i in range(self.num_upsamples):
103
+ x = F.leaky_relu(x, LRELU_SLOPE)
104
+ x = self.ups[i](x)
105
+ xs = None
106
+ for j in range(self.num_kernels):
107
+ if xs is None:
108
+ xs = self.resblocks[i*self.num_kernels+j](x)
109
+ else:
110
+ xs += self.resblocks[i*self.num_kernels+j](x)
111
+ x = xs / self.num_kernels
112
+ x = F.leaky_relu(x)
113
+ x = self.conv_post(x)
114
+ x = torch.tanh(x)
115
+
116
+ return x
117
+
118
+ def remove_weight_norm(self):
119
+ print('Removing weight norm...')
120
+ for l in self.ups:
121
+ remove_weight_norm(l)
122
+ for l in self.resblocks:
123
+ l.remove_weight_norm()
124
+ remove_weight_norm(self.conv_pre)
125
+ remove_weight_norm(self.conv_post)
126
+
127
+
128
+ class DiscriminatorP(torch.nn.Module):
129
+ def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False):
130
+ super(DiscriminatorP, self).__init__()
131
+ self.period = period
132
+ norm_f = weight_norm if use_spectral_norm == False else spectral_norm
133
+ self.convs = nn.ModuleList([
134
+ norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))),
135
+ norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))),
136
+ norm_f(Conv2d(128, 512, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))),
137
+ norm_f(Conv2d(512, 1024, (kernel_size, 1), (stride, 1), padding=(get_padding(5, 1), 0))),
138
+ norm_f(Conv2d(1024, 1024, (kernel_size, 1), 1, padding=(2, 0))),
139
+ ])
140
+ self.conv_post = norm_f(Conv2d(1024, 1, (3, 1), 1, padding=(1, 0)))
141
+
142
+ def forward(self, x):
143
+ fmap = []
144
+
145
+ # 1d to 2d
146
+ b, c, t = x.shape
147
+ if t % self.period != 0: # pad first
148
+ n_pad = self.period - (t % self.period)
149
+ x = F.pad(x, (0, n_pad), "reflect")
150
+ t = t + n_pad
151
+ x = x.view(b, c, t // self.period, self.period)
152
+
153
+ for l in self.convs:
154
+ x = l(x)
155
+ x = F.leaky_relu(x, LRELU_SLOPE)
156
+ fmap.append(x)
157
+ x = self.conv_post(x)
158
+ fmap.append(x)
159
+ x = torch.flatten(x, 1, -1)
160
+
161
+ return x, fmap
162
+
163
+
164
+ class MultiPeriodDiscriminator(torch.nn.Module):
165
+ def __init__(self):
166
+ super(MultiPeriodDiscriminator, self).__init__()
167
+ self.discriminators = nn.ModuleList([
168
+ DiscriminatorP(2),
169
+ DiscriminatorP(3),
170
+ DiscriminatorP(5),
171
+ DiscriminatorP(7),
172
+ DiscriminatorP(11),
173
+ ])
174
+
175
+ def forward(self, y, y_hat):
176
+ y_d_rs = []
177
+ y_d_gs = []
178
+ fmap_rs = []
179
+ fmap_gs = []
180
+ for i, d in enumerate(self.discriminators):
181
+ y_d_r, fmap_r = d(y)
182
+ y_d_g, fmap_g = d(y_hat)
183
+ y_d_rs.append(y_d_r)
184
+ fmap_rs.append(fmap_r)
185
+ y_d_gs.append(y_d_g)
186
+ fmap_gs.append(fmap_g)
187
+
188
+ return y_d_rs, y_d_gs, fmap_rs, fmap_gs
189
+
190
+
191
+ class DiscriminatorS(torch.nn.Module):
192
+ def __init__(self, use_spectral_norm=False):
193
+ super(DiscriminatorS, self).__init__()
194
+ norm_f = weight_norm if use_spectral_norm == False else spectral_norm
195
+ self.convs = nn.ModuleList([
196
+ norm_f(Conv1d(1, 128, 15, 1, padding=7)),
197
+ norm_f(Conv1d(128, 128, 41, 2, groups=4, padding=20)),
198
+ norm_f(Conv1d(128, 256, 41, 2, groups=16, padding=20)),
199
+ norm_f(Conv1d(256, 512, 41, 4, groups=16, padding=20)),
200
+ norm_f(Conv1d(512, 1024, 41, 4, groups=16, padding=20)),
201
+ norm_f(Conv1d(1024, 1024, 41, 1, groups=16, padding=20)),
202
+ norm_f(Conv1d(1024, 1024, 5, 1, padding=2)),
203
+ ])
204
+ self.conv_post = norm_f(Conv1d(1024, 1, 3, 1, padding=1))
205
+
206
+ def forward(self, x):
207
+ fmap = []
208
+ for l in self.convs:
209
+ x = l(x)
210
+ x = F.leaky_relu(x, LRELU_SLOPE)
211
+ fmap.append(x)
212
+ x = self.conv_post(x)
213
+ fmap.append(x)
214
+ x = torch.flatten(x, 1, -1)
215
+
216
+ return x, fmap
217
+
218
+
219
+ class MultiScaleDiscriminator(torch.nn.Module):
220
+ def __init__(self):
221
+ super(MultiScaleDiscriminator, self).__init__()
222
+ self.discriminators = nn.ModuleList([
223
+ DiscriminatorS(use_spectral_norm=True),
224
+ DiscriminatorS(),
225
+ DiscriminatorS(),
226
+ ])
227
+ self.meanpools = nn.ModuleList([
228
+ AvgPool1d(4, 2, padding=2),
229
+ AvgPool1d(4, 2, padding=2)
230
+ ])
231
+
232
+ def forward(self, y, y_hat):
233
+ y_d_rs = []
234
+ y_d_gs = []
235
+ fmap_rs = []
236
+ fmap_gs = []
237
+ for i, d in enumerate(self.discriminators):
238
+ if i != 0:
239
+ y = self.meanpools[i-1](y)
240
+ y_hat = self.meanpools[i-1](y_hat)
241
+ y_d_r, fmap_r = d(y)
242
+ y_d_g, fmap_g = d(y_hat)
243
+ y_d_rs.append(y_d_r)
244
+ fmap_rs.append(fmap_r)
245
+ y_d_gs.append(y_d_g)
246
+ fmap_gs.append(fmap_g)
247
+
248
+ return y_d_rs, y_d_gs, fmap_rs, fmap_gs
249
+
250
+
251
+ def feature_loss(fmap_r, fmap_g):
252
+ loss = 0
253
+ for dr, dg in zip(fmap_r, fmap_g):
254
+ for rl, gl in zip(dr, dg):
255
+ loss += torch.mean(torch.abs(rl - gl))
256
+
257
+ return loss*2
258
+
259
+
260
+ def discriminator_loss(disc_real_outputs, disc_generated_outputs):
261
+ loss = 0
262
+ r_losses = []
263
+ g_losses = []
264
+ for dr, dg in zip(disc_real_outputs, disc_generated_outputs):
265
+ r_loss = torch.mean((1-dr)**2)
266
+ g_loss = torch.mean(dg**2)
267
+ loss += (r_loss + g_loss)
268
+ r_losses.append(r_loss.item())
269
+ g_losses.append(g_loss.item())
270
+
271
+ return loss, r_losses, g_losses
272
+
273
+
274
+ def generator_loss(disc_outputs):
275
+ loss = 0
276
+ gen_losses = []
277
+ for dg in disc_outputs:
278
+ l = torch.mean((1-dg)**2)
279
+ gen_losses.append(l)
280
+ loss += l
281
+
282
+ return loss, gen_losses
283
+
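models.py is the HiFi-GAN vocoder: the mel-to-waveform Generator plus the multi-period / multi-scale discriminators and the GAN, feature-matching and generator losses used during vocoder training. A minimal sketch of running the generator on a mel-spectrogram; the config path and the `AttrDict` helper from env.py are assumptions (expected to follow the upstream HiFi-GAN layout), and a real run would also load trained generator weights:

    import json
    import torch
    from env import AttrDict          # assumed to wrap the JSON config, as in upstream HiFi-GAN
    from models import Generator

    with open("configs/hifigan-config.json") as f:
        h = AttrDict(json.load(f))

    vocoder = Generator(h).eval()

    mel = torch.randn(1, 80, 200)     # [B, n_mels, T_frames]; a real mel comes from the TTS decoder
    with torch.no_grad():
        audio = vocoder(mel)          # [B, 1, T_frames * prod(h.upsample_rates)] waveform samples
    print(audio.shape)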
text/.DS_Store ADDED
Binary file (6.15 kB).
 
text/LICENSE ADDED
@@ -0,0 +1,30 @@
1
+ CMUdict
2
+ -------
3
+
4
+ CMUdict (the Carnegie Mellon Pronouncing Dictionary) is a free
5
+ pronouncing dictionary of English, suitable for uses in speech
6
+ technology and is maintained by the Speech Group in the School of
7
+ Computer Science at Carnegie Mellon University.
8
+
9
+ The Carnegie Mellon Speech Group does not guarantee the accuracy of
10
+ this dictionary, nor its suitability for any specific purpose. In
11
+ fact, we expect a number of errors, omissions and inconsistencies to
12
+ remain in the dictionary. We intend to continually update the
13
+ dictionary by correction existing entries and by adding new ones. From
14
+ time to time a new major version will be released.
15
+
16
+ We welcome input from users: Please send email to Alex Rudnicky
17
18
+
19
+ The Carnegie Mellon Pronouncing Dictionary, in its current and
20
+ previous versions is Copyright (C) 1993-2014 by Carnegie Mellon
21
+ University. Use of this dictionary for any research or commercial
22
+ purpose is completely unrestricted. If you make use of or
23
+ redistribute this material we request that you acknowledge its
24
+ origin in your descriptions.
25
+
26
+ If you add words to or correct words in your version of this
27
+ dictionary, we would appreciate it if you could send these additions
28
+ and corrections to us ([email protected]) for consideration in a
29
+ subsequent version. All submissions will be reviewed and approved by
30
+ the current maintainer, Alex Rudnicky at Carnegie Mellon.
text/__init__.py ADDED
@@ -0,0 +1,106 @@
1
+ """ from https://github.com/keithito/tacotron """
2
+
3
+ import re
4
+ from text import cleaners
5
+ from text.symbols import symbols
6
+ import torch
7
+
8
+
9
+ _symbol_to_id = {s: i for i, s in enumerate(symbols)}
10
+ _id_to_symbol = {i: s for i, s in enumerate(symbols)}
11
+
12
+ _curly_re = re.compile(r'(.*?)\{(.+?)\}(.*)')
13
+
14
+
15
+ def get_arpabet(word, dictionary):
16
+ word_arpabet = dictionary.lookup(word)
17
+ if word_arpabet is not None:
18
+ return "{" + word_arpabet[0] + "}"
19
+ else:
20
+ return word
21
+
22
+
23
+ def text_to_sequence(text, cleaner_names=["kazakh_cleaners"], dictionary=None):
24
+ '''Converts a string of text to a sequence of IDs corresponding to the symbols in the text.
25
+
26
+ The text can optionally have ARPAbet sequences enclosed in curly braces embedded
27
+ in it. For example, "Turn left on {HH AW1 S S T AH0 N} Street."
28
+
29
+ Args:
30
+ text: string to convert to a sequence
31
+ cleaner_names: names of the cleaner functions to run the text through
32
+ dictionary: arpabet class with arpabet dictionary
33
+
34
+ Returns:
35
+ List of integers corresponding to the symbols in the text
36
+ '''
37
+ sequence = []
38
+ space = _symbols_to_sequence(' ')
39
+ # Check for curly braces and treat their contents as ARPAbet:
40
+ while len(text):
41
+ m = _curly_re.match(text)
42
+ if not m:
43
+ clean_text = _clean_text(text, cleaner_names)
44
+ #clean_text = text
45
+ if dictionary is not None:
46
+ clean_text = [get_arpabet(w, dictionary) for w in clean_text.split(" ")]
47
+ for i in range(len(clean_text)):
48
+ t = clean_text[i]
49
+ if t.startswith("{"):
50
+ sequence += _arpabet_to_sequence(t[1:-1])
51
+ else:
52
+ sequence += _symbols_to_sequence(t)
53
+ sequence += space
54
+ else:
55
+ sequence += _symbols_to_sequence(clean_text)
56
+ break
57
+ sequence += _symbols_to_sequence(_clean_text(m.group(1), cleaner_names))
58
+ sequence += _arpabet_to_sequence(m.group(2))
59
+ text = m.group(3)
60
+
61
+ # remove trailing space
62
+ if dictionary is not None:
63
+ sequence = sequence[:-1] if sequence[-1] == space[0] else sequence
64
+ return sequence
65
+
66
+
67
+ def sequence_to_text(sequence):
68
+ '''Converts a sequence of IDs back to a string'''
69
+ result = ''
70
+ for symbol_id in sequence:
71
+ if symbol_id in _id_to_symbol:
72
+ s = _id_to_symbol[symbol_id]
73
+ # Enclose ARPAbet back in curly braces:
74
+ if len(s) > 1 and s[0] == '@':
75
+ s = '{%s}' % s[1:]
76
+ result += s
77
+ return result.replace('}{', ' ')
78
+
79
+ def convert_text(string):
80
+ text_norm = text_to_sequence(string.lower())
81
+ text_norm = torch.IntTensor(text_norm)
82
+ text_len = torch.IntTensor([text_norm.size(0)])
83
+ text_padded = torch.LongTensor(1, len(text_norm))
84
+ text_padded.zero_()
85
+ text_padded[0, :text_norm.size(0)] = text_norm
86
+ return text_padded, text_len
87
+
88
+ def _clean_text(text, cleaner_names):
89
+ for name in cleaner_names:
90
+ cleaner = getattr(cleaners, name)
91
+ if not cleaner:
92
+ raise Exception('Unknown cleaner: %s' % name)
93
+ text = cleaner(text)
94
+ return text
95
+
96
+
97
+ def _symbols_to_sequence(symbols):
98
+ return [_symbol_to_id[s] for s in symbols if _should_keep_symbol(s)]
99
+
100
+
101
+ def _arpabet_to_sequence(text):
102
+ return _symbols_to_sequence(['@' + s for s in text.split()])
103
+
104
+
105
+ def _should_keep_symbol(s):
106
+ return s in _symbol_to_id and s != '_' and s != '~'
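A short sketch of this text front-end (toy strings; the exact symbol inventory comes from text/symbols.py and the default `kazakh_cleaners`, so characters outside that inventory are silently dropped):

    from text import text_to_sequence, sequence_to_text, convert_text

    ids = text_to_sequence("Сәлем, әлем!".lower())   # list of symbol ids
    print(ids)
    print(sequence_to_text(ids))                     # maps the ids back to symbols

    # convert_text lower-cases, cleans and pads the ids into a [1, T_text] LongTensor plus its length
    text_padded, text_len = convert_text("Сәлем, әлем!")
    print(text_padded.shape, text_len)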