# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import json
import numpy as np
import os
import torch
import copy
from g2p_en import G2p
import re
import unicodedata
from g2p_en import G2p
from g2p_en.expand import normalize_numbers

# Module-level G2p instance, constructed as a side effect of importing this
# module. The functions defined below do not read it: process() receives its
# own ``g2p`` argument, and test() and G2pProcessor each build a separate G2p.
# NOTE(review): possibly kept for external importers -- confirm before removing.
g2p = G2p()

# Phoneme vocabulary. A symbol's id is its position in this list (see
# PHPONE2ID), so reordering, inserting or removing entries changes the ids.
# Contents: punctuation marks, the special tokens <BOS>/<EOS>/<PAD>/<UNK>,
# the word-boundary marker "|" (inserted by add_bdr), and phoneme labels
# carrying the position suffix appended by process(): "B" for the first
# phoneme of a word, "I" for interior phonemes, "E" for the last one.
# NOTE(review): "AO1" and "L" carry no position suffix and "UH2B" sits out of
# alphabetical order -- presumably historical; verify before touching them.
PHONE_SET = [
    "!",
    ",",
    ".",
    ".B",
    ":",
    "<BOS>",
    "<EOS>",
    "<PAD>",
    "<UNK>",
    "?",
    "AA0B",
    "AA0E",
    "AA0I",
    "AA1B",
    "AA1E",
    "AA1I",
    "AA2B",
    "AA2E",
    "AA2I",
    "AE0B",
    "AE0E",
    "AE0I",
    "AE1B",
    "AE1E",
    "AE1I",
    "AE2B",
    "AE2E",
    "AE2I",
    "AH0B",
    "AH0E",
    "AH0I",
    "AH1B",
    "AH1E",
    "AH1I",
    "AH2B",
    "AH2E",
    "AH2I",
    "AO0B",
    "AO0E",
    "AO0I",
    "AO1",
    "AO1B",
    "AO1E",
    "AO1I",
    "AO2B",
    "AO2E",
    "AO2I",
    "AW0B",
    "AW0E",
    "AW0I",
    "AW1B",
    "AW1E",
    "AW1I",
    "AW2B",
    "AW2E",
    "AW2I",
    "AY0B",
    "AY0E",
    "AY0I",
    "AY1B",
    "AY1E",
    "AY1I",
    "AY2B",
    "AY2E",
    "AY2I",
    "BB",
    "BE",
    "BI",
    "CHB",
    "CHE",
    "CHI",
    "DB",
    "DE",
    "DHB",
    "DHE",
    "DHI",
    "DI",
    "EH0B",
    "EH0E",
    "EH0I",
    "EH1B",
    "EH1E",
    "EH1I",
    "EH2B",
    "EH2E",
    "EH2I",
    "ER0B",
    "ER0E",
    "ER0I",
    "ER1B",
    "ER1E",
    "ER1I",
    "ER2B",
    "ER2E",
    "ER2I",
    "EY0B",
    "EY0E",
    "EY0I",
    "EY1B",
    "EY1E",
    "EY1I",
    "EY2B",
    "EY2E",
    "EY2I",
    "FB",
    "FE",
    "FI",
    "GB",
    "GE",
    "GI",
    "HHB",
    "HHE",
    "HHI",
    "IH0B",
    "IH0E",
    "IH0I",
    "IH1B",
    "IH1E",
    "IH1I",
    "IH2B",
    "IH2E",
    "IH2I",
    "IY0B",
    "IY0E",
    "IY0I",
    "IY1B",
    "IY1E",
    "IY1I",
    "IY2B",
    "IY2E",
    "IY2I",
    "JHB",
    "JHE",
    "JHI",
    "KB",
    "KE",
    "KI",
    "L",
    "LB",
    "LE",
    "LI",
    "MB",
    "ME",
    "MI",
    "NB",
    "NE",
    "NGB",
    "NGE",
    "NGI",
    "NI",
    "OW0B",
    "OW0E",
    "OW0I",
    "OW1B",
    "OW1E",
    "OW1I",
    "OW2B",
    "OW2E",
    "OW2I",
    "OY0B",
    "OY0E",
    "OY0I",
    "OY1B",
    "OY1E",
    "OY1I",
    "OY2B",
    "OY2E",
    "OY2I",
    "PB",
    "PE",
    "PI",
    "RB",
    "RE",
    "RI",
    "SB",
    "SE",
    "SHB",
    "SHE",
    "SHI",
    "SI",
    "TB",
    "TE",
    "THB",
    "THE",
    "THI",
    "TI",
    "UH0B",
    "UH0E",
    "UH0I",
    "UH1B",
    "UH2B",
    "UH1E",
    "UH1I",
    "UH2E",
    "UH2I",
    "UW0B",
    "UW0E",
    "UW0I",
    "UW1B",
    "UW1E",
    "UW1I",
    "UW2B",
    "UW2E",
    "UW2I",
    "VB",
    "VE",
    "VI",
    "WB",
    "WE",
    "WI",
    "YB",
    "YE",
    "YI",
    "ZB",
    "ZE",
    "ZHB",
    "ZHE",
    "ZHI",
    "ZI",
    "|",
]
# Reverse lookup: phoneme symbol -> its index in PHONE_SET.
PHPONE2ID = {symbol: index for index, symbol in enumerate(PHONE_SET)}

# Punctuation characters that survive preprocess_text's character filter.
# The string is interpolated verbatim into regex character classes there.
PUNCS = "!,.?;:"


def is_sil_phoneme(p):
    """Return True when ``p`` is empty or does not start with a letter.

    Such symbols (punctuation, "|", "<BOS>", ...) are treated as silent /
    non-word entries by the rest of this module.
    """
    if p == "":
        return True
    starts_with_letter = p[0].isalpha()
    return not starts_with_letter


def add_bdr(txt_struct):
    """Insert a ``["|", ["|"]]`` boundary entry between adjacent words.

    A boundary is added after an entry only when both that entry and the one
    following it are non-silent according to is_sil_phoneme. The input list
    is not modified; a new list holding the same entry objects is returned.
    """
    with_boundaries = []
    last_index = len(txt_struct) - 1
    for index, entry in enumerate(txt_struct):
        with_boundaries.append(entry)
        if index == last_index:
            # Nothing follows the final entry, so no boundary is needed.
            continue
        if is_sil_phoneme(entry[0]) or is_sil_phoneme(txt_struct[index + 1][0]):
            continue
        with_boundaries.append(["|", ["|"]])
    return with_boundaries


def preprocess_text(text):
    """Normalize raw English text into lowercase words separated by spaces.

    Steps, in order: expand numbers via ``normalize_numbers``, strip accents,
    lowercase, drop quotes/parentheses, turn hyphens into spaces, remove every
    character that is not a space, ``a-z`` or in ``PUNCS``, tidy punctuation,
    expand the abbreviations "i.e." / "etc.", and finally replace all
    remaining punctuation with spaces and collapse whitespace runs.

    Args:
        text: Input string.

    Returns:
        The normalized string. It may start or end with a single space (for
        example when the input ends with punctuation); callers that need a
        trimmed string should ``strip()`` it.
    """
    text = normalize_numbers(text)
    text = "".join(
        char
        for char in unicodedata.normalize("NFD", text)
        if unicodedata.category(char) != "Mn"
    )  # Strip accents
    text = text.lower()
    text = re.sub("['\"()]+", "", text)
    text = re.sub("[-]+", " ", text)
    text = re.sub(f"[^ a-z{PUNCS}]", "", text)
    text = re.sub(f" ?([{PUNCS}]) ?", r"\1", text)  # "a , b" -> "a,b"
    text = re.sub(f"([{PUNCS}])+", r"\1", text)  # !! -> !
    # The space that followed the abbreviation was removed two lines above,
    # so the replacement must re-introduce one; otherwise "etc. and" becomes
    # "etcand" and "i.e. x" becomes "that isx". Surplus spaces are collapsed
    # by the final whitespace pass.
    text = text.replace("i.e.", "that is ")
    text = text.replace("etc.", "etc ")
    text = re.sub(f"([{PUNCS}])", r" ", text)  # remove punctuations for now
    text = re.sub(r"\s+", r" ", text)
    return text


def postprocess(txt_struct):
    """Trim silent edges, add word boundaries and wrap with <BOS>/<EOS>.

    Leading and trailing entries whose word is silent (per is_sil_phoneme) are
    dropped, add_bdr inserts "|" entries between adjacent words, and the
    result is framed by a ``<BOS>`` entry in front and an ``<EOS>`` entry at
    the end.
    """
    first, stop = 0, len(txt_struct)
    # Skip silent entries at the front ...
    while first < stop and is_sil_phoneme(txt_struct[first][0]):
        first += 1
    # ... and at the back.
    while stop > first and is_sil_phoneme(txt_struct[stop - 1][0]):
        stop -= 1
    body = add_bdr(txt_struct[first:stop])
    bos_entry = ["<BOS>", ["<BOS>"]]
    eos_entry = ["<EOS>", ["<EOS>"]]
    return [bos_entry] + body + [eos_entry]


def process(txt, g2p):
    """Convert text into a per-word phoneme structure with position suffixes.

    Args:
        txt: Raw input text.
        g2p: Callable mapping the normalized text to a flat sequence of
            phoneme strings in which the element ``" "`` separates words.

    Returns:
        A tuple ``(txt_struct, txt)``. ``txt_struct`` is a list of
        ``[word, phonemes]`` entries as produced by postprocess (framed by
        ``<BOS>``/``<EOS>`` with ``"|"`` entries between words); within each
        word the first phoneme is suffixed with "B", interior ones with "I"
        and the last with "E" (a single-phoneme word only gets "B").
        ``txt`` is the normalized, stripped text.
    """
    txt = preprocess_text(txt).strip()
    phs = g2p(txt)
    txt_struct = [[w, []] for w in txt.split(" ")]
    i_word = 0
    for p in phs:
        if p == " ":
            i_word += 1
        else:
            # NOTE(review): assumes g2p emits exactly one " " between the
            # words of txt.split(" "); if its tokenizer splits a word further
            # this index overruns txt_struct -- TODO confirm against g2p_en.
            txt_struct[i_word][1].append(p)

    # txt_struct was built locally above, so it can be tagged in place.
    for word, phones in txt_struct:
        if is_sil_phoneme(word) or not phones:
            # Silent entries are left untouched; a word for which g2p
            # produced no phonemes has nothing to tag (indexing [0] on it
            # would raise IndexError).
            continue
        phones[0] += "B"
        for i in range(1, len(phones) - 1):
            phones[i] += "I"
        if len(phones) > 1:
            phones[-1] += "E"

    txt_struct_ret = postprocess(txt_struct)

    return txt_struct_ret, txt


def test():
    """Run process() on a fixed sentence and print each intermediate result.

    Prints, in order: the word/phoneme structure, the normalized text, the
    flattened phoneme sequence and the corresponding ids from PHPONE2ID.
    """
    converter = G2p()
    txt_struct, normalized = process("This is a test sentence.", converter)
    print(txt_struct)
    print(normalized)
    phone_seq = []
    for entry in txt_struct:
        phone_seq.extend(entry[1])
    print(phone_seq)
    print([PHPONE2ID[phone] for phone in phone_seq])


class G2pProcessor:
    """Text <-> phoneme-id converter backed by its own G2p instance."""

    def __init__(self):
        self.g2p = G2p()

    def __call__(self, txt, lang="en"):
        """Shorthand for txt2phoneid(txt); ``lang`` is accepted but unused."""
        return self.txt2phoneid(txt)

    def txt2phoneid(self, txt):
        """Return ``(None, ids)`` where ``ids`` are the PHPONE2ID ids for ``txt``.

        The ids cover every phoneme in the structure returned by process(),
        including the <BOS>/<EOS> and "|" entries.
        """
        txt_struct, _normalized = process(txt, self.g2p)
        phone_seq = []
        for entry in txt_struct:
            phone_seq.extend(entry[1])
        phone_id = []
        for phone in phone_seq:
            phone_id.append(PHPONE2ID[phone])
        return None, phone_id

    def phoneid2txt(self, phone_id):
        """Map a sequence of ids back to their PHONE_SET symbols."""
        return [PHONE_SET[index] for index in phone_id]


if __name__ == "__main__":
    # Smoke run: convert one sentence to ids and back to phoneme symbols.
    # A dedicated name is used so the module-level ``g2p`` is not rebound.
    processor = G2pProcessor()
    txt = "This is a test sentence."
    phoneid = processor.txt2phoneid(txt)[1]
    # ids for the symbols listed below, per the 220-entry PHONE_SET above:
    # output: [5, 73, 118, 175, 219, 116, 214, 219, 28, 219, 180, 82, 179, 181, 219, 174, 82, 149, 185, 30, 149, 175, 6]
    # print(phoneid)
    print(processor.phoneid2txt(phoneid))
    # output: ['<BOS>', 'DHB', 'IH1I', 'SE', '|', 'IH1B', 'ZE', '|', 'AH0B', '|', 'TB', 'EH1I', 'SI', 'TE', '|', 'SB', 'EH1I', 'NI', 'TI', 'AH0I', 'NI', 'SE', '<EOS>']
    print(len(PHONE_SET))
    # output: 220