# Copyright (c) 2024 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import re
from unidecode import unidecode
import inflect

"""
    Text clean time
"""
# Shared inflect engine used to spell numbers out as English words.
_inflect = inflect.engine()
# Digits with embedded commas, e.g. "1,234"; must start and end with a digit.
_comma_number_re = re.compile(r"([0-9][0-9\,]+[0-9])")
# Decimal number with digits on both sides of the point, e.g. "3.14".
_decimal_number_re = re.compile(r"([0-9]+\.[0-9]+)")
# Number immediately followed by "%"; the captured group includes the "%".
_percent_number_re = re.compile(r"([0-9\.\,]*[0-9]+%)")
# Amount prefixed with "£"; only the numeric part is captured.
_pounds_re = re.compile(r"£([0-9\,]*[0-9]+)")
# Amount prefixed with "$"; only the numeric part is captured.
_dollars_re = re.compile(r"\$([0-9\.\,]*[0-9]+)")
# Two integers separated by "/", captured as group 1 and group 2.
_fraction_re = re.compile(r"([0-9]+)/([0-9]+)")
# Integer followed by an ordinal suffix ("st", "nd", "rd" or "th").
_ordinal_re = re.compile(r"[0-9]+(st|nd|rd|th)")
# Any run of one or more digits.
_number_re = re.compile(r"[0-9]+")

# List of (regular expression, replacement) pairs for abbreviations:
_abbreviations = [
    # Each abbreviation is matched as a whole word, ignoring case.
    (re.compile(r"\b%s\b" % abbreviation, re.IGNORECASE), expansion)
    for abbreviation, expansion in (
        ("mrs", "misess"),
        ("mr", "mister"),
        ("dr", "doctor"),
        ("st", "saint"),
        ("co", "company"),
        ("jr", "junior"),
        ("maj", "major"),
        ("gen", "general"),
        ("drs", "doctors"),
        ("rev", "reverend"),
        ("lt", "lieutenant"),
        ("hon", "honorable"),
        ("sgt", "sergeant"),
        ("capt", "captain"),
        ("esq", "esquire"),
        ("ltd", "limited"),
        ("col", "colonel"),
        ("ft", "fort"),
        ("etc", "et cetera"),
        ("btw", "by the way"),
    )
]

# (source, replacement) phoneme rewrites applied in order by special_map().
# A source sequence is only rewritten where it is delimited on both sides by
# "_", "|" or the start/end of the string; "|" inside a source is matched
# literally.
_special_map = [
    ("t|ɹ", "tɹ"),
    ("d|ɹ", "dɹ"),
    ("t|s", "ts"),
    ("d|z", "dz"),
    ("ɪ|ɹ", "ɪɹ"),
    ("ɐ", "ɚ"),
    ("ᵻ", "ɪ"),
    ("əl", "l"),
    ("x", "k"),
    ("ɬ", "l"),
    ("ʔ", "t"),
    ("n̩", "n"),
    ("oː|ɹ", "oːɹ"),
]


def expand_abbreviations(text):
    """Replace every known abbreviation in ``text`` with its expansion.

    The (pattern, expansion) pairs from ``_abbreviations`` are applied one
    after another, each on the output of the previous one.
    """
    for pattern, expansion in _abbreviations:
        text = pattern.sub(expansion, text)
    return text


def _remove_commas(m):
    """Return the first captured group of ``m`` with all commas removed."""
    pieces = m.group(1).split(",")
    return "".join(pieces)


def _expand_decimal_point(m):
    """Return the first captured group of ``m`` with "." read as " point "."""
    pieces = m.group(1).split(".")
    return " point ".join(pieces)


def _expand_percent(m):
    """Return the first captured group of ``m`` with "%" read as " percent "."""
    matched = m.group(1)
    spoken = matched.replace("%", " percent ")
    return spoken


def _expand_dollars(m):
    """Rewrite a matched dollar amount as "<n> dollars, <n> cents".

    Args:
        m: Regex match whose first group is the numeric part of the amount
            (digits, optionally with "." and ",").

    Returns:
        The spoken form, padded with a space on each side. The numbers are
        kept as digits. An amount with more than one "." is returned
        unchanged followed by "dollars".
    """
    raw = m.group(1)
    # Commas are only thousands separators; int() would raise on them.
    parts = raw.replace(",", "").split(".")
    if len(parts) > 2:
        return " " + raw + " dollars "  # Unexpected format
    dollars = int(parts[0]) if parts[0] else 0
    cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
    dollar_unit = "dollar" if dollars == 1 else "dollars"
    cent_unit = "cent" if cents == 1 else "cents"
    if dollars and cents:
        return " %s %s, %s %s " % (dollars, dollar_unit, cents, cent_unit)
    if dollars:
        return " %s %s " % (dollars, dollar_unit)
    if cents:
        return " %s %s " % (cents, cent_unit)
    return " zero dollars "


def fraction_to_words(numerator, denominator):
    """Return the spoken form of ``numerator/denominator``.

    Denominators 2 and 4 are read as "half"/"halves" and
    "quarter"/"quarters". Any other denominator is read as an ordinal,
    pluralised when the numerator is not 1 (e.g. "one third", "two thirds").

    Args:
        numerator: Integer numerator.
        denominator: Integer denominator.

    Returns:
        The words, padded with a space on each side.
    """
    if denominator == 2:
        if numerator == 1:
            return " one half "
        return " " + _inflect.number_to_words(numerator) + " halves "
    if denominator == 4:
        if numerator == 1:
            return " one quarter "
        return " " + _inflect.number_to_words(numerator) + " quarters "
    denominator_words = _inflect.ordinal(_inflect.number_to_words(denominator))
    if numerator != 1:
        # Match the plural used for halves/quarters; every English ordinal
        # takes a plain "s".
        denominator_words += "s"
    return " " + _inflect.number_to_words(numerator) + " " + denominator_words + " "


def _expand_fraction(m):
    """Spell out a matched "a/b" fraction using ``fraction_to_words``.

    Groups 1 and 2 of ``m`` are parsed as the integer numerator and
    denominator.
    """
    numerator, denominator = map(int, m.group(1, 2))
    return fraction_to_words(numerator, denominator)


def _expand_ordinal(m):
    """Spell out a matched ordinal, padded with a space on each side.

    The whole match (digits plus suffix) is handed to inflect.
    """
    spoken = _inflect.number_to_words(m.group(0))
    return " " + spoken + " "


def _expand_number(m):
    """Spell out a matched run of digits, padded with a space on each side.

    Values strictly between 1000 and 3000 get special readings:
    2000 is "two thousand", 2001-2009 are "two thousand <n>", exact
    hundreds are "<n> hundred", and everything else is read in two-digit
    groups with "oh" for zero. All other values use inflect's default
    reading without "and".
    """
    value = int(m.group(0))
    if not 1000 < value < 3000:
        return " " + _inflect.number_to_words(value, andword="") + " "
    if value == 2000:
        return " two thousand "
    if 2000 < value < 2010:
        return " two thousand " + _inflect.number_to_words(value % 100) + " "
    if value % 100 == 0:
        return " " + _inflect.number_to_words(value // 100) + " hundred "
    grouped = _inflect.number_to_words(value, andword="", zero="oh", group=2)
    return " " + grouped.replace(", ", " ") + " "


# Normalize numbers pronunciation
def normalize_numbers(text):
    """Rewrite numeric expressions in ``text`` into a speakable form.

    The substitutions run in a fixed order: comma removal, pounds,
    dollars, fractions, decimals, percentages, ordinals and finally any
    remaining plain digit runs.
    """
    passes = (
        (_comma_number_re, _remove_commas),
        (_pounds_re, r"\1 pounds"),
        (_dollars_re, _expand_dollars),
        (_fraction_re, _expand_fraction),
        (_decimal_number_re, _expand_decimal_point),
        (_percent_number_re, _expand_percent),
        (_ordinal_re, _expand_ordinal),
        (_number_re, _expand_number),
    )
    for pattern, replacement in passes:
        text = pattern.sub(replacement, text)
    return text


def _english_to_ipa(text):
    """Normalise ``text`` by expanding abbreviations, then numbers.

    Despite the name, no phoneme conversion happens here; the result is
    still ordinary text.
    """
    # text = unidecode(text).lower()
    return normalize_numbers(expand_abbreviations(text))


# special map
def special_map(text):
    """Apply the ``_special_map`` phoneme rewrites to a phoneme string.

    Each source sequence is replaced only where it is delimited on both
    sides by ``_``, ``|`` or the start/end of ``text``. The mappings are
    applied in list order.

    Args:
        text: Phoneme string to rewrite.

    Returns:
        The rewritten string.
    """
    for source, replacement in _special_map:
        # re.escape makes the "|" separators inside a source match literally
        # without relying on a hand-written backslash escape.
        pattern = re.compile(r"(^|[_|]){}([_|]|$)".format(re.escape(source)))

        def _rewrite(match, replacement=replacement):
            # Assemble the result by hand so the replacement text is never
            # interpreted as a regex template.
            return match.group(1) + replacement + match.group(2)

        # One pass can miss adjacent occurrences because the delimiter
        # consumed by one match cannot also open the next, so repeat until
        # nothing matches.
        while pattern.search(text):
            text = pattern.sub(_rewrite, text)
    return text


# Add some special operation
def _append_end_marker_and_map(phonemes):
    """Append "|_" after a trailing phoneme character, then apply special_map."""
    # Guard against an empty string before looking at its last character.
    if phonemes and phonemes[-1] in "p⁼ʰmftnlkxʃs`ɹaoəɛɪeɑʊŋiuɥwæjː":
        phonemes += "|_"
    return special_map(phonemes)


def english_to_ipa(text, text_tokenizer):
    """Normalise English text, tokenize it and post-process the phonemes.

    Args:
        text: A single string, or an iterable of strings.
        text_tokenizer: Callable applied to the normalised text. It is
            assumed to return one phoneme string for a string input and a
            sequence of phoneme strings for a list input (TODO confirm
            against the tokenizer in use).

    Returns:
        A post-processed phoneme string for a string input, otherwise a
        list with one post-processed phoneme string per tokenizer output.
    """
    is_single = isinstance(text, str)
    if is_single:
        text = _english_to_ipa(text)
    else:
        text = [_english_to_ipa(t) for t in text]
    phonemes = text_tokenizer(text)
    if is_single:
        return _append_end_marker_and_map(phonemes)
    # The trailing-marker rule is applied to each phoneme string on its own;
    # applying it to the list itself would test and extend the list rather
    # than its strings.
    return [_append_end_marker_and_map(phone) for phone in phonemes]