# Spaces: cdactvm / Tamil_ASR_Demo (Running)
# File size: 3,159 Bytes
# 0655b26

import nbimporter
from isNumber import is_number  # Remove or replace this if unnecessary

def text_to_int(textnum, numwords=None):
    """Replace spelled-out English numbers in ``textnum`` with digits.

    The text is split on whitespace (hyphens are treated as spaces first).
    Runs of number words such as ``"one hundred and twenty three"`` are
    collapsed into their decimal value, while every other word is passed
    through unchanged.  Consecutive unit words are kept as separate numbers
    (``"one two three"`` -> ``"1 2 3"``).  Ordinals (``"first"``,
    ``"fourth"``, ``"twentieth"``) are emitted as their cardinal value.
    The scale word ``"lac"`` is supported as 10**5.

    Args:
        textnum: Input text.  Matching is case-sensitive; number words are
            expected in lower case.
        numwords: Optional mapping ``word -> (scale, increment)``.  When it
            is ``None`` a fresh table is built.  When an empty dict is
            passed it is filled in place with the built-in table.  A
            non-empty mapping is used as given.

    Returns:
        The converted text, with tokens separated by single spaces and no
        leading or trailing whitespace.
    """
    units = ['zero', 'one', 'two', 'three', 'four', 'five', 'six', 'seven',
             'eight', 'nine', 'ten', 'eleven', 'twelve', 'thirteen',
             'fourteen', 'fifteen', 'sixteen', 'seventeen', 'eighteen',
             'nineteen']
    # The two leading placeholders keep each word at index == value / 10.
    tens = ['', '', 'twenty', 'thirty', 'forty', 'fifty', 'sixty', 'seventy',
            'eighty', 'ninety']
    # Magnitudes are spelled out explicitly.  Deriving them from the list
    # position broke once "lac" was inserted (million became 10**9, etc.).
    scales = {
        'hundred': 10 ** 2,
        'thousand': 10 ** 3,
        'lac': 10 ** 5,
        'million': 10 ** 6,
        'billion': 10 ** 9,
        'trillion': 10 ** 12,
    }
    ordinal_words = {'first': 1, 'second': 2, 'third': 3, 'fifth': 5,
                     'eighth': 8, 'ninth': 9, 'twelfth': 12}
    ordinal_endings = [('ieth', 'y'), ('th', '')]

    if numwords is None:
        numwords = {}
    if not numwords:
        numwords['and'] = (1, 0)  # Handle "one hundred and twenty"
        for idx, word in enumerate(units):
            numwords[word] = (1, idx)
        for idx, word in enumerate(tens):
            if word:  # skip the '' placeholders so '' is never a number word
                numwords[word] = (1, idx * 10)
        for word, magnitude in scales.items():
            numwords[word] = (magnitude, 0)

    def is_numword(x):
        # Table lookup first: known number words never reach is_number().
        return x in numwords or is_number(x)

    def from_numword(x):
        if x in numwords:
            return numwords[x]
        # Digit token such as "1,200".  Scale 0 makes it replace `current`.
        # NOTE(review): int() raises ValueError if is_number() also accepts
        # non-integer strings (e.g. "3.5") - confirm against is_number.
        return 0, int(x.replace(',', ''))

    # Remove hyphens and normalize input
    textnum = textnum.replace('-', ' ')

    parts = []             # output tokens, joined with single spaces
    current = result = 0   # value being built / value already closed by a big scale
    onnumber = False       # a number is pending and not yet written to parts
    lastunit = False       # previous token was one of `units`
    lastscale = False      # previous token was one of `scales`

    for word in textnum.split():
        if word in ordinal_words:
            # Irregular ordinals always have scale 1: just add their value.
            current += ordinal_words[word]
            onnumber = True
            lastunit = False
            lastscale = False
            continue

        # Turn a regular ordinal into its cardinal ("fourth" -> "four",
        # "twentieth" -> "twenty"), but only when the result really is a
        # number word, so ordinary words such as "with" stay intact.
        for ending, replacement in ordinal_endings:
            if word.endswith(ending):
                candidate = word[:-len(ending)] + replacement
                if is_numword(candidate):
                    word = candidate
                    break

        # "and" only belongs to a number directly after a scale word.
        if not is_numword(word) or (word == 'and' and not lastscale):
            if onnumber:
                parts.append(str(result + current))
            parts.append(word)
            result = current = 0
            onnumber = False
            lastunit = False
            lastscale = False
            continue

        scale, increment = from_numword(word)
        onnumber = True

        # Two units in a row ("one two") are separate numbers.
        if lastunit and word not in scales:
            parts.append(str(result + current))
            result = current = 0

        if scale > 1:
            # A bare scale word ("hundred") means one of that scale.
            current = max(1, current)

        current = current * scale + increment

        # Only scales above "hundred" close a group.  "hundred" must stay in
        # `current` so that "two hundred thousand" gives 200 * 1000.
        if scale > 100:
            result += current
            current = 0

        lastscale = word in scales
        lastunit = word in units

    if onnumber:
        parts.append(str(result + current))

    return " ".join(parts)