Spaces:
Runtime error
Runtime error
Upload synthesizer/utils/numbers.py with huggingface_hub
Browse files- synthesizer/utils/numbers.py +68 -0
synthesizer/utils/numbers.py
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import inflect
|
3 |
+
|
4 |
+
# Shared inflect engine; the helpers below call its number_to_words() to spell numbers out.
_inflect = inflect.engine()
|
5 |
+
# A digit, then one or more digits/commas, then a digit -- e.g. "1,234,567".
_comma_number_re = re.compile(r"([0-9][0-9\,]+[0-9])")
# Digits on both sides of a single decimal point -- e.g. "3.14".
_decimal_number_re = re.compile(r"([0-9]+\.[0-9]+)")
# A pound sign followed by digits (commas allowed before the final digits).
_pounds_re = re.compile(r"£([0-9\,]*[0-9]+)")
# A dollar sign followed by digits; dots and commas are allowed before the final digits.
_dollars_re = re.compile(r"\$([0-9\.\,]*[0-9]+)")
# Digits immediately followed by an ordinal suffix -- e.g. "1st", "22nd".
_ordinal_re = re.compile(r"[0-9]+(st|nd|rd|th)")
# Any run of digits.
_number_re = re.compile(r"[0-9]+")
|
11 |
+
|
12 |
+
|
13 |
+
def _remove_commas(m):
    """Return the first capture group of match ``m`` with every comma removed.

    Written as a ``re.sub`` replacement callback: "1,234" becomes "1234".
    """
    return m.group(1).replace(",", "")
|
15 |
+
|
16 |
+
|
17 |
+
def _expand_decimal_point(m):
    """Return the first capture group of match ``m`` with "." spelled as " point ".

    Written as a ``re.sub`` replacement callback: "3.14" becomes "3 point 14".
    The digits themselves are left untouched.
    """
    return m.group(1).replace(".", " point ")
|
19 |
+
|
20 |
+
|
21 |
+
def _expand_dollars(m):
    """Rewrite a matched dollar amount as "<n> dollar(s), <n> cent(s)".

    Written as a ``re.sub`` replacement callback; the first capture group of
    ``m`` holds the amount without the "$" sign and may contain digits, dots
    and commas. The numbers in the result stay as digits.

    Returns:
        "zero dollars" when both parts are zero, the dollar part and/or the
        cent part with a singular or plural unit otherwise, or the raw amount
        followed by " dollars" when it contains more than one decimal point.
    """
    raw = m.group(1)
    # Commas carry no value, and one left inside the amount (e.g. "$,5")
    # would make int() raise ValueError, so drop them before parsing.
    parts = raw.replace(",", "").split(".")
    if len(parts) > 2:
        return raw + " dollars"  # Unexpected format

    dollars = int(parts[0]) if parts[0] else 0
    cents_text = parts[1] if len(parts) > 1 else ""
    if len(cents_text) == 1:
        # A single fractional digit is tens of cents: "$1.5" is fifty cents, not five.
        cents_text += "0"
    cents = int(cents_text) if cents_text else 0

    dollar_unit = "dollar" if dollars == 1 else "dollars"
    cent_unit = "cent" if cents == 1 else "cents"
    if dollars and cents:
        return "%s %s, %s %s" % (dollars, dollar_unit, cents, cent_unit)
    if dollars:
        return "%s %s" % (dollars, dollar_unit)
    if cents:
        return "%s %s" % (cents, cent_unit)
    return "zero dollars"
|
40 |
+
|
41 |
+
|
42 |
+
def _expand_ordinal(m):
    """Return the words inflect produces for the whole ordinal match (e.g. "3rd").

    Written as a ``re.sub`` replacement callback. The full match, suffix
    included, is handed to ``number_to_words``.
    NOTE(review): relies on inflect recognising the st/nd/rd/th suffix and
    answering with the ordinal word ("third") -- confirm against inflect docs.
    """
    return _inflect.number_to_words(m.group(0))
|
44 |
+
|
45 |
+
|
46 |
+
def _expand_number(m):
    """Return the digit run matched by ``m`` spelled out in words.

    Written as a ``re.sub`` replacement callback. Values strictly between
    1000 and 3000 get special treatment (presumably so they read like
    calendar years); everything else goes through inflect's default
    spelling with the "and" word suppressed.
    """
    num = int(m.group(0))
    if not 1000 < num < 3000:
        return _inflect.number_to_words(num, andword="")

    if num == 2000:
        return "two thousand"
    if 2000 < num < 2010:
        # 2001-2009: "two thousand" followed by the last digit in words.
        return "two thousand " + _inflect.number_to_words(num % 100)
    if num % 100 == 0:
        # Whole hundreds: the leading two digits followed by "hundred".
        return _inflect.number_to_words(num // 100) + " hundred"
    # group=2 asks inflect to read the digits in pairs, with zeros spoken as
    # "oh"; the ", " it places between groups is replaced by a plain space.
    return _inflect.number_to_words(num, andword="", zero="oh", group=2).replace(", ", " ")
|
59 |
+
|
60 |
+
|
61 |
+
def normalize_numbers(text):
    """Return ``text`` with its numeric expressions rewritten as words.

    The substitutions run in a fixed order, and each pass relies on the ones
    before it:

    1. Commas inside numbers are removed so later patterns see plain digits.
    2. Pound and dollar amounts are rewritten before the decimal pass, which
       would otherwise turn the "." of an amount into " point ".
    3. Decimal points between digits become " point ".
    4. Ordinals are expanded before plain numbers, so a token such as "1st"
       is handled as a whole instead of having its digits replaced first.
    5. Every remaining run of digits is spelled out.
    """
    text = _comma_number_re.sub(_remove_commas, text)
    text = _pounds_re.sub(r"\1 pounds", text)
    text = _dollars_re.sub(_expand_dollars, text)
    text = _decimal_number_re.sub(_expand_decimal_point, text)
    text = _ordinal_re.sub(_expand_ordinal, text)
    text = _number_re.sub(_expand_number, text)
    return text
|