keithhon commited on
Commit
39d0bf3
·
1 Parent(s): f9d520a

Upload synthesizer/utils/numbers.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. synthesizer/utils/numbers.py +68 -0
synthesizer/utils/numbers.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import inflect
3
+
4
+ _inflect = inflect.engine()
5
+ _comma_number_re = re.compile(r"([0-9][0-9\,]+[0-9])")
6
+ _decimal_number_re = re.compile(r"([0-9]+\.[0-9]+)")
7
+ _pounds_re = re.compile(r"£([0-9\,]*[0-9]+)")
8
+ _dollars_re = re.compile(r"\$([0-9\.\,]*[0-9]+)")
9
+ _ordinal_re = re.compile(r"[0-9]+(st|nd|rd|th)")
10
+ _number_re = re.compile(r"[0-9]+")
11
+
12
+
13
+ def _remove_commas(m):
14
+ return m.group(1).replace(",", "")
15
+
16
+
17
+ def _expand_decimal_point(m):
18
+ return m.group(1).replace(".", " point ")
19
+
20
+
21
+ def _expand_dollars(m):
22
+ match = m.group(1)
23
+ parts = match.split(".")
24
+ if len(parts) > 2:
25
+ return match + " dollars" # Unexpected format
26
+ dollars = int(parts[0]) if parts[0] else 0
27
+ cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
28
+ if dollars and cents:
29
+ dollar_unit = "dollar" if dollars == 1 else "dollars"
30
+ cent_unit = "cent" if cents == 1 else "cents"
31
+ return "%s %s, %s %s" % (dollars, dollar_unit, cents, cent_unit)
32
+ elif dollars:
33
+ dollar_unit = "dollar" if dollars == 1 else "dollars"
34
+ return "%s %s" % (dollars, dollar_unit)
35
+ elif cents:
36
+ cent_unit = "cent" if cents == 1 else "cents"
37
+ return "%s %s" % (cents, cent_unit)
38
+ else:
39
+ return "zero dollars"
40
+
41
+
42
+ def _expand_ordinal(m):
43
+ return _inflect.number_to_words(m.group(0))
44
+
45
+
46
+ def _expand_number(m):
47
+ num = int(m.group(0))
48
+ if num > 1000 and num < 3000:
49
+ if num == 2000:
50
+ return "two thousand"
51
+ elif num > 2000 and num < 2010:
52
+ return "two thousand " + _inflect.number_to_words(num % 100)
53
+ elif num % 100 == 0:
54
+ return _inflect.number_to_words(num // 100) + " hundred"
55
+ else:
56
+ return _inflect.number_to_words(num, andword="", zero="oh", group=2).replace(", ", " ")
57
+ else:
58
+ return _inflect.number_to_words(num, andword="")
59
+
60
+
61
+ def normalize_numbers(text):
62
+ text = re.sub(_comma_number_re, _remove_commas, text)
63
+ text = re.sub(_pounds_re, r"\1 pounds", text)
64
+ text = re.sub(_dollars_re, _expand_dollars, text)
65
+ text = re.sub(_decimal_number_re, _expand_decimal_point, text)
66
+ text = re.sub(_ordinal_re, _expand_ordinal, text)
67
+ text = re.sub(_number_re, _expand_number, text)
68
+ return text