Spaces:
Sleeping
Sleeping
neuralworm
committed on
Commit
•
2554dfe
1
Parent(s):
c45b5bd
Upload utils.py
Browse files
utils.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
logger = logging.getLogger(__name__)
|
3 |
+
logging.basicConfig(level=logging.INFO)
|
4 |
+
|
5 |
+
import inflect
|
6 |
+
from datetime import datetime
|
7 |
+
from deep_translator import GoogleTranslator
|
8 |
+
|
9 |
+
# Custom function to convert number to ordinal words
|
10 |
+
def number_to_ordinal_word(number):
    """Return the English ordinal word for a day-of-month number.

    Numbers 1 through 31 map to their ordinal word; compound ordinals are
    written as a single unhyphenated word (for example ``"twentyfirst"``).
    Any other value yields an empty string.
    """
    # Position in this tuple (counting from 1) is the number being named.
    words_in_order = (
        "first", "second", "third", "fourth", "fifth",
        "sixth", "seventh", "eighth", "ninth", "tenth",
        "eleventh", "twelfth", "thirteenth", "fourteenth",
        "fifteenth", "sixteenth", "seventeenth", "eighteenth",
        "nineteenth", "twentieth", "twentyfirst", "twentysecond",
        "twentythird", "twentyfourth", "twentyfifth",
        "twentysixth", "twentyseventh", "twentyeighth",
        "twentyninth", "thirtieth", "thirtyfirst",
    )
    lookup = dict(enumerate(words_in_order, start=1))
    return lookup.get(number, "")
|
22 |
+
|
23 |
+
# Transliteration table from accented / special Latin letters to plain ASCII,
# built once at import time instead of on every call.
# Every key is a single non-ASCII character and every value is pure ASCII, so
# a one-pass translation gives the same result as replacing the keys one
# after another.
_ASCII_TRANSLITERATIONS = str.maketrans({
    # German umlauts and sharp s
    'ü': 'ue', 'ö': 'oe', 'ä': 'ae', 'ß': 'ss', 'Ü': 'Ue', 'Ö': 'Oe', 'Ä': 'Ae',
    # a / A
    'á': 'a', 'à': 'a', 'â': 'a', 'ã': 'a', 'å': 'aa', 'ā': 'a', 'ă': 'a', 'ą': 'a',
    'Á': 'A', 'À': 'A', 'Â': 'A', 'Ã': 'A', 'Å': 'Aa', 'Ā': 'A', 'Ă': 'A', 'Ą': 'A',
    # e / E
    'é': 'e', 'è': 'e', 'ê': 'e', 'ë': 'e', 'ē': 'e', 'ĕ': 'e', 'ė': 'e', 'ę': 'e', 'ě': 'e',
    'É': 'E', 'È': 'E', 'Ê': 'E', 'Ë': 'E', 'Ē': 'E', 'Ĕ': 'E', 'Ė': 'E', 'Ę': 'E', 'Ě': 'E',
    # i / I (including Turkish dotless i and dotted capital I)
    'í': 'i', 'ì': 'i', 'î': 'i', 'ï': 'i', 'ī': 'i', 'ĭ': 'i', 'į': 'i', 'ı': 'i',
    'Í': 'I', 'Ì': 'I', 'Î': 'I', 'Ï': 'I', 'Ī': 'I', 'Ĭ': 'I', 'Į': 'I', 'İ': 'I',
    # o / O
    'ó': 'o', 'ò': 'o', 'ô': 'o', 'õ': 'o', 'ø': 'oe', 'ō': 'o', 'ŏ': 'o', 'ő': 'o',
    'Ó': 'O', 'Ò': 'O', 'Ô': 'O', 'Õ': 'O', 'Ø': 'Oe', 'Ō': 'O', 'Ŏ': 'O', 'Ő': 'O',
    # u / U
    'ú': 'u', 'ù': 'u', 'û': 'u', 'ū': 'u', 'ŭ': 'u', 'ů': 'u', 'ű': 'u', 'ų': 'u',
    'Ú': 'U', 'Ù': 'U', 'Û': 'U', 'Ū': 'U', 'Ŭ': 'U', 'Ů': 'U', 'Ű': 'U', 'Ų': 'U',
    # c / C
    'ç': 'c', 'ć': 'c', 'ĉ': 'c', 'ċ': 'c', 'č': 'c',
    'Ç': 'C', 'Ć': 'C', 'Ĉ': 'C', 'Ċ': 'C', 'Č': 'C',
    # n / N
    'ñ': 'n', 'ń': 'n', 'ņ': 'n', 'ň': 'n', 'ŋ': 'n',
    'Ñ': 'N', 'Ń': 'N', 'Ņ': 'N', 'Ň': 'N', 'Ŋ': 'N',
    # y / Y
    'ý': 'y', 'ÿ': 'y', 'ŷ': 'y',
    'Ý': 'Y', 'Ÿ': 'Y', 'Ŷ': 'Y',
    # z / Z
    'ž': 'zh', 'ź': 'z', 'ż': 'z',
    'Ž': 'Zh', 'Ź': 'Z', 'Ż': 'Z',
    # eth, thorn, stroked letters and ligatures
    'ð': 'd', 'Ð': 'D', 'þ': 'th', 'Þ': 'Th', 'ł': 'l', 'Ł': 'L', 'đ': 'd', 'Đ': 'D',
    'æ': 'ae', 'Æ': 'Ae', 'œ': 'oe', 'Œ': 'Oe',
    # s / S
    'ś': 's', 'ŝ': 's', 'ş': 's', 'š': 's',
    'Ś': 'S', 'Ŝ': 'S', 'Ş': 'S', 'Š': 'S',
    # t / T
    'ť': 't', 'ţ': 't', 'ŧ': 't', 'Ť': 'T', 'Ţ': 'T', 'Ŧ': 'T',
    # r / R
    'ŕ': 'r', 'ř': 'r', 'Ŕ': 'R', 'Ř': 'R',
    # l / L
    'ľ': 'l', 'ĺ': 'l', 'ļ': 'l', 'ŀ': 'l',
    'Ľ': 'L', 'Ĺ': 'L', 'Ļ': 'L', 'Ŀ': 'L',
    # g, h, j, k
    'ğ': 'g', 'Ğ': 'G',
    'ġ': 'g', 'Ġ': 'G',
    'ħ': 'h', 'Ħ': 'H',
    'ĵ': 'j', 'Ĵ': 'J',
    'ķ': 'k', 'Ķ': 'K',
})


def custom_normalize(text):
    """Replace accented and special Latin letters in *text* with ASCII.

    German umlauts become two-letter forms (``ü`` -> ``ue``), ``ß`` becomes
    ``ss``, a few letters expand to digraphs (``å`` -> ``aa``, ``ø`` -> ``oe``,
    ``ž`` -> ``zh``, ``þ`` -> ``th``), and the remaining listed letters simply
    lose their diacritic. Characters that are not in the table are returned
    unchanged.

    Args:
        text: The string to transliterate.

    Returns:
        A new string with every listed character replaced.
    """
    # One pass over the text instead of one str.replace() call per table entry.
    return text.translate(_ASCII_TRANSLITERATIONS)
|
68 |
+
|
69 |
+
|
70 |
+
|
71 |
+
|
72 |
+
# Convert a numerical date to words with an ordinal day
|
73 |
+
def date_to_words(date_string):
    """Spell out a ``YYYY-MM-DD`` date as ``<ordinal day> <month> <year>``.

    The day is rendered by ``number_to_ordinal_word``, the month is the full
    month name produced by ``strftime("%B")``, and the year is spelled out
    with ``inflect``. Years from 1900 through 1999 are phrased as
    "<century> hundred [<remainder>]"; every other year is spelled as a plain
    number. Commas are stripped from the spelled-out year.

    Args:
        date_string: A date in ``%Y-%m-%d`` format.

    Returns:
        The date in words, as a single space-separated string.

    Raises:
        ValueError: If ``date_string`` does not match ``%Y-%m-%d``
            (raised by ``datetime.strptime``).
    """
    engine = inflect.engine()

    def spell(value):
        # All numbers are spelled without the word "and".
        return engine.number_to_words(value, andword='')

    parsed = datetime.strptime(date_string, "%Y-%m-%d")

    year = parsed.year
    if year < 1900 or year > 1999:
        year_words = spell(year)
    else:
        # Twentieth-century years are read as "<century> hundred <remainder>".
        century, remainder = divmod(year, 100)
        year_words = f"{spell(century)} hundred"
        if remainder != 0:
            year_words += f" {spell(remainder)}"
    year_text = year_words.replace(',', '')  # drop commas from the spelled-out year

    month_name = parsed.strftime("%B")
    day_word = number_to_ordinal_word(parsed.day)

    return f"{day_word} {month_name} {year_text}"
|
96 |
+
|
97 |
+
|
98 |
+
|
99 |
+
def translate_date_to_words(date, lang='en'):
    """Spell out a date in words, translate it, and transliterate to ASCII.

    The date is formatted as ``YYYY-MM-DD``, turned into words by
    ``date_to_words``, translated to ``lang`` with ``GoogleTranslator``
    (source language auto-detected), and finally passed through
    ``custom_normalize``. Each intermediate value is logged at INFO level.

    Args:
        date: An object providing ``strftime`` (presumably a ``date`` or
            ``datetime`` -- confirm against the caller), or ``None``.
        lang: Target language code handed to ``GoogleTranslator``.

    Returns:
        The translated, normalized date in words, or the string
        ``"No date selected"`` when ``date`` is ``None``.
    """
    if date is None:
        return "No date selected"

    iso_date = date.strftime("%Y-%m-%d")
    logger.info(f"Date string: {iso_date}")

    spelled_out = date_to_words(iso_date)
    logger.info(f"Date in words: {spelled_out}")

    translated = GoogleTranslator(source='auto', target=lang).translate(spelled_out)
    logger.info(f"Translated date words: {translated}")

    # Replace any accented / special characters left in the translation.
    normalized = custom_normalize(translated)
    logger.info(f"Normalized date words: {normalized}")

    return normalized
|