DebasishDhal99 committed on
Commit
08016ad
·
1 Parent(s): 301bdf5

create romanian

Browse files
Files changed (1) hide show
  1. romanian.py +91 -0
romanian.py CHANGED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Multi-letter combinations that check_special_comb rewrites before the
# single-letter table (romanian_dict) is applied.
#
# The dict is walked in insertion order, so three-letter keys ("Che", "Ghi")
# are listed ahead of the shorter keys they contain ("Ch").
#
# Values written in Cyrillic, Odia or Devanagari are temporary placeholders:
# they keep later passes from touching the letters, and cyrillic_equiv_dict
# turns them back into Latin at the end of the pipeline.
#
# NOTE(review): "Sc"/"sc" is applied before "Ce"/"Ci", so "sce"/"sci" end up
# as "ske"/"ski" — confirm that this is the intended reading.
special_combs = {
    # Leading "e" read with a y-glide.
    "Este": "Yeste",
    "este": "yeste",
    "El": "Yel",
    # "che" / "chi" -> hard k.
    "Che": "Ke",
    "che": "ke",
    "Chi": "Ki",
    "chi": "ki",
    # "ghe" / "ghi" -> placeholder (upper-case: Odia, lower-case: Devanagari).
    "Ghe": "ଗେ",
    "ghe": "गे",
    "Ghi": "ଗି",
    "ghi": "गी",
    # Remaining "ch" and "sc".
    "Ch": "h",
    "ch": "h",
    "Sc": "Sk",
    "sc": "sk",
    # "ce" / "ci" -> Cyrillic placeholder followed by the Latin vowel.
    "Ce": "Чe",
    "ce": "чe",
    "Ci": "Чi",
    "ci": "чi",
    # "ge" / "gi" -> placeholder (upper-case: Odia, lower-case: Devanagari).
    "Ge": "ଜେ",
    "ge": "जे",
    "Gi": "ଜି",
    "gi": "जी",
}
15
+
16
# Single-character substitutions applied by romanian_replace, in insertion
# order. None of the replacement strings contains a later key, so the order
# only matters for readability.
#
# Romanian text is found with two encodings of s/t-with-diacritic: the
# standard comma-below letters and the legacy cedilla letters. Both are
# listed (as escapes, so the exact code points are unambiguous) so that
# either spelling is transliterated.
romanian_dict = {
    "ă": "aw", "Ă": "Aw",
    "â": "u", "Â": "U",
    "î": "u", "Î": "U",
    "j": "zh", "J": "Zh",
    "q": "k", "Q": "K",
    "\u0219": "sh", "\u0218": "Sh",  # s with comma below (standard)
    "\u015f": "sh", "\u015e": "Sh",  # s with cedilla (legacy encoding)
    "\u021b": "ts", "\u021a": "Ts",  # t with comma below (standard)
    "\u0163": "ts", "\u0162": "Ts",  # t with cedilla (legacy encoding)
    "c": "k", "C": "K",
}
26
+
27
# Final pass of the pipeline: maps the placeholder glyphs introduced by
# special_combs back to Latin letters. Despite the name, the table holds
# Cyrillic, Odia and Devanagari placeholders.
cyrillic_equiv_dict = {
    # Cyrillic che (placeholder for "c" before e/i).
    "ч": "ch",
    "Ч": "Ch",
    # Placeholders for "ghi" / "ghe".
    "ଗି": "Gi",
    "गी": "gi",
    "ଗେ": "Ge",
    "गे": "ge",
    # Placeholders for "gi" / "ge".
    "ଜି": "Ji",
    "जी": "ji",
    "ଜେ": "Je",
    "जे": "je",
}
34
+
35
def romanian_position_conditional_replace(word):
    """Rewrite the letters whose transliteration depends on their position.

    Rules, applied in this order:

    * a single-character word is returned untouched;
    * a leading ``y`` / ``Y`` becomes ``i`` / ``I``;
    * a leading lower-case ``x`` becomes ``ks``;
    * a lower-case ``x`` between two vowels becomes ``gz``;
    * every remaining ``x`` / ``X`` becomes ``ks`` / ``Ks``.

    Args:
        word: A single token (no spaces expected).

    Returns:
        The token with the ``y`` and ``x`` rules applied.
    """
    # The module has no top-level ``import re``; importing here keeps the
    # function usable on its own instead of failing with NameError.
    import re

    if len(word) == 1:
        return word

    if word.startswith("y"):
        word = "i" + word[1:]

    if word.startswith("Y"):
        word = "I" + word[1:]

    # At the beginning of a word, x = ks.
    if word.startswith("x"):
        word = "ks" + word[1:]

    # Between vowels, x = gz (only lower-case x is matched here).
    word = re.sub(r'([aeiouAEIOU])(x)([aeiouAEIOU])', r'\1gz\3', word)

    # Anywhere else, x = ks. str.replace is a no-op when nothing matches.
    word = word.replace("x", "ks")
    word = word.replace("X", "Ks")

    return word
57
+
58
def check_special_comb(word):
    """Apply every ``special_combs`` substitution to *word*.

    The table is walked in insertion order and each key is replaced
    wherever it occurs, so the result of one substitution is visible to
    the ones that follow.

    Args:
        word: The token to rewrite.

    Returns:
        The token after all multi-letter substitutions.
    """
    for combination, substitute in special_combs.items():
        # str.replace leaves the string unchanged when the key is absent.
        word = word.replace(combination, substitute)
    return word
63
+
64
def romanian_replace(word):
    """Apply the single-letter table ``romanian_dict`` to *word*.

    Args:
        word: The token to rewrite.

    Returns:
        The token with every key of ``romanian_dict`` replaced by its value,
        the entries being applied one after another in table order.
    """
    for letter, latin in romanian_dict.items():
        word = word.replace(letter, latin)
    return word
68
+
69
def cyrillic_replace(word):
    """Replace every key of ``cyrillic_equiv_dict`` found in *word*.

    Args:
        word: The token to rewrite.

    Returns:
        The token with each occurrence of a table key swapped for its
        mapped value.
    """
    for glyph, latin in cyrillic_equiv_dict.items():
        # str.replace leaves the string unchanged when the glyph is absent.
        word = word.replace(glyph, latin)
    return word
74
+
75
def romanian_word_to_latin(word):
    """Run one token through the four transliteration stages.

    The stages run in a fixed order: position-dependent ``x``/``y`` rules,
    multi-letter combinations, single-letter substitutions, and finally the
    placeholder-to-Latin pass.

    Args:
        word: The token to transliterate.

    Returns:
        The token after all four stages.
    """
    stages = (
        romanian_position_conditional_replace,
        check_special_comb,
        romanian_replace,
        cyrillic_replace,
    )
    for stage in stages:
        word = stage(word)
    return word
84
+
85
def romanian_sentence_to_latin(text):
    """Transliterate *text* token by token.

    The text is split on single space characters, each piece is passed to
    ``romanian_word_to_latin``, and the pieces are joined back with single
    spaces.

    Args:
        text: The text to transliterate.

    Returns:
        The transliterated text.
    """
    return " ".join(romanian_word_to_latin(piece) for piece in text.split(" "))