Hyeonseo committed on
Commit
3a7b4f7
·
verified ·
1 Parent(s): 500edea

Upload: digit2Text file

Browse files
Files changed (1) hide show
  1. TextUtil/digit2text.py +134 -0
TextUtil/digit2text.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #-*- coding:utf-8 -*-
2
+ # https://m.blog.naver.com/PostView.naver?isHttpsRedirect=true&blogId=chandong83&logNo=221144077279
3
+ # 위 코드에서 추가 수정 (extended and modified from the code linked above)
4
+
5
# Zero-based position, counted from the ones digit, of the 만 (10^4) digit.
# Kept as a public constant for backward compatibility.
tenThousandPos = 4
# Zero-based position, counted from the ones digit, of the 억 (10^8) digit.
# The previous value 9 was off by one and made 1억 read as "만만".
hundredMillionPos = 8
# Place names: indices 0-3 are the ones/십/백/천 places inside a four-digit
# group; indices 4 and 5 are the 만 and 억 group names.
txtDigit = ['', '십', '백', '천', '만', '억']
# Sino-Korean readings of the digits 0-9; zero is silent inside an integer.
txtNumber = ['', '일', '이', '삼', '사', '오', '육', '칠', '팔', '구']
# Spoken decimal point, including the space that follows it.
txtPoint = '쩜 '
# Reading used for zero whenever it has to be spoken aloud.
txtZero = '영'
# Names of successive four-digit groups, lowest group first.
_GROUP_UNITS = ('', txtDigit[4], txtDigit[5], '조', '경')


def _read_group(group):
    """Read one group of up to four digits, e.g. '2345' -> '이천삼백사십오'."""
    words = []
    top_place = len(group) - 1
    for offset, ch in enumerate(group):
        value = int(ch)  # raises ValueError for anything that is not a digit
        if value == 0:
            continue
        place = top_place - offset
        # A 1 is silent in front of 십/백/천 ('십', not '일십'); in the ones
        # place it is always spoken, which also yields '일만' / '일억'.
        if not (value == 1 and place >= 1):
            words.append(txtNumber[value])
        words.append(txtDigit[place])
    return ''.join(words)


def _read_integer(digits):
    """Read a comma-free string of digits; return '' when its value is zero."""
    digits = digits.lstrip('0')
    if not digits:
        return ''
    # Split into four-digit groups from the right; the first may be shorter.
    head = len(digits) % 4 or 4
    groups = [digits[:head]]
    groups.extend(digits[start:start + 4] for start in range(head, len(digits), 4))
    if len(groups) > len(_GROUP_UNITS):
        raise ValueError('number too large to read: %r' % digits)
    words = []
    for unit_index, group in zip(range(len(groups) - 1, -1, -1), groups):
        reading = _read_group(group)
        # A group made only of zeros contributes neither digits nor its unit.
        if reading:
            words.append(reading + _GROUP_UNITS[unit_index])
    return ''.join(words)


def digit2txt(strNum):
    """Convert a numeric string to its Sino-Korean reading.

    Thousands separators (',') are ignored. The part before the first '.'
    is read positionally in four-digit groups (만, 억, 조, 경). Everything
    after it is read digit by digit, with zero spoken as '영' and any
    further '.' spoken as another decimal point.

    Examples:
        "1,234"      -> "천이백삼십사"
        "10000"      -> "일만"
        "100000000"  -> "일억"
        "0.13"       -> "영쩜 일삼"
        "3.05"       -> "삼쩜 영오"
        "0"          -> "영"

    Args:
        strNum: String of digits, optionally with ',' separators and '.'.

    Returns:
        The Korean reading, or '' when the input holds no digits and no '.'.

    Raises:
        ValueError: If a character other than a digit, ',' or '.' occurs,
            or the integer part has more than 20 significant digits.
    """
    whole, point, rest = strNum.partition('.')
    whole_digits = whole.replace(',', '')
    reading = _read_integer(whole_digits)
    # An integer part written out as zero ("0", "00") is spoken as '영';
    # a missing integer part (".5", "") stays silent.
    if not reading and whole_digits:
        reading = txtZero
    words = [reading]
    if point:
        words.append(txtPoint)
        for ch in rest:
            if ch == ',':
                continue
            if ch == '.':
                words.append(txtPoint)
                continue
            value = int(ch)
            # Zeros after the point carry value and must be spoken.
            words.append(txtNumber[value] if value else txtZero)
    return ''.join(words)
82
+
83
+
84
# Native-Korean words for the ones digits 1-9, mapped to their values.
# 한/두/세 are the forms used in front of a counter word.
# NOTE(review): 4 is mapped from "넷" rather than the counter form "네" -
# confirm this is intended.
# Variant spellings that are deliberately not mapped here:
#   standalone forms: "하나": 1, "둘": 2, "셋": 3
#   counter variants: "석": 3, "서": 3, "네": 4, "넉": 4, "너": 4,
#                     "닷": 5, "엿": 6
NATIVE_MAP_ONES = {
    "한": 1, "두": 2, "세": 3, "넷": 4, "다섯": 5,
    "여섯": 6, "일곱": 7, "여덟": 8, "아홉": 9,
}
91
+
92
# Native-Korean words for the multiples of ten from 10 to 90, mapped to
# their values.
MAP_TENS = {
    "열": 10, "스물": 20, "서른": 30, "마흔": 40, "쉰": 50,
    "예순": 60, "일흔": 70, "여든": 80, "아흔": 90,
}
96
+
97
+
98
def _first_key_for(mapping, target):
    """Return the first key of *mapping* whose value equals *target*, or ''."""
    return next((word for word, value in mapping.items() if value == target), "")


def NNGdigit2txt(number):
    """Convert a number to its native-Korean reading.

    Only the tens word (MAP_TENS) and the ones word (NATIVE_MAP_ONES) are
    combined. With the maps as defined in this module, 25 reads "스물다섯"
    and 91 reads "아흔한". Values of 100 and above are delegated to
    digit2txt.

    Args:
        number: An int, or anything int() accepts.

    Returns:
        The reading as a string; '' for zero and for negative values,
        which have no entry in the maps.

    Raises:
        ValueError: If *number* cannot be converted by int().
    """
    number = int(number)

    if number >= 100:
        return digit2txt(str(number))
    if number < 10:
        return _first_key_for(NATIVE_MAP_ONES, number)

    tens, ones = divmod(number, 10)
    # Stop at the first match in each map so that adding variant spellings
    # with the same value cannot glue several words together.
    return _first_key_for(MAP_TENS, tens * 10) + _first_key_for(NATIVE_MAP_ONES, ones)
124
+
125
def CSign2txt(csign):
    """Return the Korean name of a currency symbol.

    Args:
        csign: A single currency symbol such as "$" or "€".

    Returns:
        The Korean currency name, or '' when the symbol is not known.
    """
    currency_symbols = {
        "$": "달러",
        "€": "유로",
        "£": "파운드",
        "¥": "엔",
        "￦": "원",  # fullwidth won sign, U+FFE6
        "₩": "원",  # standard won sign, U+20A9
    }

    return currency_symbols.get(csign, "")