uuuy5615 committed on
Commit
a73dd87
·
verified ·
1 Parent(s): 68a1205

Update backend/spellchecker.py

Browse files
Files changed (1) hide show
  1. backend/spellchecker.py +191 -191
backend/spellchecker.py CHANGED
@@ -1,191 +1,191 @@
1
- import json
2
- import difflib
3
- from hanspell import spell_checker
4
- from hanspell.constants import CheckResult
5
- from kiwipiepy import Kiwi
6
-
7
# Maps each CheckResult status to the integer code reported as "errortype".
- ERROR_TYPE_MAPPING = {
8
- CheckResult.PASSED: 0, # word or phrase with no problem
9
- CheckResult.WRONG_SPELLING: 1, # word or phrase with a spelling problem
10
- CheckResult.WRONG_SPACING: 2, # word or phrase with a spacing problem
11
- CheckResult.AMBIGUOUS: 3, # word or phrase suspected of being non-standard language
12
- CheckResult.STATISTICAL_CORRECTION: 4, # word or phrase changed by statistical correction
13
- }
14
- import difflib
15
-
16
-
17
def update_corrections_by_error_text(original_text, checked_text, corrections):
    """Re-anchor each correction on its ``error`` text inside ``original_text``.

    The start offset of ``corr["error"]`` is resolved in this order:

    1. the recorded ``"position"`` when the error text really starts there
       (keeps repeated words from collapsing onto their first occurrence);
    2. the first occurrence of the error text in ``original_text``;
    3. the recorded ``"position"`` as a last-resort fallback.

    The slice of ``checked_text`` at that offset, with the same length as the
    error text, is stored as the corrected text.

    Args:
        original_text: Text the corrections refer to.
        checked_text: Corrected text, sliced at offsets found in
            ``original_text``.
        corrections: Iterable of dicts with an ``"error"`` key and, optionally,
            a ``"position"`` hint (may be ``None``).

    Returns:
        A new list of shallow copies with ``"length"`` refreshed and, when an
        offset could be resolved, ``"checked"`` and ``"position"`` refreshed
        too. When no offset can be resolved those two keys are left as they
        were. The input dicts are never mutated.
    """
    updated = []

    for corr in corrections:
        error = corr["error"]
        length = len(error)
        hint = corr.get("position")
        # Only a non-negative integer can be used as an offset; None (unknown
        # position) and negative values are ignored.
        hint_usable = isinstance(hint, int) and hint >= 0

        if hint_usable and original_text.startswith(error, hint):
            start_pos = hint
        else:
            start_pos = original_text.find(error)
            if start_pos == -1:
                # Error text not found: fall back to the recorded position.
                start_pos = hint if hint_usable else None

        new_corr = corr.copy()
        new_corr["length"] = length
        if start_pos is not None:
            # Estimate the corrected text from the same offsets in checked_text.
            new_corr["checked"] = checked_text[start_pos : start_pos + length]
            new_corr["position"] = start_pos
        updated.append(new_corr)

    return updated
39
-
40
-
41
def extract_phrase(text: str, position: int) -> str:
    """Return the space-delimited chunk of ``text`` that surrounds ``position``.

    The chunk runs from just after the nearest ``" "`` strictly left of
    ``position`` up to (not including) the nearest ``" "`` strictly right of
    it. When ``position`` itself points at a space, the result therefore
    contains the words on both sides joined by that space.

    Args:
        text: Text to cut the chunk from.
        position: Index into ``text``.

    Returns:
        The surrounding chunk, or ``""`` when ``position`` is outside
        ``text`` (negative, or past the last character).
    """
    if position < 0 or position >= len(text):
        return ""

    # Nearest space before position; -1 (none) makes the chunk start at 0.
    left = text.rfind(" ", 0, position)

    # Nearest space after position; none means the chunk runs to the end.
    right = text.find(" ", position + 1)
    if right == -1:
        right = len(text)

    return text[left + 1 : right]
56
-
57
-
58
def _space_diff_entry(original: str, position: int, length: int) -> dict:
    """Build one spacing-correction record for the phrase around ``position``."""
    phrase = extract_phrase(original, position)
    # The phrase is sent through the spell checker on its own to obtain its
    # corrected form.
    checked = spell_checker.check(phrase).as_dict()["checked"]
    return {
        "error": phrase,
        "checked": checked,
        "position": position,
        "length": length,
        "errortype": ERROR_TYPE_MAPPING[CheckResult.WRONG_SPACING],
    }


def get_space_diffs(original: str, corrected: str):
    """Collect spacing differences between ``original`` and ``corrected``.

    Both strings are walked in lockstep. A space present only in
    ``original`` yields a record with ``"length": -1`` (space to delete); a
    space present only in ``corrected`` yields a record with ``"length": 1``
    (space to insert). ``"position"`` is always an index into ``original``.

    Args:
        original: Text before correction.
        corrected: Text after correction.

    Returns:
        A list of correction dicts with the keys ``"error"``, ``"checked"``,
        ``"position"``, ``"length"`` and ``"errortype"``, in the order the
        differences are met. Empty when no spacing difference is found.
    """
    # NOTE(review): characters that differ without either being a space are
    # skipped one-for-one, so the walk is not re-aligned after a correction
    # that changes the text length - confirm this is acceptable.
    diffs = []
    orig_len = len(original)
    corr_len = len(corrected)
    o_idx = c_idx = 0

    while o_idx < orig_len and c_idx < corr_len:
        o_char = original[o_idx]
        c_char = corrected[c_idx]

        if o_char == c_char:
            # Same character: move on.
            o_idx += 1
            c_idx += 1
        elif o_char == " ":
            # Space only in the original -> delete_space.
            diffs.append(_space_diff_entry(original, o_idx, -1))
            o_idx += 1  # step over the space
        elif c_char == " ":
            # Space only in the corrected text -> insert_space, recorded at
            # the insertion point in the original.
            diffs.append(_space_diff_entry(original, o_idx, 1))
            c_idx += 1  # step over the space
        else:
            # Different, but neither is a space (e.g. a spelling fix): skip.
            o_idx += 1
            c_idx += 1

    return diffs
110
-
111
-
112
def check(text: str):
    """Spell-check ``text`` and describe every correction against the original.

    Args:
        text: Text to send to the spell checker.

    Returns:
        A dict with:

        * ``"flag"``: 0 when the checker's original and checked texts are
          equal, otherwise 1.
        * ``"original_text"`` / ``"checked_text"``: the checker's
          ``"original"`` and ``"checked"`` values.
        * ``"corrections"``: word-level corrections plus spacing differences,
          sorted by ``"position"``; entries whose position is unknown
          (``None``) come last.
        * ``"time"``: the checker's ``"time"`` value.
    """
    import logging

    info = spell_checker.check(text).as_dict()
    orig_text = info["original"]
    corr_text = info["checked"]
    elapsed = info["time"]
    flag = 0 if orig_text == corr_text else 1
    logging.getLogger(__name__).debug("spell checker words: %s", info["words"])

    space = get_space_diffs(orig_text, corr_text)

    # 1) Character-level map: index in corrected text -> index in original.
    matcher = difflib.SequenceMatcher(None, orig_text, corr_text)
    mapping = {}
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == "equal":
            # Matching block: 1:1 mapping.
            for offset in range(i2 - i1):
                mapping[j1 + offset] = i1 + offset
        elif tag in ("replace", "insert"):
            # Replaced/inserted block: every corrected character maps to the
            # start of the original block.
            for offset in range(j2 - j1):
                mapping[j1 + offset] = i1

    # 2) Per token: locate it and cut out the originally wrong word.
    corrections = []
    for token, status in info["words"].items():
        # Spacing problems are reported through get_space_diffs instead.
        if status in (CheckResult.PASSED, CheckResult.WRONG_SPACING):
            continue

        corr_pos = corr_text.find(token)
        if corr_pos != -1 and corr_pos in mapping:
            orig_pos = mapping[corr_pos]
            # Cut as many characters as the token has from the original text.
            error_word = orig_text[orig_pos : orig_pos + len(token)]
        else:
            orig_pos = None
            error_word = token

        corrections.append(
            {
                "error": error_word,
                "checked": token,
                "position": orig_pos,
                "length": len(error_word),
                "errortype": ERROR_TYPE_MAPPING[status],
            }
        )

    combined = corrections + space
    # A position of None cannot be compared with integers; sort those last.
    sorted_combined = sorted(
        combined,
        key=lambda corr: (corr["position"] is None, corr["position"] or 0),
    )

    return {
        "flag": flag,
        "original_text": info["original"],
        "checked_text": info["checked"],
        "corrections": sorted_combined,
        "time": elapsed,
    }
173
-
174
-
175
- if __name__ == "__main__":
# Manual demo, run only when this file is executed as a script: spell-check
# sample2 and pretty-print the result as JSON.
# NOTE(review): `sample` is defined but not used below.
176
- sample = "๋‚˜๋Š” ์˜ค๋Š˜ ์•„์นจ๋ฐฅ์„ ๋จน๊ณ  ํ•™๊ต ๋ฅผ ๊ฐ”๋‹ค.ํ•™๊ต ๋ฅผ ์•„๋Š” ์นœ๊ตฌ๋“ค์ด ๋งŽ์น˜๋งŒ, ์˜ค๋Š˜์€ ๋ณ„๋ฃจ ๋ณด์ด์ง€ ์•Š์•˜๋‹ค. ํ•™๊ต์•ž ๋ฌธ๊ตฌ์ ์—์„œ ๋ณผํŽœ์„ ์ƒ€๋Š”๋ฐ, ๊ทธ ๋ณผํŽœ์€ ์ž‰ํฌ๊ฐ€ ์ž์ฃผ ๋ง๋ผ์„œ ์ž์ฃผ ๋ฐ”๊ฟ”์•ผํ•œ๋‹ค. ํ•™๊ต์—์„œ ํ•™๊ต ํ–‰์‚ฌ์— ๋Œ€ํ•œ ์–˜๊ธฐ๋ฅผ ๋“ค์—ˆ๋Š”๋ฐ, ๋ณ„๋ฃจ ๊ธฐ๋Œ€๋Š” ์•ˆ๋œ๋‹ค."
177
- sample2 = "ํ˜„๋Œ€ ๊ต์œก์€ ๋‹จ์ˆœํžˆ ์ง€์‹์„ ์ „๋‹ฌํ•˜๋Š” ๊ฒƒ์„ ๋„˜์–ด์„œ, ํ•™์ƒ์˜ ์ „์ธ์  ์„ฑ์ž˜์„ ๋ชฉํ‘œ๋กœ ํ•œ๋‹ค. ์ด์— ๋”ฐ๋ผ ์ •์„œ์  ์ง€์ง€์™€ ์‚ฌํšŒ์„ฑ ๊ต์œก๋„ ์ ์  ์ค‘์š”ํ•ด์ง€๊ณ  ์žˆ์žˆ๋‹ค. ๊ทธ๋Ÿฌ๋‚˜ ์•„์ง๋„ ๋งŽ์€ ํ•™๊ต์—์„œ๋Š” ์ฃผ์ž…์‹ ๊ต์œก์ด ์ค‘์‹ฌ์ด ๋˜์–ด, ํ•™์ƒ๋“ค์ด ์ฃผ๋„์ ์œผ๋กœ ํ•™์Šตํ•  ๊ธฐํšŒ๊ฐ€ ์ ๋‹ค. ๋˜ํ•œ, ๊ต์‚ฌ๋“ค์˜ ๊ณผ๋„ํ•œ ํ–‰์ •์—…๋ฌด๋กœ ์ธํ•ด ์ˆ˜์—… ์ค€๋น„์— ์ถฉ๋ถ„ํ•œ ์‹œ๊ฐ„์„ ๊ฐ€์งˆ์ˆ˜ ์—†๊ณ , ์ด๋Š” ๊ต์œก์˜ ์งˆ ์ €ํ•˜๋กœ ์ด์–ด์งˆ ์ˆ˜ ์žˆ๋”ฐ. ์ง€์†์ ์ธ ๊ต์‚ฌ ์—ฐ์ˆ˜์™€ ๊ต์œกํ™˜๊ฒฝ ๊ฐœ์„ ์ด ๋’ท๋ฐ›์นจ๋˜์–ด์•ผ๋งŒ ๋ฏธ๋ž˜ํ˜• ๊ต์œก์ด ์‹คํ˜„๋  ์ˆ˜ ์žˆ์Šฌ ๊ฒƒ์ด๋‹ค."
178
- output = check(sample2)
179
- print(json.dumps(output, ensure_ascii=False, indent=2))
180
# Debug output: the single character at index 79 of sample2.
- print(sample2[79])
181
- # "flag": whether the text contains a spelling error (0: no / 1: yes)
182
- # "original_text": the original text
183
- # "checked_text": the text with spelling corrected
184
- # "corrections"[
185
- # {
186
- # "error": the misspelled word
187
- # "position": position of the misspelled word in the text (start index)
188
- # "errortype": error type (1~4)
189
- # },
190
- # ]
191
- # "time": elapsed time
 
1
+ import json
2
+ import difflib
3
+ from backend.hanspell import spell_checker
4
+ from backend.hanspell.constants import CheckResult
5
+ from kiwipiepy import Kiwi
6
+
7
# Maps each CheckResult status to the integer code reported as "errortype".
+ ERROR_TYPE_MAPPING = {
8
+ CheckResult.PASSED: 0, # word or phrase with no problem
9
+ CheckResult.WRONG_SPELLING: 1, # word or phrase with a spelling problem
10
+ CheckResult.WRONG_SPACING: 2, # word or phrase with a spacing problem
11
+ CheckResult.AMBIGUOUS: 3, # word or phrase suspected of being non-standard language
12
+ CheckResult.STATISTICAL_CORRECTION: 4, # word or phrase changed by statistical correction
13
+ }
14
+ import difflib
15
+
16
+
17
def update_corrections_by_error_text(original_text, checked_text, corrections):
    """Re-anchor each correction on its ``error`` text inside ``original_text``.

    The start offset of ``corr["error"]`` is resolved in this order:

    1. the recorded ``"position"`` when the error text really starts there
       (keeps repeated words from collapsing onto their first occurrence);
    2. the first occurrence of the error text in ``original_text``;
    3. the recorded ``"position"`` as a last-resort fallback.

    The slice of ``checked_text`` at that offset, with the same length as the
    error text, is stored as the corrected text.

    Args:
        original_text: Text the corrections refer to.
        checked_text: Corrected text, sliced at offsets found in
            ``original_text``.
        corrections: Iterable of dicts with an ``"error"`` key and, optionally,
            a ``"position"`` hint (may be ``None``).

    Returns:
        A new list of shallow copies with ``"length"`` refreshed and, when an
        offset could be resolved, ``"checked"`` and ``"position"`` refreshed
        too. When no offset can be resolved those two keys are left as they
        were. The input dicts are never mutated.
    """
    updated = []

    for corr in corrections:
        error = corr["error"]
        length = len(error)
        hint = corr.get("position")
        # Only a non-negative integer can be used as an offset; None (unknown
        # position) and negative values are ignored.
        hint_usable = isinstance(hint, int) and hint >= 0

        if hint_usable and original_text.startswith(error, hint):
            start_pos = hint
        else:
            start_pos = original_text.find(error)
            if start_pos == -1:
                # Error text not found: fall back to the recorded position.
                start_pos = hint if hint_usable else None

        new_corr = corr.copy()
        new_corr["length"] = length
        if start_pos is not None:
            # Estimate the corrected text from the same offsets in checked_text.
            new_corr["checked"] = checked_text[start_pos : start_pos + length]
            new_corr["position"] = start_pos
        updated.append(new_corr)

    return updated
39
+
40
+
41
def extract_phrase(text: str, position: int) -> str:
    """Return the space-delimited chunk of ``text`` that surrounds ``position``.

    The chunk runs from just after the nearest ``" "`` strictly left of
    ``position`` up to (not including) the nearest ``" "`` strictly right of
    it. When ``position`` itself points at a space, the result therefore
    contains the words on both sides joined by that space.

    Args:
        text: Text to cut the chunk from.
        position: Index into ``text``.

    Returns:
        The surrounding chunk, or ``""`` when ``position`` is outside
        ``text`` (negative, or past the last character).
    """
    if position < 0 or position >= len(text):
        return ""

    # Nearest space before position; -1 (none) makes the chunk start at 0.
    left = text.rfind(" ", 0, position)

    # Nearest space after position; none means the chunk runs to the end.
    right = text.find(" ", position + 1)
    if right == -1:
        right = len(text)

    return text[left + 1 : right]
56
+
57
+
58
def _space_diff_entry(original: str, position: int, length: int) -> dict:
    """Build one spacing-correction record for the phrase around ``position``."""
    phrase = extract_phrase(original, position)
    # The phrase is sent through the spell checker on its own to obtain its
    # corrected form.
    checked = spell_checker.check(phrase).as_dict()["checked"]
    return {
        "error": phrase,
        "checked": checked,
        "position": position,
        "length": length,
        "errortype": ERROR_TYPE_MAPPING[CheckResult.WRONG_SPACING],
    }


def get_space_diffs(original: str, corrected: str):
    """Collect spacing differences between ``original`` and ``corrected``.

    Both strings are walked in lockstep. A space present only in
    ``original`` yields a record with ``"length": -1`` (space to delete); a
    space present only in ``corrected`` yields a record with ``"length": 1``
    (space to insert). ``"position"`` is always an index into ``original``.

    Args:
        original: Text before correction.
        corrected: Text after correction.

    Returns:
        A list of correction dicts with the keys ``"error"``, ``"checked"``,
        ``"position"``, ``"length"`` and ``"errortype"``, in the order the
        differences are met. Empty when no spacing difference is found.
    """
    # NOTE(review): characters that differ without either being a space are
    # skipped one-for-one, so the walk is not re-aligned after a correction
    # that changes the text length - confirm this is acceptable.
    diffs = []
    orig_len = len(original)
    corr_len = len(corrected)
    o_idx = c_idx = 0

    while o_idx < orig_len and c_idx < corr_len:
        o_char = original[o_idx]
        c_char = corrected[c_idx]

        if o_char == c_char:
            # Same character: move on.
            o_idx += 1
            c_idx += 1
        elif o_char == " ":
            # Space only in the original -> delete_space.
            diffs.append(_space_diff_entry(original, o_idx, -1))
            o_idx += 1  # step over the space
        elif c_char == " ":
            # Space only in the corrected text -> insert_space, recorded at
            # the insertion point in the original.
            diffs.append(_space_diff_entry(original, o_idx, 1))
            c_idx += 1  # step over the space
        else:
            # Different, but neither is a space (e.g. a spelling fix): skip.
            o_idx += 1
            c_idx += 1

    return diffs
110
+
111
+
112
def check(text: str):
    """Spell-check ``text`` and describe every correction against the original.

    Args:
        text: Text to send to the spell checker.

    Returns:
        A dict with:

        * ``"flag"``: 0 when the checker's original and checked texts are
          equal, otherwise 1.
        * ``"original_text"`` / ``"checked_text"``: the checker's
          ``"original"`` and ``"checked"`` values.
        * ``"corrections"``: word-level corrections plus spacing differences,
          sorted by ``"position"``; entries whose position is unknown
          (``None``) come last.
        * ``"time"``: the checker's ``"time"`` value.
    """
    import logging

    info = spell_checker.check(text).as_dict()
    orig_text = info["original"]
    corr_text = info["checked"]
    elapsed = info["time"]
    flag = 0 if orig_text == corr_text else 1
    logging.getLogger(__name__).debug("spell checker words: %s", info["words"])

    space = get_space_diffs(orig_text, corr_text)

    # 1) Character-level map: index in corrected text -> index in original.
    matcher = difflib.SequenceMatcher(None, orig_text, corr_text)
    mapping = {}
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == "equal":
            # Matching block: 1:1 mapping.
            for offset in range(i2 - i1):
                mapping[j1 + offset] = i1 + offset
        elif tag in ("replace", "insert"):
            # Replaced/inserted block: every corrected character maps to the
            # start of the original block.
            for offset in range(j2 - j1):
                mapping[j1 + offset] = i1

    # 2) Per token: locate it and cut out the originally wrong word.
    corrections = []
    for token, status in info["words"].items():
        # Spacing problems are reported through get_space_diffs instead.
        if status in (CheckResult.PASSED, CheckResult.WRONG_SPACING):
            continue

        corr_pos = corr_text.find(token)
        if corr_pos != -1 and corr_pos in mapping:
            orig_pos = mapping[corr_pos]
            # Cut as many characters as the token has from the original text.
            error_word = orig_text[orig_pos : orig_pos + len(token)]
        else:
            orig_pos = None
            error_word = token

        corrections.append(
            {
                "error": error_word,
                "checked": token,
                "position": orig_pos,
                "length": len(error_word),
                "errortype": ERROR_TYPE_MAPPING[status],
            }
        )

    combined = corrections + space
    # A position of None cannot be compared with integers; sort those last.
    sorted_combined = sorted(
        combined,
        key=lambda corr: (corr["position"] is None, corr["position"] or 0),
    )

    return {
        "flag": flag,
        "original_text": info["original"],
        "checked_text": info["checked"],
        "corrections": sorted_combined,
        "time": elapsed,
    }
173
+
174
+
175
+ if __name__ == "__main__":
# Manual demo, run only when this file is executed as a script: spell-check
# sample2 and pretty-print the result as JSON.
# NOTE(review): `sample` is defined but not used below.
176
+ sample = "๋‚˜๋Š” ์˜ค๋Š˜ ์•„์นจ๋ฐฅ์„ ๋จน๊ณ  ํ•™๊ต ๋ฅผ ๊ฐ”๋‹ค.ํ•™๊ต ๋ฅผ ์•„๋Š” ์นœ๊ตฌ๋“ค์ด ๋งŽ์น˜๋งŒ, ์˜ค๋Š˜์€ ๋ณ„๋ฃจ ๋ณด์ด์ง€ ์•Š์•˜๋‹ค. ํ•™๊ต์•ž ๋ฌธ๊ตฌ์ ์—์„œ ๋ณผํŽœ์„ ์ƒ€๋Š”๋ฐ, ๊ทธ ๋ณผํŽœ์€ ์ž‰ํฌ๊ฐ€ ์ž์ฃผ ๋ง๋ผ์„œ ์ž์ฃผ ๋ฐ”๊ฟ”์•ผํ•œ๋‹ค. ํ•™๊ต์—์„œ ํ•™๊ต ํ–‰์‚ฌ์— ๋Œ€ํ•œ ์–˜๊ธฐ๋ฅผ ๋“ค์—ˆ๋Š”๋ฐ, ๋ณ„๋ฃจ ๊ธฐ๋Œ€๋Š” ์•ˆ๋œ๋‹ค."
177
+ sample2 = "ํ˜„๋Œ€ ๊ต์œก์€ ๋‹จ์ˆœํžˆ ์ง€์‹์„ ์ „๋‹ฌํ•˜๋Š” ๊ฒƒ์„ ๋„˜์–ด์„œ, ํ•™์ƒ์˜ ์ „์ธ์  ์„ฑ์ž˜์„ ๋ชฉํ‘œ๋กœ ํ•œ๋‹ค. ์ด์— ๋”ฐ๋ผ ์ •์„œ์  ์ง€์ง€์™€ ์‚ฌํšŒ์„ฑ ๊ต์œก๋„ ์ ์  ์ค‘์š”ํ•ด์ง€๊ณ  ์žˆ์žˆ๋‹ค. ๊ทธ๋Ÿฌ๋‚˜ ์•„์ง๋„ ๋งŽ์€ ํ•™๊ต์—์„œ๋Š” ์ฃผ์ž…์‹ ๊ต์œก์ด ์ค‘์‹ฌ์ด ๋˜์–ด, ํ•™์ƒ๋“ค์ด ์ฃผ๋„์ ์œผ๋กœ ํ•™์Šตํ•  ๊ธฐํšŒ๊ฐ€ ์ ๋‹ค. ๋˜ํ•œ, ๊ต์‚ฌ๋“ค์˜ ๊ณผ๋„ํ•œ ํ–‰์ •์—…๋ฌด๋กœ ์ธํ•ด ์ˆ˜์—… ์ค€๋น„์— ์ถฉ๋ถ„ํ•œ ์‹œ๊ฐ„์„ ๊ฐ€์งˆ์ˆ˜ ์—†๊ณ , ์ด๋Š” ๊ต์œก์˜ ์งˆ ์ €ํ•˜๋กœ ์ด์–ด์งˆ ์ˆ˜ ์žˆ๋”ฐ. ์ง€์†์ ์ธ ๊ต์‚ฌ ์—ฐ์ˆ˜์™€ ๊ต์œกํ™˜๊ฒฝ ๊ฐœ์„ ์ด ๋’ท๋ฐ›์นจ๋˜์–ด์•ผ๋งŒ ๋ฏธ๋ž˜ํ˜• ๊ต์œก์ด ์‹คํ˜„๋  ์ˆ˜ ์žˆ์Šฌ ๊ฒƒ์ด๋‹ค."
178
+ output = check(sample2)
179
+ print(json.dumps(output, ensure_ascii=False, indent=2))
180
# Debug output: the single character at index 79 of sample2.
+ print(sample2[79])
181
+ # "flag": whether the text contains a spelling error (0: no / 1: yes)
182
+ # "original_text": the original text
183
+ # "checked_text": the text with spelling corrected
184
+ # "corrections"[
185
+ # {
186
+ # "error": the misspelled word
187
+ # "position": position of the misspelled word in the text (start index)
188
+ # "errortype": error type (1~4)
189
+ # },
190
+ # ]
191
+ # "time": elapsed time