Spaces:
Building
Building
Update src/main.py
Browse files- src/main.py +30 -6
src/main.py
CHANGED
@@ -9,6 +9,7 @@ import os
|
|
9 |
import requests
|
10 |
from urllib.parse import quote, unquote
|
11 |
import tempfile
|
|
|
12 |
|
13 |
app = Flask(__name__, static_folder='static')
|
14 |
app.config['TITLE'] = 'Sign Language Translate'
|
@@ -16,9 +17,9 @@ app.config['TITLE'] = 'Sign Language Translate'
|
|
16 |
nlp, dict_docs_spacy = sp.load_spacy_values()
|
17 |
dataset, list_2000_tokens = dg.load_data()
|
18 |
|
19 |
-
def
|
20 |
-
"""
|
21 |
-
return
|
22 |
|
23 |
def spell_out_word(word):
|
24 |
"""단어를 개별 알파벳으로 분리하는 함수"""
|
@@ -26,17 +27,36 @@ def spell_out_word(word):
|
|
26 |
|
27 |
def translate_korean_to_english(text):
|
28 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
url = "https://translate.googleapis.com/translate_a/single"
|
30 |
params = {
|
31 |
"client": "gtx",
|
32 |
"sl": "ko",
|
33 |
"tl": "en",
|
34 |
"dt": "t",
|
35 |
-
"q":
|
36 |
}
|
37 |
response = requests.get(url, params=params)
|
38 |
if response.status_code == 200:
|
39 |
translated_text = ' '.join(item[0] for item in response.json()[0] if item[0])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
return translated_text
|
41 |
else:
|
42 |
raise Exception(f"Translation API returned status code: {response.status_code}")
|
@@ -103,7 +123,10 @@ def result():
|
|
103 |
if not english_text:
|
104 |
raise Exception("Translation failed")
|
105 |
|
106 |
-
|
|
|
|
|
|
|
107 |
generated_gloss = eng_to_asl_translator.translate_to_gloss()
|
108 |
|
109 |
# 단어 처리
|
@@ -111,7 +134,8 @@ def result():
|
|
111 |
words = generated_gloss.split()
|
112 |
|
113 |
for word in words:
|
114 |
-
|
|
|
115 |
# 고유명사인 경우 철자를 하나씩 분리
|
116 |
spelled_word = spell_out_word(word)
|
117 |
processed_gloss.extend(['FINGERSPELL-START'] + spelled_word.split() + ['FINGERSPELL-END'])
|
|
|
9 |
import requests
|
10 |
from urllib.parse import quote, unquote
|
11 |
import tempfile
|
12 |
+
import re
|
13 |
|
14 |
app = Flask(__name__, static_folder='static')
|
15 |
app.config['TITLE'] = 'Sign Language Translate'
|
|
|
17 |
nlp, dict_docs_spacy = sp.load_spacy_values()
|
18 |
dataset, list_2000_tokens = dg.load_data()
|
19 |
|
20 |
+
def find_quoted_words(text):
    """Return the contents of every single-quoted span in ``text``.

    Args:
        text: The string to scan for spans of the form ``'...'``.

    Returns:
        A list of the substrings found between pairs of single quotes, in
        order of appearance and without the surrounding quotes. Empty pairs
        (``''``) are skipped.
    """
    # Require at least one character between the quotes: an empty pair ('')
    # would otherwise produce an empty "word" that callers cannot usefully
    # translate or fingerspell.
    return re.findall(r"'([^']+)'", text)
|
23 |
|
24 |
def spell_out_word(word):
|
25 |
"""단어를 개별 알파벳으로 분리하는 함수"""
|
|
|
27 |
|
28 |
def translate_korean_to_english(text):
|
29 |
try:
|
30 |
+
# 작은따옴표로 묶인 단어들 찾기
|
31 |
+
quoted_words = find_quoted_words(text)
|
32 |
+
|
33 |
+
# 번역을 위해 임시로 마커로 대체
|
34 |
+
temp_text = text
|
35 |
+
for i, word in enumerate(quoted_words):
|
36 |
+
temp_text = temp_text.replace(f"'{word}'", f"QUOTED_WORD_{i}")
|
37 |
+
|
38 |
url = "https://translate.googleapis.com/translate_a/single"
|
39 |
params = {
|
40 |
"client": "gtx",
|
41 |
"sl": "ko",
|
42 |
"tl": "en",
|
43 |
"dt": "t",
|
44 |
+
"q": temp_text.strip()
|
45 |
}
|
46 |
response = requests.get(url, params=params)
|
47 |
if response.status_code == 200:
|
48 |
translated_text = ' '.join(item[0] for item in response.json()[0] if item[0])
|
49 |
+
|
50 |
+
# 번역된 텍스트에서 마커를 다시 원래 단어로 대체
|
51 |
+
for i, word in enumerate(quoted_words):
|
52 |
+
# 해당 단어를 영어로 별도 번역
|
53 |
+
word_params = params.copy()
|
54 |
+
word_params['q'] = word
|
55 |
+
word_response = requests.get(url, params=word_params)
|
56 |
+
if word_response.status_code == 200:
|
57 |
+
translated_word = word_response.json()[0][0][0].upper()
|
58 |
+
translated_text = translated_text.replace(f"QUOTED_WORD_{i}", f"'{translated_word}'")
|
59 |
+
|
60 |
return translated_text
|
61 |
else:
|
62 |
raise Exception(f"Translation API returned status code: {response.status_code}")
|
|
|
123 |
if not english_text:
|
124 |
raise Exception("Translation failed")
|
125 |
|
126 |
+
# 작은따옴표로 묶인 단어들 찾기
|
127 |
+
quoted_words = find_quoted_words(english_text)
|
128 |
+
|
129 |
+
eng_to_asl_translator = NlpSpacyBaseTranslator(sentence=english_text.replace("'", ""))
|
130 |
generated_gloss = eng_to_asl_translator.translate_to_gloss()
|
131 |
|
132 |
# 단어 처리
|
|
|
134 |
words = generated_gloss.split()
|
135 |
|
136 |
for word in words:
|
137 |
+
word_upper = word.upper()
|
138 |
+
if any(quoted_word.upper() == word_upper for quoted_word in quoted_words):
|
139 |
# 고유명사인 경우 철자를 하나씩 분리
|
140 |
spelled_word = spell_out_word(word)
|
141 |
processed_gloss.extend(['FINGERSPELL-START'] + spelled_word.split() + ['FINGERSPELL-END'])
|