Spaces:

ginigen
/

Sign-language

Running

App Files Files Community

ginipick committed on Jan 26

Commit

a9522e9

verified ·

1 Parent(s): bb7c4ad

Update src/main.py

Browse files

Files changed (1) hide show

src/main.py +31 -32

src/main.py CHANGED Viewed

@@ -50,25 +50,28 @@ def translate_quoted_word(word):
 def translate_korean_to_english(text):
     """전체 텍스트 번역 함수"""
     try:
-        # 입력 텍스트에서 따옴표 형식 검사 및 수정
-        text = text.replace("'", "'").replace("'", "'")  # 스마트 따옴표를 일반 따옴표로 변환
-        # 잘못된 따옴표 위치 수정 (예: 한국'을 '한국'으로)
-        text = re.sub(r"(\w+)'", r"'\1'", text)
-        # 1. 따옴표로 묶인 부분을 찾아서 따로 번역
-        quoted_words = find_quoted_words(text)
-        translated_quoted = {}
-        # 따옴표 안의 단어들 먼저 번역
         for word in quoted_words:
-            translated = translate_quoted_word(word)
-            translated_quoted[word] = translated
-            # 임시 마커로 대체
-            text = text.replace(f"'{word}'", f"QUOTED_{len(translated_quoted)}_")
-        # 전체 문장 번역
-        url = "https://translate.googleapis.com/translate_a/single"
         params = {
             "client": "gtx",
             "sl": "ko",
@@ -77,17 +80,14 @@ def translate_korean_to_english(text):
             "q": text
         }
         response = requests.get(url, params=params)
         if response.status_code == 200:
-            translated_text = ' '.join(item[0] for item in response.json()[0] if item[0])
-            # 번역된 텍스트에서 마커를 번역된 단어로 대체
-            for i, (original, translated) in enumerate(translated_quoted.items(), 1):
-                translated_text = translated_text.replace(f"QUOTED_{i}_", f"'{translated}'")
-            # 불필요한 공백 정리
-            translated_text = re.sub(r'\s+', ' ', translated_text).strip()
-            return translated_text
         else:
             raise Exception(f"Translation API returned status code: {response.status_code}")
     except Exception as e:
@@ -108,7 +108,7 @@ def result():
                 raise Exception("Translation failed")
             # 따옴표로 묶인 단어 추출 (번역된 영어 텍스트에서)
-            quoted_words = re.findall(r"'([^']*)'", english_text)
             # 번역된 텍스트에서 따옴표 제거하고 ASL 변환
             clean_english = re.sub(r"'([^']*)'", r"\1", english_text)
@@ -138,16 +138,15 @@ def result():
             i = 0
             while i < len(processed_gloss):
                 if processed_gloss[i] == 'FINGERSPELL-START':
-                    # 철자 처리 부분을 그대로 유지
-                    final_gloss.append(processed_gloss[i])
-                    i += 1
                     while i < len(processed_gloss) and processed_gloss[i] != 'FINGERSPELL-END':
                         final_gloss.append(processed_gloss[i])
                         i += 1
-                    final_gloss.append('FINGERSPELL-END')
-                    i += 1
                 else:
-                    # 일반 단어는 동의어 처리
                     word = processed_gloss[i]
                     final_gloss.append(sp.find_synonyms(word, nlp, dict_docs_spacy, list_2000_tokens))
                     i += 1

 def translate_korean_to_english(text):
     """전체 텍스트 번역 함수"""
     try:
+        # 1. 따옴표로 묶인 단어 찾기
+        quoted_words = re.findall(r"'([^']*)'", text)
+        # 2. 각 따옴표 단어 개별 번역
+        translated_words = {}
         for word in quoted_words:
+            url = "https://translate.googleapis.com/translate_a/single"
+            params = {
+                "client": "gtx",
+                "sl": "ko",
+                "tl": "en",
+                "dt": "t",
+                "q": word
+            }
+            response = requests.get(url, params=params)
+            if response.status_code == 200:
+                translated = response.json()[0][0][0].upper()
+                translated_words[word] = translated
+                # 임시로 원본 텍스트의 따옴표 부분을 특수 마커로 대체
+                text = text.replace(f"'{word}'", "PROPER_NOUN_MARKER")
+        # 3. 전체 문장 번역
         params = {
             "client": "gtx",
             "sl": "ko",
             "q": text
         }
         response = requests.get(url, params=params)
         if response.status_code == 200:
+            full_translation = response.json()[0][0][0]
+            # 4. 번역된 문장에서 마커를 번역된 고유명사로 대체
+            for original, translated in translated_words.items():
+                full_translation = full_translation.replace("PROPER_NOUN_MARKER", f"'{translated}'", 1)
+            return full_translation
         else:
             raise Exception(f"Translation API returned status code: {response.status_code}")
     except Exception as e:
                 raise Exception("Translation failed")
             # 따옴표로 묶인 단어 추출 (번역된 영어 텍스트에서)
+            quoted_words = [word.strip("'") for word in re.findall(r"'([^']*)'", english_text)]
             # 번역된 텍스트에서 따옴표 제거하고 ASL 변환
             clean_english = re.sub(r"'([^']*)'", r"\1", english_text)
             i = 0
             while i < len(processed_gloss):
                 if processed_gloss[i] == 'FINGERSPELL-START':
+                    final_gloss.extend(processed_gloss[i:i+2])  # START와 첫 글자 추가
+                    i += 2
                     while i < len(processed_gloss) and processed_gloss[i] != 'FINGERSPELL-END':
                         final_gloss.append(processed_gloss[i])
                         i += 1
+                    if i < len(processed_gloss):
+                        final_gloss.append(processed_gloss[i])  # END 추가
+                        i += 1
                 else:
                     word = processed_gloss[i]
                     final_gloss.append(sp.find_synonyms(word, nlp, dict_docs_spacy, list_2000_tokens))
                     i += 1