Spaces:

350016z
/

TranslationError_Gradio

Running

App Files Community

350016z committed on Mar 4

Commit

ae82a4a

verified ·

1 Parent(s): eda02ad

Update app.py

Browse files

Files changed (1) hide show

app.py +225 -137

app.py CHANGED Viewed

@@ -8,7 +8,8 @@ from huggingface_hub import CommitScheduler, snapshot_download
 from uuid import uuid4
 import shutil
-# ------------------------- 中英對照 (前端中文顯示, 後台英文儲存) -------------------------
 category_map = {
     "正確性": "Accuracy",
     "流暢度": "Fluency",
@@ -16,7 +17,6 @@ category_map = {
     "風格": "Style",
     "在地化": "Locale"
 }
 subcategory_map = {
     ("正確性", "誤譯"): ("Accuracy", "Mistranslation"),
     ("正確性", "新增"): ("Accuracy", "Addition"),
@@ -44,7 +44,6 @@ subcategory_map = {
     ("在地化", "地址格式"): ("Locale", "Address format"),
     ("在地化", "其他"): ("Locale", "Other"),
 }
 categories_display = {
     "正確性": ["誤譯", "新增", "漏譯", "其他"],
     "流暢度": ["文法", "拼字", "標點符號", "前後不一致", "語域", "其他"],
@@ -53,35 +52,50 @@ categories_display = {
     "在地化": ["貨幣格式", "時間格式", "人名格式", "日期格式", "地址格式", "其他"]
 }
-severity_choices_display = ["輕微 (Minor)", "嚴重 (Major)"]
 severity_map = {
     "輕微 (Minor)": "Minor",
     "嚴重 (Major)": "Major"
 }
 DATASET_DIR = Path("json_dataset")
 DATASET_DIR.mkdir(parents=True, exist_ok=True)
 scheduler = CommitScheduler(
-    repo_id="350016z/TaiwanCOMET_dataset",
     repo_type="dataset",
     folder_path=DATASET_DIR,
     path_in_repo="data"
 )
-# ---------------------------下載CSV資料檔--------------------------------
 def download_dataset_file(dataset_id, local_dir):
     snapshot_path = snapshot_download(repo_id=dataset_id, repo_type="dataset")
     contents = os.listdir(snapshot_path)
     for file_name in contents:
-        print("Checking file: ", file_name)
         if file_name.endswith(".csv"):
             source_file_path = os.path.join(snapshot_path, file_name)
             local_file_path = os.path.join(local_dir, file_name)
             shutil.copy(source_file_path, local_file_path)
-            print(f"Copied {file_name} to {local_file_path}")
-            print(f"Permissions for {local_file_path}: {oct(os.stat(local_file_path).st_mode)}")
             time.sleep(1)
     return local_dir
@@ -95,8 +109,6 @@ if not csv_files:
     exit()
 data_path = os.path.join(current_dir, 'test.csv') if 'test.csv' in csv_files else os.path.join(current_dir, csv_files[0])
-print(f"Data path: {data_path}")
 if not os.path.exists(data_path):
     print(f"Error: {data_path} does not exist. Please check the file path.")
     exit()
@@ -106,9 +118,7 @@ current_index = 0
 current_errors = []
 annotations_file = DATASET_DIR / f"test_annotations-{uuid4()}.json"
-# 存放所有已提交標註(方便後續查看歷史)，此範例主要顯示當前紀錄
-annotation_history = []
 def get_all_ids():
     """
@@ -137,24 +147,12 @@ def save_to_json(entry: dict, json_file: Path):
             json.dump(entry, f, ensure_ascii=False)
             f.write("\n")
-def get_error_dataframe():
-    """
-    只顯示「text」「severity」「category」三個欄位，不顯示 start/end。
-    """
-    df = pd.DataFrame(current_errors)
-    if df.empty:
-        return pd.DataFrame(columns=["錯誤文字", "嚴重度", "分類"])
-    # 轉成中文欄位
-    display_df = pd.DataFrame()
-    display_df["錯誤文字"] = df["text"]
-    display_df["嚴重度"] = df["severity"]
-    display_df["分類"] = df["category"]
-    return display_df
 def highlight_errors_in_text(text, errors):
     """
-    在文本中以 <span style="background-color:yellow;">...</span> 方式高亮顯示錯誤區間。
     """
     highlighted = ""
     last_end = 0
     for err in sorted(errors, key=lambda e: e["start"]):
@@ -168,45 +166,139 @@ def highlight_errors_in_text(text, errors):
     highlighted += text[last_end:]
     return highlighted
 def save_current(source, target, rater_selector, error_span, category, subcategory, severity, other):
     global current_index, data, current_errors
     if len(current_errors) >= 5:
-        return "", "您已標記超過 5 處錯誤，可直接按「過多錯誤」或繼續標注。"
     if error_span and error_span not in target:
         return "", "錯誤區間不存在於翻譯文本，請檢查！"
-    # 中英轉換
     cat_val, subcat_val = subcategory_map.get((category, subcategory), (category_map.get(category, "Other"), "Other"))
     severity_val = severity_map.get(severity, "Minor")
     if error_span:
         start = target.find(error_span)
         end = start + len(error_span)
         for err in current_errors:
             if err["start"] == start and err["end"] == end:
-                return "", "此錯誤區間已標記過，請勿重複標記。"
-        # 若子類別是 "其他" 且 user 有填 other，就使用 other
-        if subcat_val == "Other" and other:
-            subcat_val = other
         current_errors.append({
             "text": error_span,
             "severity": severity_val,
             "start": start,
             "end": end,
-            "category": f"{cat_val}/{subcat_val}",
         })
-        return "", f"已記錄錯誤區間: {error_span}，範圍 {start}-{end}。"
     else:
-        return "", "請輸入錯誤區間或點選『完全正確』"
 def save_and_next(source, target, score, rater_selector, alternative_translation):
     global current_index, data, annotations_file, current_errors, annotation_history
     if not rater_selector:
         return (
-            source, target, "",
             str(data.loc[current_index, "id"]),
             "請先選擇標註人員！",
             get_error_dataframe(),
@@ -248,17 +340,17 @@ def save_and_next(source, target, score, rater_selector, alternative_translation
     if current_index >= len(data):
         return (
-            "已完成所有文本標記",
-            "已完成所有文本標記",
-            "",
-            "",
-            f"所有標記已完成並保存到 {annotations_file.name}！ (總共 {len(data)} 筆)",
             pd.DataFrame(columns=["錯誤文字", "嚴重度", "分類"]),
             ""
         )
     next_source, next_target = get_current_text()
-    status_msg = f"評分與標記已提交！已完成第 {current_index} 筆 / 共 {len(data)} 筆。"
     return (
         next_source,
         next_target,
@@ -298,45 +390,34 @@ def update_index_selection(selected_display):
     selected_id = parse_id_from_display(selected_display)
     row_list = data.index[data["id"] == selected_id].tolist()
     if not row_list:
-        return "", "", str(selected_id), f"找不到id: {selected_id}", get_error_dataframe(), ""
     current_index = row_list[0]
     src, tgt = get_current_text()
-    return src, tgt, str(selected_id), f"已跳轉至 id: {selected_id}", get_error_dataframe(), highlight_errors_in_text(tgt, current_errors)
-def mark_as_correct():
-    global current_errors
-    current_errors.append({
-        "text": "",
-        "severity": "No-error",
-        "start": 0,
-        "end": 0,
-        "category": "No-error"
-    })
-    return "", "標註為完全正確，無錯誤！", get_error_dataframe()
-def mark_as_too_many_errors():
-    global current_errors
-    current_errors.append({
-        "text": "",
-        "severity": "Major",
-        "start": 0,
-        "end": 0,
-        "category": "Non-translation"
-    })
-    return "", "已標註為過多錯誤！", get_error_dataframe()
-DEMO_EXPLANATION = """
-## 翻譯標記工具
-1. 選擇檔案、標註人員、以及想要檢視的索引(句子)。
-2. 檢查「翻譯文本」是否有錯誤，如有，請選擇「錯誤類別」、「子類別」、「嚴重度」，並在「錯誤區間」貼上有問題的翻譯文字。
-3. 按「保存並繼續標記當前資料」，錯誤會暫時列在右方的「當前句子錯誤紀錄」中。
-4. 全部錯誤標記完後，可給分(0-100)，並可在「建議翻譯」中填寫更好的譯文。
-5. 按「保存並顯示下一筆」，會提交當前這筆紀錄並跳至下一筆。
-6. 若整句都正確，可按「完全正確」。若錯誤超過五處，可按「過多錯誤」。
 """
 with gr.Blocks(css="""
-    /* 調整整體字體大小與行距 */
     * {
         font-size: 15px;
         line-height: 1.4;
@@ -346,24 +427,30 @@ with gr.Blocks(css="""
         padding: 10px;
         margin-bottom: 10px;
     }
-    /* 調整按鈕外觀 */
     #correct_button {
-        background-color: #4CAF50;
         color: white;
         font-size: 14px;
         margin-bottom: 5px;
     }
     #too_many_errors_button {
-        background-color: #f44336;
         color: white;
         font-size: 14px;
         margin-bottom: 5px;
     }
 """) as demo:
     gr.Markdown(DEMO_EXPLANATION)
     with gr.Tab("標記工具"):
-        # ------------------- 第一行：上方控制 -------------------
         with gr.Row():
             with gr.Column(scale=1):
                 rater_selector = gr.Dropdown(
@@ -377,7 +464,7 @@ with gr.Blocks(css="""
                     value="test.csv"
                 )
                 index_selector = gr.Dropdown(
-                    label="選擇索引(id-原文前10字)",
                     choices=get_all_ids(),
                     value=f"{data.loc[current_index, 'id']}-{str(data.loc[current_index, 'source'])[:10]}"
                 )
@@ -387,34 +474,33 @@ with gr.Blocks(css="""
                     interactive=False
                 )
-            # ----------------- 中間：原始文本 -----------------
             with gr.Column(scale=4):
-                source = gr.Textbox(label="原始文本", lines=5, interactive=False)
-            # ----------------- 右側：翻譯文本 -----------------
             with gr.Column(scale=4):
-                target = gr.Textbox(label="翻譯文本", lines=5, interactive=False)
-        # ------------------- 第二行：高亮 & 錯誤紀錄 -------------------
         with gr.Row():
             with gr.Column(scale=5):
-                highlighted_target = gr.HTML(label="高亮顯示錯誤區間")
             with gr.Column(scale=5):
                 error_table = gr.Dataframe(
                     headers=["錯誤文字", "嚴重度", "分類"],
-                    label="當前句子錯誤紀錄",
                     datatype=["str", "str", "str"],
                     interactive=False
                 )
-        # ------------------- 第三行：錯誤標註相關區 -------------------
-        with gr.Row():
-            with gr.Column(scale=4):
-                error_span = gr.Textbox(
-                    label="錯誤區間 (可複製『翻譯文本』貼上)",
-                    lines=2
-                )
-            with gr.Column(scale=3):
                 category = gr.Dropdown(
                     label="錯誤類別",
                     choices=list(categories_display.keys()),
@@ -425,89 +511,91 @@ with gr.Blocks(css="""
                     choices=categories_display["正確性"],
                     value="誤譯"
                 )
-            with gr.Column(scale=3):
-                other = gr.Textbox(label="其他子類別(如選『其他』則在此填)")
                 severity = gr.Dropdown(
                     label="嚴重度",
                     choices=severity_choices_display,
                     value="輕微 (Minor)"
                 )
-        # ------------------- 第四行：錯誤標註按鈕區 -------------------
-        with gr.Row():
-            save_current_button = gr.Button("保存並繼續標記當前資料")
-            correct_button = gr.Button("✔ 完全正確", elem_id="correct_button")
-            too_many_errors_button = gr.Button("✖ 過多錯誤", elem_id="too_many_errors_button")
-        # ------------------- 第五行：建議翻譯 & 評分 & 送出 -------------------
-        with gr.Row():
-            alternative_translation = gr.Textbox(
-                label="建議翻譯 (如有更好譯法，可填)",
-                lines=2
-            )
-            score = gr.Slider(
-                label="翻譯評分 (0=最差, 100=最好)",
-                minimum=0,
-                maximum=100,
-                step=1,
-                value=66
-            )
             save_next_button = gr.Button("保存並顯示下一筆")
-        # ------------------- 狀態顯示 -------------------
         status = gr.Textbox(label="當前狀態", lines=1, interactive=False)
-        # ------------------- 互動邏輯綁定 -------------------
-        def update_subcategories(selected_category):
             subcats = categories_display[selected_category]
             return gr.update(choices=subcats, value=subcats[0])
         file_selector.change(
-            update_file_selection,
-            inputs=[file_selector],
             outputs=[
-                source, target, error_span,
-                index_selector, current_index_display,
                 status, error_table, highlighted_target
             ]
         )
         index_selector.change(
-            update_index_selection,
-            inputs=[index_selector],
             outputs=[
-                source, target, current_index_display,
                 status, error_table, highlighted_target
             ]
         )
         category.change(
-            update_subcategories,
-            inputs=[category],
             outputs=[subcategory]
         )
         correct_button.click(
-            mark_as_correct,
             outputs=[error_span, status, error_table]
         )
         too_many_errors_button.click(
-            mark_as_too_many_errors,
             outputs=[error_span, status, error_table]
         )
-        # 「保存並繼續標記當前資料」(只是暫時往 current_errors 加)
         save_current_button.click(
-            save_current,
             inputs=[source, target, rater_selector, error_span, category, subcategory, severity, other],
             outputs=[error_span, status]
         )
-        # 再次更新表格 & 高亮
         save_current_button.click(
             fn=lambda tgt: (get_error_dataframe(), highlight_errors_in_text(tgt, current_errors)),
             inputs=[target],
             outputs=[error_table, highlighted_target]
         )
-        # 「保存並顯示下一筆」(正式存檔到 JSON 並跳下一筆)
         save_next_button.click(
             save_and_next,
             inputs=[source, target, score, rater_selector, alternative_translation],
@@ -518,7 +606,7 @@ with gr.Blocks(css="""
             ]
         )
-        # 初始化畫面
         init_src, init_tgt = get_current_text()
         source.value = init_src
         target.value = init_tgt

 from uuid import uuid4
 import shutil
+# --------------------------- 中英對照的字典 ---------------------------
+# 後端儲存(English)，前端顯示(中文)
 category_map = {
     "正確性": "Accuracy",
     "流暢度": "Fluency",
     "風格": "Style",
     "在地化": "Locale"
 }
 subcategory_map = {
     ("正確性", "誤譯"): ("Accuracy", "Mistranslation"),
     ("正確性", "新增"): ("Accuracy", "Addition"),
     ("在地化", "地址格式"): ("Locale", "Address format"),
     ("在地化", "其他"): ("Locale", "Other"),
 }
 categories_display = {
     "正確性": ["誤譯", "新增", "漏譯", "其他"],
     "流暢度": ["文法", "拼字", "標點符號", "前後不一致", "語域", "其他"],
     "在地化": ["貨幣格式", "時間格式", "人名格式", "日期格式", "地址格式", "其他"]
 }
+severity_choices_display = ["輕微 (Minor)", "嚴重 (Major)"]
 severity_map = {
     "輕微 (Minor)": "Minor",
     "嚴重 (Major)": "Major"
 }
+# 這兩個字典用於前端顯示資料表時，把英文轉回中文顯示
+severity_display_map = {
+    "Minor": "輕微 (Minor)",
+    "Major": "嚴重 (Major)",
+    "No-error": "無錯誤",
+    "Non-translation": "過多錯誤"
+}
+category_display_map = {
+    "Accuracy": "正確性",
+    "Fluency": "流暢度",
+    "Terminology": "專有名詞",
+    "Style": "風格",
+    "Locale": "在地化",
+    "Other": "其他",
+    "No-error": "無錯誤",
+    "Non-translation": "過多錯誤"
+}
+# ---------------------------下載CSV資料檔--------------------------------
 DATASET_DIR = Path("json_dataset")
 DATASET_DIR.mkdir(parents=True, exist_ok=True)
 scheduler = CommitScheduler(
+    repo_id="350016z/TaiwanCOMET_dataset",
     repo_type="dataset",
     folder_path=DATASET_DIR,
     path_in_repo="data"
 )
 def download_dataset_file(dataset_id, local_dir):
     snapshot_path = snapshot_download(repo_id=dataset_id, repo_type="dataset")
     contents = os.listdir(snapshot_path)
     for file_name in contents:
         if file_name.endswith(".csv"):
             source_file_path = os.path.join(snapshot_path, file_name)
             local_file_path = os.path.join(local_dir, file_name)
             shutil.copy(source_file_path, local_file_path)
             time.sleep(1)
     return local_dir
     exit()
 data_path = os.path.join(current_dir, 'test.csv') if 'test.csv' in csv_files else os.path.join(current_dir, csv_files[0])
 if not os.path.exists(data_path):
     print(f"Error: {data_path} does not exist. Please check the file path.")
     exit()
 current_errors = []
 annotations_file = DATASET_DIR / f"test_annotations-{uuid4()}.json"
+annotation_history = []  # 若需顯示歷史可擴充
 def get_all_ids():
     """
             json.dump(entry, f, ensure_ascii=False)
             f.write("\n")
 def highlight_errors_in_text(text, errors):
     """
+    在文本中以 <span style="background-color:yellow;">...</span> 方式高亮。
     """
+    if not text:
+        return ""
     highlighted = ""
     last_end = 0
     for err in sorted(errors, key=lambda e: e["start"]):
     highlighted += text[last_end:]
     return highlighted
+def get_error_dataframe():
+    """
+    只顯示「錯誤文字」「嚴重度」「分類」(皆為中文顯示)，後端仍存英文。
+    """
+    df = pd.DataFrame(current_errors)
+    if df.empty:
+        return pd.DataFrame(columns=["錯誤文字", "嚴重度", "分類"])
+    display_df = pd.DataFrame()
+    # 顯示錯誤文字
+    display_df["錯誤文字"] = df["text"]
+    # 顯示嚴重度 (中文)
+    display_df["嚴重度"] = df["severity"].apply(lambda x: severity_display_map.get(x, x))
+    # 顯示分類 (中文)
+    # 後端儲存格式為 "Accuracy/Mistranslation" 或 "No-error"
+    def map_category(cat_str):
+        if cat_str in ["No-error", "Non-translation"]:
+            return severity_display_map.get(cat_str, cat_str)
+        if "/" not in cat_str:
+            # Single part (e.g. "Accuracy" or "Other")
+            return category_display_map.get(cat_str, cat_str)
+        main_cat, sub_cat = cat_str.split("/", 1)
+        main_cat_zh = category_display_map.get(main_cat, main_cat)
+        # sub_cat 可能是 "Mistranslation" or "Other" or "Addition"...
+        # 若需要更細，可再進一層 map；這裡示範單純中文對照
+        # 也可自行定義 sub_cat_map dict
+        sub_cat_zh = None
+        # 簡易示範: 逐一對照
+        if sub_cat == "Mistranslation":
+            sub_cat_zh = "誤譯"
+        elif sub_cat == "Addition":
+            sub_cat_zh = "新增"
+        elif sub_cat == "Omission":
+            sub_cat_zh = "漏譯"
+        elif sub_cat == "Grammar":
+            sub_cat_zh = "文法"
+        elif sub_cat == "Spelling":
+            sub_cat_zh = "拼字"
+        elif sub_cat == "Punctuation":
+            sub_cat_zh = "標點符號"
+        elif sub_cat == "Inconsistency":
+            sub_cat_zh = "前後不一致"
+        elif sub_cat == "Register":
+            sub_cat_zh = "語域"
+        elif sub_cat == "Inappropriate":
+            sub_cat_zh = "使用不當"
+        elif sub_cat == "Inconsistent":
+            sub_cat_zh = "不一致"
+        elif sub_cat == "Awkward":
+            sub_cat_zh = "用字笨拙"
+        elif sub_cat == "Currency format":
+            sub_cat_zh = "貨幣格式"
+        elif sub_cat == "Time format":
+            sub_cat_zh = "時間格式"
+        elif sub_cat == "Name format":
+            sub_cat_zh = "人名格式"
+        elif sub_cat == "Date format":
+            sub_cat_zh = "日期格式"
+        elif sub_cat == "Address format":
+            sub_cat_zh = "地址格式"
+        else:
+            # 若無對應就顯示原本
+            sub_cat_zh = sub_cat
+        return f"{main_cat_zh}/{sub_cat_zh}"
+    display_df["分類"] = df["category"].apply(map_category)
+    return display_df
 def save_current(source, target, rater_selector, error_span, category, subcategory, severity, other):
     global current_index, data, current_errors
     if len(current_errors) >= 5:
+        return "", "您已標記超過 5 處錯誤，可直接按『過多錯誤』或繼續。"
     if error_span and error_span not in target:
         return "", "錯誤區間不存在於翻譯文本，請檢查！"
+    # 轉英文
     cat_val, subcat_val = subcategory_map.get((category, subcategory), (category_map.get(category, "Other"), "Other"))
     severity_val = severity_map.get(severity, "Minor")
     if error_span:
         start = target.find(error_span)
         end = start + len(error_span)
         for err in current_errors:
             if err["start"] == start and err["end"] == end:
+                return "", "此錯誤區間已標記過，請勿重複。"
+        if subcat_val == "Other" and other.strip():
+            # 如果子類別選『其他』且填了自訂內容
+            subcat_val = other.strip()
         current_errors.append({
             "text": error_span,
             "severity": severity_val,
             "start": start,
             "end": end,
+            "category": f"{cat_val}/{subcat_val}"
         })
+        return "", f"已標記錯誤: {error_span} (範圍 {start}-{end})"
     else:
+        return "", "尚未輸入錯誤區間，如無錯誤請按『完全正確』"
+def mark_as_correct():
+    global current_errors
+    current_errors.append({
+        "text": "",
+        "severity": "No-error",
+        "start": 0,
+        "end": 0,
+        "category": "No-error"
+    })
+    return "", "標註為完全正確！", get_error_dataframe()
+def mark_as_too_many_errors():
+    global current_errors
+    current_errors.append({
+        "text": "",
+        "severity": "Major",
+        "start": 0,
+        "end": 0,
+        "category": "Non-translation"
+    })
+    return "", "已標註為過多錯誤！", get_error_dataframe()
 def save_and_next(source, target, score, rater_selector, alternative_translation):
     global current_index, data, annotations_file, current_errors, annotation_history
     if not rater_selector:
         return (
+            source, target, "",  # return empty error_span
             str(data.loc[current_index, "id"]),
             "請先選擇標註人員！",
             get_error_dataframe(),
     if current_index >= len(data):
         return (
+            "已完成所有文本標記",  # source
+            "已完成所有文本標記",  # target
+            "",  # error_span
+            "",  # current_index_display
+            f"標記完成並儲存到 {annotations_file.name}！(共 {len(data)} 筆)",
             pd.DataFrame(columns=["錯誤文字", "嚴重度", "分類"]),
             ""
         )
     next_source, next_target = get_current_text()
+    status_msg = f"已提交！目前進度：第 {current_index} 筆 / 共 {len(data)} 筆。"
     return (
         next_source,
         next_target,
     selected_id = parse_id_from_display(selected_display)
     row_list = data.index[data["id"] == selected_id].tolist()
     if not row_list:
+        return (
+            "", "", str(selected_id),
+            f"找不到 id: {selected_id}",
+            get_error_dataframe(),
+            ""
+        )
     current_index = row_list[0]
     src, tgt = get_current_text()
+    return (
+        src, tgt,
+        str(selected_id),
+        f"已跳轉至 id={selected_id}",
+        get_error_dataframe(),
+        highlight_errors_in_text(tgt, current_errors)
+    )
+DEMO_EXPLANATION = """
+## 翻譯標記工具：階段性操作流程
+### 操作步驟
+1. **先選擇標註人員與檔案**，並在「索引」下拉中挑選要標註的句子。
+2. 在「步驟 1：錯誤標註」中，若翻譯文本有錯，請輸入「錯誤區間」、選擇「錯誤類別/子類別/嚴重度」並點「保存並繼續標記」。多個錯誤可重複此步驟；若無錯誤則可直接點「完全正確」。
+3. 錯誤標完後，在「步驟 2：評分與提交」中，拉動滑桿給分，若有更好譯文，可在「建議翻譯」填入。再按「保存並顯示下一筆」送出本句標註並進入下一句。
 """
 with gr.Blocks(css="""
+    /* 整體字體與行距 */
     * {
         font-size: 15px;
         line-height: 1.4;
         padding: 10px;
         margin-bottom: 10px;
     }
+    /* 按鈕分色 */
     #correct_button {
+        background-color: #4CAF50; /* 綠 */
         color: white;
         font-size: 14px;
         margin-bottom: 5px;
     }
     #too_many_errors_button {
+        background-color: #f44336; /* 紅 */
         color: white;
         font-size: 14px;
         margin-bottom: 5px;
     }
+    /* 螢光標示外層加框，便於視覺聚焦 */
+    #highlight_box {
+        border: 1px solid #aaa;
+        padding: 10px;
+        min-height: 80px;
+    }
 """) as demo:
     gr.Markdown(DEMO_EXPLANATION)
     with gr.Tab("標記工具"):
+        # ------------------- 頂部: 檔案 & 索引控制 -------------------
         with gr.Row():
             with gr.Column(scale=1):
                 rater_selector = gr.Dropdown(
                     value="test.csv"
                 )
                 index_selector = gr.Dropdown(
+                    label="選擇索引 (id-原文前10字)",
                     choices=get_all_ids(),
                     value=f"{data.loc[current_index, 'id']}-{str(data.loc[current_index, 'source'])[:10]}"
                 )
                     interactive=False
                 )
+            # 左: 原始文本 / 右: 翻譯文本
             with gr.Column(scale=4):
+                source = gr.Textbox(label="原始文本", lines=4, interactive=False)
             with gr.Column(scale=4):
+                target = gr.Textbox(label="翻譯文本", lines=4, interactive=False)
+        # ------------------- 螢光標記區（帶外框）＆錯誤紀錄表 -------------------
         with gr.Row():
             with gr.Column(scale=5):
+                with gr.Box(elem_id="highlight_box"):
+                    highlighted_target = gr.HTML(value="", label="螢光標示區 (已標註的錯誤)")
             with gr.Column(scale=5):
                 error_table = gr.Dataframe(
                     headers=["錯誤文字", "嚴重度", "分類"],
+                    label="當前句子錯誤紀錄 (中文顯示)",
                     datatype=["str", "str", "str"],
                     interactive=False
                 )
+        # ------------------- 步驟1：錯誤標註 -------------------
+        with gr.Box(elem_id="step1_box", css="panel"):
+            gr.Markdown("### 步驟 1：錯誤標註")
+            with gr.Row():
+                # 錯誤區間 / 錯誤類別 / 子類別 / 嚴重度
+                error_span = gr.Textbox(label="錯誤區間 (可複製『翻譯文本』貼上)", lines=2)
                 category = gr.Dropdown(
                     label="錯誤類別",
                     choices=list(categories_display.keys()),
                     choices=categories_display["正確性"],
                     value="誤譯"
                 )
+                other = gr.Textbox(label="其他子類別(如選『其他』則填寫)")
                 severity = gr.Dropdown(
                     label="嚴重度",
                     choices=severity_choices_display,
                     value="輕微 (Minor)"
                 )
+            with gr.Row():
+                save_current_button = gr.Button("保存並繼續標記當前資料")
+                correct_button = gr.Button("✔ 完全正確", elem_id="correct_button")
+                too_many_errors_button = gr.Button("✖ 過多錯誤", elem_id="too_many_errors_button")
+        # ------------------- 步驟2：評分與提交 -------------------
+        with gr.Box(elem_id="step2_box", css="panel"):
+            gr.Markdown("### 步驟 2：評分與提交")
+            with gr.Row():
+                alternative_translation = gr.Textbox(
+                    label="建議翻譯(如有更好譯法可填)",
+                    lines=2
+                )
+                score = gr.Slider(
+                    label="翻譯評分 (0=最差, 100=最好)",
+                    minimum=0,
+                    maximum=100,
+                    step=1,
+                    value=66
+                )
+            # 提交按鈕
             save_next_button = gr.Button("保存並顯示下一筆")
+        # 最下方: 狀態
         status = gr.Textbox(label="當前狀態", lines=1, interactive=False)
+        # ------------------- 邏輯綁定 -------------------
+        def update_subcats(selected_category):
             subcats = categories_display[selected_category]
             return gr.update(choices=subcats, value=subcats[0])
         file_selector.change(
+            update_file_selection,
+            inputs=[file_selector],
             outputs=[
+                source, target, error_span,
+                index_selector, current_index_display,
                 status, error_table, highlighted_target
             ]
         )
         index_selector.change(
+            update_index_selection,
+            inputs=[index_selector],
             outputs=[
+                source, target, current_index_display,
                 status, error_table, highlighted_target
             ]
         )
         category.change(
+            update_subcats,
+            inputs=[category],
             outputs=[subcategory]
         )
         correct_button.click(
+            mark_as_correct,
             outputs=[error_span, status, error_table]
         )
         too_many_errors_button.click(
+            mark_as_too_many_errors,
             outputs=[error_span, status, error_table]
         )
+        # 按「保存並繼續標記」 -> 在同一句上加錯誤
         save_current_button.click(
+            save_current,
             inputs=[source, target, rater_selector, error_span, category, subcategory, severity, other],
             outputs=[error_span, status]
         )
+        # 再更新表格 & 高亮
         save_current_button.click(
             fn=lambda tgt: (get_error_dataframe(), highlight_errors_in_text(tgt, current_errors)),
             inputs=[target],
             outputs=[error_table, highlighted_target]
         )
+        # 按「保存並顯示下一筆」 -> 送出當前整句標註 & 進下一句
         save_next_button.click(
             save_and_next,
             inputs=[source, target, score, rater_selector, alternative_translation],
             ]
         )
+        # 初始化介面
         init_src, init_tgt = get_current_text()
         source.value = init_src
         target.value = init_tgt