Spaces:

350016z
/

TranslationError_Gradio

Sleeping

App Files Files Community

350016z committed on Mar 4

Commit

b0d473b

verified ·

1 Parent(s): 437738a

Update app.py

Browse files

Files changed (1) hide show

app.py +447 -138

app.py CHANGED Viewed

@@ -9,6 +9,65 @@ from uuid import uuid4
 from datasets import load_dataset
 import shutil
 DATASET_DIR = Path("json_dataset")
 DATASET_DIR.mkdir(parents=True, exist_ok=True)
@@ -19,9 +78,7 @@ scheduler = CommitScheduler(
     path_in_repo="data"
 )
-# Loading dataset from HuggingFace -------------------------------------------------------------------------------------
 def download_dataset_file(dataset_id, local_dir):
-    # /home/user/.cache/huggingface/hub/datasets--350016z--Taiwanese_dataset/snapshots/22594253c63bd80e85b5255f948432014c37373a
     snapshot_path = snapshot_download(repo_id=dataset_id, repo_type="dataset")
     contents = os.listdir(snapshot_path)
@@ -33,10 +90,7 @@ def download_dataset_file(dataset_id, local_dir):
             shutil.copy(source_file_path, local_file_path)
             print(f"Copied {file_name} to {local_file_path}")
-            # Check file permissions
             print(f"Permissions for {local_file_path}: {oct(os.stat(local_file_path).st_mode)}")
             time.sleep(1)
     return local_dir
@@ -57,19 +111,33 @@ if not os.path.exists(data_path):
     print(f"Error: {data_path} does not exist. Please check the file path.")
     exit()
-# Loading & Setting --------------------------------------------------------------------------------------------------
-data = pd.read_csv(data_path, dtype={"id": "Int64"}) # 確保 id 為標準 Python int
 current_index = 0
 current_errors = []
 annotations_file = DATASET_DIR / f"test_annotations-{uuid4()}.json"
-# ---------------------------------------------------------------------------------------------------------------------
 def get_all_ids():
-    return [str(id) for id in data["id"].tolist()]
 def get_current_text():
     global current_index, data
     source = data.loc[current_index, "source"]
@@ -77,122 +145,211 @@ def get_current_text():
     return source, target
 def save_to_json(entry: dict, json_file: Path):
-    """
-    將資料保存到指定的 JSON 檔案，並推送到 Hugging Face Dataset。
-    """
     with scheduler.lock:
         with json_file.open("a") as f:
             json.dump(entry, f, ensure_ascii=False)
             f.write("\n")
         # scheduler.push_to_hub(commit_message=f"更新檔案 {json_file.name}")
 def save_current(source, target, rater_selector, error_span, category, subcategory, severity, other):
     global current_index, data, current_errors
-    system = data.loc[current_index, "system"]
-    lp = data.loc[current_index, "lp"]
-    doc = data.loc[current_index, "doc"]
-    id = int(data.loc[current_index, "id"])
-    reference = data.loc[current_index, "reference"]
-    if subcategory:
-        if subcategory == "Other":
-            category_value = f"{category}/{other}"
-        else:
-            category_value = f"{category}/{subcategory}"
-    if error_span and error_span in target:
         start = target.find(error_span)
         end = start + len(error_span)
-        print(f"start: {start}, end: {end}")
     else:
-        return "", "錯誤區間不存在於翻譯文本中，請檢查！"
-    current_errors.append({
-        "text": error_span,
-        "severity": severity,
-        "start": start,
-        "end": end,
-        "category": category_value,
-    })
-    # [error_span, status]
-    return "", f"已記錄錯誤區間: {error_span}，範圍 {start}-{end}。"
-def save_and_next(source, target, score, rater_selector):
-    global current_index, data, annotations_file, current_errors
     system = data.loc[current_index, "system"]
     lp = data.loc[current_index, "lp"]
     doc = data.loc[current_index, "doc"]
-    id = int(data.loc[current_index, "id"])
     reference = data.loc[current_index, "reference"]
     annotations_entry = {
         "system": system,
         "lp": lp,
         "doc": doc,
-        "id": id,
         "rater": rater_selector,
         "src": source,
         "mt": target,
         "ref": reference,
         "esa_score": score,
         "esa_spans": current_errors,
     }
     save_to_json(annotations_entry, annotations_file)
-    # 清空當前錯誤緩存
     current_errors = []
     current_index += 1
     if current_index >= len(data):
-        return "已完成所有文本標記", "已完成所有文本標記", "", "", f"所有標記已完成並保存到 {annotations_file.name}！"
     next_source, next_target = get_current_text()
-    return next_source, next_target, "", str(current_index), f"分數與錯誤已保存到 {annotations_file.name}，請繼續下一筆！"
 def update_file_selection(selected_file):
-    global data_path, data, current_index, annotations_file
     data_path = os.path.join(current_dir, selected_file)
-    data = pd.read_csv(data_path)
-    id_list = [str(id) for id in sorted(data["id"].unique())]  # 轉為字串，確保 Gradio Dropdown 兼容
-    min_id = int(id_list[0])  # 取得最小的 ID
-    current_index = data.index[data["id"] == int(min_id)].tolist()[0]  # DataFrame 的行索引（row index）；而非檔案中的id
     file_base_name = os.path.splitext(selected_file)[0]
     annotations_file = DATASET_DIR / f"{file_base_name}_annotations-{uuid4()}.json"
-    # [source, target, error_span, index_selector, current_index_display, status]
-    return get_current_text() + ("", gr.update(choices=id_list, value=str(min_id)), str(min_id), f"已加載檔案：{selected_file}")
-def update_index_selection(selected_index):
-    global current_index, data
-    selected_index = int(selected_index)
-    current_index = data.index[data["id"] == selected_index].tolist()[0]
-    # [source, target, current_index_display, status]
-    return get_current_text() + (str(selected_index), f"已跳轉至 id: {selected_index}")
-categories = {
-    "Accuracy": ["Mistranslation", "Addition", "Omission", "Other"],
-    "Fluency": ["Grammar", "Spelling", "Punctuation", "Inconsistency", "Register", "Other"],
-    "Terminology": ["Inappropriate", "Inconsistent", "Other"],
-    "Style": ["Awkward", "Other"],
-    "Locale": ["Currency format", "Time format", "Name format", "Date format", "Address format", "Other"],
-}
-severity_choices = ["Minor", "Major"]
-rater = ['rater1', 'rater2','rater3', 'rater4', 'rater5', 'rater6', 'rater7']
 def mark_as_correct():
     global current_errors
     current_errors.append({
         "text": "",
         "severity": "No-error",
@@ -200,11 +357,14 @@ def mark_as_correct():
         "end": 0,
         "category": "No-error"
     })
-    return "", "標註為完全正確，無錯誤！"
 def mark_as_too_many_errors():
     global current_errors
     current_errors.append({
         "text": "",
         "severity": "Major",
@@ -212,104 +372,253 @@ def mark_as_too_many_errors():
         "end": 0,
         "category": "Non-translation"
     })
-    return "", "已標註為過多錯誤！"
 DEMO_EXPLANATION = """
 ## 翻譯標記工具
-### 使用規則 [更多細節](https://huggingface.co/spaces/350016z/TranslationError_Gradio/blob/main/README.md)
-1. **開始作業**
-    - 在「標註人員」選擇您的編號以識別。
-    - 左側「原始文本」顯示原文，右側「翻譯文本」為機器翻譯結果，請檢查右側內容是否有錯誤。
-2. **錯誤標註**
-    - 發現翻譯錯誤時，將錯誤部分標註到「錯誤區間」欄位，錯誤需連接成最長可能區間，若中間有正確翻譯，需分段標註，避免連續標記。
-    - 若有多處錯誤，可逐一標註並點擊「保存並繼續標記當前資料」後繼續修正。
-    - 若錯誤超過五處，直接按下「過多錯誤」按鈕，再進行後續的評分。
-    - 若無錯誤，直接按下「完全正確」按鈕，再進行後續的評分。
-3. **評分**
-    - 標記完所有錯誤區間以後，對每個翻譯文本的整體品質進行評分 (0-100分，0分最差，100分最好)。
-        - 0：幾乎沒有保留原文意思，大部分資訊遺失。
-        - 33：保留部分原文意思，但有明顯遺漏，敘述難以理解，文法可能很差。
-        - 66：保留大部分原文意思，有一些文法錯誤或輕微不一致。
-        - 100：原文意思和文法完全正確。
-        (即使選擇 **「完全正確」**，分數也不一定需要評100分)
 """
 with gr.Blocks(css="""
     #correct_button {
-        background-color: #4CAF50;
         color: white;
-        font-size: 12px;
         padding: 5px 5px;
         border-radius: 5px;
         min-height: 0px;
     }
     #too_many_errors_button {
-        background-color: #f44336;
         color: white;
-        font-size: 12px;
         padding: 5px 5px;
         border-radius: 5px;
         min-height: 0px;
     }
 """) as demo:
     gr.Markdown(DEMO_EXPLANATION)
-    with gr.Tab("標記工具"):
         with gr.Row():
             with gr.Column(scale=1):
-                rater_selector = gr.Dropdown(label="標註人員", choices=rater, value="rater1")
-                file_selector = gr.Dropdown(label="選擇檔案", choices=csv_files, value="test.csv")
-                index_selector = gr.Dropdown(label="選擇索引", choices=get_all_ids())
-                current_index_display = gr.Textbox(label="當前索引", value=str(current_index), interactive=False)
             with gr.Column(scale=8):
-                source = gr.Textbox(label="原始文本", lines=15, interactive=False)
             with gr.Column(scale=8):
-                target = gr.Textbox(label="翻譯文本", lines=15, interactive=False)
         with gr.Row(variant='panel', equal_height=True):
             with gr.Column(scale=3):
-                error_span = gr.Textbox(label="錯誤區間 (💡可以直接複製「翻譯文本」欄位，並在此貼上)", lines=6, placeholder="請輸入翻譯中文本的錯誤區間 (如無錯誤則不需)")
             with gr.Column(scale=3):
                 with gr.Row(equal_height=True):
-                    category = gr.Dropdown(label="錯誤類別", choices=list(categories.keys()), value="Accuracy")
-                    subcategory = gr.Dropdown(label="子類別", choices=categories["Accuracy"], value="Mistranslation")
-                    other = gr.Textbox(label="其他子類別", placeholder="若無法歸類，請填寫其他")
-                with gr.Row(equal_height=True):
-                    severity = gr.Radio(label="錯誤嚴重程度", choices=severity_choices, value="Minor")
                     save_current_button = gr.Button("保存並繼續標記當前資料")
-            with gr.Column(scale=1):
                 correct_button = gr.Button("✔ 完全正確", elem_id="correct_button")
                 too_many_errors_button = gr.Button("✖ 過多錯誤", elem_id="too_many_errors_button")
         with gr.Row(variant='panel', equal_height=True):
-            with gr.Column(scale=8):
-                score = gr.Slider(label="翻譯評分", minimum=0, maximum=100, step=1, value=66)
             with gr.Column(scale=1):
                 save_next_button = gr.Button("保存並顯示下一筆")
         status = gr.Textbox(label="當前狀態", lines=1, interactive=False)
         def update_subcategories(selected_category):
-            subcategories = categories[selected_category]
             if subcategories:
                 return gr.update(choices=subcategories, value=subcategories[0])
             else:
                 return gr.update(choices=[], value=None)
-        file_selector.change(update_file_selection, inputs=[file_selector], outputs=[source, target, error_span, index_selector, current_index_display, status])
-        index_selector.change(update_index_selection, inputs=[index_selector], outputs=[source, target, current_index_display, status])
-        category.change(update_subcategories, inputs=[category], outputs=[subcategory])
-        correct_button.click(mark_as_correct, outputs=[error_span, status])
-        too_many_errors_button.click(mark_as_too_many_errors, outputs=[error_span, status])
-        save_current_button.click(save_current, inputs=[source, target, rater_selector, error_span, category, subcategory, severity, other], outputs=[error_span, status])
-        save_next_button.click(save_and_next, inputs=[source, target, score, rater_selector], outputs=[source, target, error_span, current_index_display, status])
-        original, translated = get_current_text()
-        source.value = original
-        target.value = translated
-demo.launch()

 from datasets import load_dataset
 import shutil
# UI label tables: the front end shows Traditional Chinese labels while the
# saved annotations keep the English names.
#
# ``subcategory_map`` is the single source of truth. ``category_map`` and
# ``categories_display`` are derived from it so the three tables cannot drift
# apart when a category or subcategory is added or renamed.
subcategory_map = {
    # Accuracy
    ("正確性", "誤譯"): ("Accuracy", "Mistranslation"),
    ("正確性", "新增"): ("Accuracy", "Addition"),
    ("正確性", "漏譯"): ("Accuracy", "Omission"),
    ("正確性", "其他"): ("Accuracy", "Other"),
    # Fluency
    ("流暢度", "文法"): ("Fluency", "Grammar"),
    ("流暢度", "拼字"): ("Fluency", "Spelling"),
    ("流暢度", "標點符號"): ("Fluency", "Punctuation"),
    ("流暢度", "前後不一致"): ("Fluency", "Inconsistency"),
    ("流暢度", "語域"): ("Fluency", "Register"),
    ("流暢度", "其他"): ("Fluency", "Other"),
    # Terminology
    ("專有名詞", "使用不當"): ("Terminology", "Inappropriate"),
    ("專有名詞", "不一致"): ("Terminology", "Inconsistent"),
    ("專有名詞", "其他"): ("Terminology", "Other"),
    # Style
    ("風格", "用字笨拙"): ("Style", "Awkward"),
    ("風格", "其他"): ("Style", "Other"),
    # Locale
    ("在地化", "貨幣格式"): ("Locale", "Currency format"),
    ("在地化", "時間格式"): ("Locale", "Time format"),
    ("在地化", "人名格式"): ("Locale", "Name format"),
    ("在地化", "日期格式"): ("Locale", "Date format"),
    ("在地化", "地址格式"): ("Locale", "Address format"),
    ("在地化", "其他"): ("Locale", "Other"),
}

# Chinese category label -> English category name, in first-seen order.
category_map = {
    zh_cat: en_cat for (zh_cat, _), (en_cat, _) in subcategory_map.items()
}

# Chinese category label -> Chinese subcategory labels offered in the dropdown.
# These are exactly the keys of ``subcategory_map``, grouped by category.
categories_display = {
    zh_cat: [sub for (cat, sub) in subcategory_map if cat == zh_cat]
    for zh_cat in category_map
}

# Severity is displayed bilingually but stored as "Minor" / "Major".
severity_map = {
    "輕微 (Minor)": "Minor",
    "嚴重 (Major)": "Major",
}
severity_choices_display = list(severity_map)
+# ---------------------------------- 其餘程式基本結構不變 -------------------------------------
 DATASET_DIR = Path("json_dataset")
 DATASET_DIR.mkdir(parents=True, exist_ok=True)
     path_in_repo="data"
 )
 def download_dataset_file(dataset_id, local_dir):
     snapshot_path = snapshot_download(repo_id=dataset_id, repo_type="dataset")
     contents = os.listdir(snapshot_path)
             shutil.copy(source_file_path, local_file_path)
             print(f"Copied {file_name} to {local_file_path}")
             print(f"Permissions for {local_file_path}: {oct(os.stat(local_file_path).st_mode)}")
             time.sleep(1)
     return local_dir
     print(f"Error: {data_path} does not exist. Please check the file path.")
     exit()
+data = pd.read_csv(data_path, dtype={"id": "Int64"})  # 確保 id 為標準 Python int
 current_index = 0
 current_errors = []
 annotations_file = DATASET_DIR / f"test_annotations-{uuid4()}.json"
+# --------------------- 改善：可顯示歷史紀錄並顯示錯誤區間狀態 ---------------------
+# 新增一個資料結構「annotation_history」用來暫存所有標記結果
+annotation_history = []
 def get_all_ids():
+    # 為改善索引檢索效率，將「id + source(前10字)」當作顯示文字
+    # 實際上還是要存回單純的 id，後續要解析
+    id_list = []
+    for i in range(len(data)):
+        idx_value = data.loc[i, "id"]
+        src_text = str(data.loc[i, "source"])[:10].replace("\n", " ")
+        display_str = f"{idx_value}-{src_text}"
+        id_list.append(display_str)
+    return id_list
def parse_id_from_display(display_str):
    """Extract the numeric id from a dropdown label of the form ``"<id>-<text>"``.

    Args:
        display_str: A label such as ``"12-Hello worl"``. A bare id
            (``"12"`` or ``12``) is accepted as well.

    Returns:
        int: The id that precedes the first separator dash.

    Raises:
        ValueError: If ``display_str`` is ``None``, empty, or does not start
            with an integer.
    """
    if display_str is None:
        raise ValueError("Expected a label of the form '<id>-<text>', got None")

    label = str(display_str).strip()
    # A leading minus belongs to the id itself, not to the "<id>-<text>"
    # separator, so peel it off before splitting on the first dash.
    sign, remainder = ("-", label[1:]) if label.startswith("-") else ("", label)
    id_text = remainder.split("-", 1)[0]
    try:
        return int(sign + id_text)
    except ValueError:
        raise ValueError(
            f"Expected a label of the form '<id>-<text>', got {display_str!r}"
        ) from None
 def get_current_text():
     global current_index, data
     source = data.loc[current_index, "source"]
     return source, target
 def save_to_json(entry: dict, json_file: Path):
     with scheduler.lock:
         with json_file.open("a") as f:
             json.dump(entry, f, ensure_ascii=False)
             f.write("\n")
+        # 如需立即Push則取消註解
         # scheduler.push_to_hub(commit_message=f"更新檔案 {json_file.name}")
+# -------------------------- 新增：將錯誤標示記錄在前端介面 --------------------------
def get_error_dataframe():
    """Return the error spans recorded for the current sentence as a table.

    Reads the module-level ``current_errors`` list (spans recorded for the
    sentence being annotated and not yet submitted).

    Returns:
        pandas.DataFrame: Columns ``text``, ``severity``, ``start``, ``end``
        and ``category`` in that order; zero rows when nothing is recorded.
    """
    columns = ["text", "severity", "start", "end", "category"]
    # Passing ``columns`` to the constructor covers the empty list and the
    # column ordering in one step, and yields NaN rather than a KeyError if a
    # record lacks one of the keys.
    return pd.DataFrame(current_errors, columns=columns)
+# ---------------------- 高亮顯示錯誤區間 (基於 HTML) 的示範 ----------------------
def highlight_errors_in_text(text, errors):
    """Render *text* as HTML with every error span on a yellow background.

    The result is meant for an HTML component; a plain Gradio Textbox does
    not render markup.

    Args:
        text: The translated sentence. ``None`` is treated as empty text.
        errors: Iterable of dicts with integer ``start`` and ``end`` offsets
            into *text* (``end`` exclusive).

    Returns:
        str: HTML-escaped text in which each span is wrapped in
        ``<span style='background-color:yellow;'>``. Spans that fall outside
        the text or are empty are skipped; overlapping spans are clipped so
        no character is emitted twice.
    """
    from html import escape

    if text is None:
        return ""
    text = str(text)

    pieces = []
    cursor = 0  # index of the first character not yet emitted
    for err in sorted(errors, key=lambda e: e["start"]):
        start, end = err["start"], err["end"]
        # Guard: ignore spans that do not fit inside the text.
        if start < 0 or end > len(text):
            continue
        # Clip a span that overlaps the previous one so its shared part is
        # not repeated in the output.
        start = max(start, cursor)
        if start >= end:
            continue
        # Escape the raw text: it is inserted into HTML and may contain
        # characters such as "<" or "&".
        pieces.append(escape(text[cursor:start]))
        pieces.append(
            f"<span style='background-color:yellow;'>{escape(text[start:end])}</span>"
        )
        cursor = end
    pieces.append(escape(text[cursor:]))
    return "".join(pieces)
 def save_current(source, target, rater_selector, error_span, category, subcategory, severity, other):
     global current_index, data, current_errors
+    # 如果已經標註超過 5 處，這裡可再詢問使用者(Gradio 無法直接 alert/confirm)，暫以提示方式
+    if len(current_errors) >= 5:
+        return "", "您已標記超過 5 處錯誤，如錯誤非常多，可直接按「過多錯誤」，或繼續標注。"
+    # 防呆：若 error_span 內容不存在於 target 中
+    if error_span and error_span not in target:
+        return "", "錯誤區間不存在於翻譯文本中，請檢查！"
+    # 轉換 category, subcategory 為英文
+    cat_val, subcat_val = subcategory_map.get((category, subcategory), (category_map.get(category, "Other"), "Other"))
+    # 轉換 severity 為英文
+    severity_val = severity_map.get(severity, "Minor")
+    if error_span:
         start = target.find(error_span)
         end = start + len(error_span)
+        # 若已經標記過相同範圍，就跳過(防重複)
+        for err in current_errors:
+            if err["start"] == start and err["end"] == end:
+                return "", "此錯誤區間已經標記過，請勿重複標記。"
+        current_errors.append({
+            "text": error_span,
+            "severity": severity_val,
+            "start": start,
+            "end": end,
+            "category": f"{cat_val}/{subcat_val}" if subcat_val != "Other" else f"{cat_val}/{other}" if other else f"{cat_val}/Other",
+        })
+        return "", f"已記錄錯誤區間: {error_span}，範圍 {start}-{end}。"
     else:
+        return "", "請輸入錯誤區間，或選擇『完全正確』按鈕。"
def save_and_next(source, target, score, rater_selector, alternative_translation):
    """Persist the annotation of the current row and advance to the next one.

    Args:
        source: Source text shown in the UI.
        target: Machine-translated text shown in the UI.
        score: Overall quality score; must not be ``None``.
        rater_selector: Selected rater; must not be empty.
        alternative_translation: Optional suggested translation.

    Returns:
        tuple: Values for ``(source, target, error_span,
        current_index_display, status, error_table, highlighted_target)``.
        On a validation failure the current row is shown again with an
        explanatory status and nothing is saved.
    """
    global current_index, current_errors

    def _finished():
        # Output shown once every row has been annotated.
        return (
            "已完成所有文本標記",
            "已完成所有文本標記",
            "",
            "",
            f"所有標記已完成並保存到 {annotations_file.name}！ (共 {len(data)} 筆)",
            pd.DataFrame(),  # empty table
            "",
        )

    def _stay(message):
        # Keep the current row on screen and report *message* in the status.
        return (
            source,
            target,
            "",  # error_span
            str(data.loc[current_index, "id"]),
            message,
            get_error_dataframe(),
            highlight_errors_in_text(target, current_errors),
        )

    # After the last row ``current_index`` points past the data; a further
    # click must not look the row up (KeyError) or save a bogus record.
    if current_index >= len(data):
        return _finished()

    # Guards: a rater and a score are required before saving.
    if rater_selector is None or rater_selector.strip() == "":
        return _stay("請先選擇標註人員 (rater)！")
    if score is None:
        return _stay("請先填寫評分！")

    annotations_entry = {
        "system": data.loc[current_index, "system"],
        "lp": data.loc[current_index, "lp"],
        "doc": data.loc[current_index, "doc"],
        "id": int(data.loc[current_index, "id"]),
        "rater": rater_selector,
        "src": source,
        "mt": target,
        "ref": data.loc[current_index, "reference"],
        "esa_score": score,
        "esa_spans": current_errors,
        "alternative_translation": alternative_translation if alternative_translation else "",
    }
    save_to_json(annotations_entry, annotations_file)
    # Keep the submitted record in the in-memory history as well.
    annotation_history.append(annotations_entry)

    # Start the next row with a fresh list; the saved entry keeps the old one.
    current_errors = []
    current_index += 1
    if current_index >= len(data):
        return _finished()

    next_source, next_target = get_current_text()
    # Report progress as "finished N / total".
    status_msg = f"評分與標記已提交！已完成第 {current_index} 筆 / 共 {len(data)} 筆。"
    return (
        next_source,
        next_target,
        "",
        str(data.loc[current_index, "id"]),
        status_msg,
        pd.DataFrame(),  # no spans recorded yet for the new row
        "",  # nothing to highlight yet
    )
 def update_file_selection(selected_file):
+    global data_path, data, current_index, annotations_file, current_errors, annotation_history
     data_path = os.path.join(current_dir, selected_file)
+    data = pd.read_csv(data_path, dtype={"id":"Int64"})
+    current_errors = []
+    annotation_history = []
+    # 重新定位 current_index = 第一行 (或最小id)
+    min_id = data["id"].min()
+    current_index = data.index[data["id"] == min_id].tolist()[0]
     file_base_name = os.path.splitext(selected_file)[0]
+    # 產生新的 annotations_file
     annotations_file = DATASET_DIR / f"{file_base_name}_annotations-{uuid4()}.json"
+    src, tgt = get_current_text()
+    return (
+        src,  # source
+        tgt,  # target
+        "",   # error_span
+        gr.update(choices=get_all_ids(), value=f"{min_id}-{str(src)[:10]}"), # index_selector
+        str(data.loc[current_index, "id"]),  # current_index_display
+        f"已加載檔案：{selected_file}",
+        pd.DataFrame(columns=["text","severity","start","end","category"]),
+        highlight_errors_in_text(tgt, [])  # 高亮為空
+    )
def update_index_selection(selected_display):
    """Jump to the row chosen in the index dropdown.

    Args:
        selected_display: Dropdown label of the form ``"<id>-<text>"``.

    Returns:
        tuple: Values for ``(source, target, current_index_display, status,
        error_table, highlighted_target)``.
    """
    global current_index, current_errors

    try:
        selected_id = parse_id_from_display(selected_display)
    except (AttributeError, TypeError, ValueError):
        # Cleared dropdown (None) or a label that carries no id: stay on the
        # current row instead of raising inside the UI callback.
        src, tgt = get_current_text()
        return (
            src,
            tgt,
            str(data.loc[current_index, "id"]),
            f"找不到id: {selected_display}",
            get_error_dataframe(),
            highlight_errors_in_text(tgt, current_errors),
        )

    # Locate the row that carries this id.
    row_list = data.index[data["id"] == selected_id].tolist()
    if not row_list:
        return (
            "", "", str(selected_id),
            f"找不到id: {selected_id}",
            get_error_dataframe(),
            "",
        )

    if row_list[0] != current_index:
        # Unsaved spans hold offsets into the previous sentence; carrying
        # them over would attach them to (and highlight them in) the wrong
        # text, so they are discarded when the row actually changes.
        current_errors = []
    current_index = row_list[0]

    src, tgt = get_current_text()
    return (
        src,
        tgt,
        str(selected_id),
        f"已跳轉至 id: {selected_id}",
        get_error_dataframe(),
        highlight_errors_in_text(tgt, current_errors),
    )
 def mark_as_correct():
     global current_errors
+    # 標註無錯誤
     current_errors.append({
         "text": "",
         "severity": "No-error",
         "end": 0,
         "category": "No-error"
     })
+    return (
+        "",
+        "標註為完全正確，無錯誤！",
+        get_error_dataframe()
+    )
 def mark_as_too_many_errors():
     global current_errors
     current_errors.append({
         "text": "",
         "severity": "Major",
         "end": 0,
         "category": "Non-translation"
     })
+    return (
+        "",
+        "已標註為過多錯誤！",
+        get_error_dataframe()
+    )
+# ------------------------- 新增：提供一個「建議翻譯」欄位 -------------------------
+# ------------------------- 新增：在界面加上較明顯的評分標準提示 -------------------
 DEMO_EXPLANATION = """
 ## 翻譯標記工具
+### 使用規則
+1. **開始作業**
+   - 在「標註人員」選擇您的編號以識別。
+   - 下方「原始文本」顯示原文，右側「翻譯文本」為機器翻譯結果，請仔細檢查右側翻譯並標註錯誤。
+2. **錯誤標註**
+   - 若發現翻譯錯誤，請在「錯誤區間」欄位填入此錯誤在「翻譯文本」中的對應文字。
+   - 選擇「錯誤類別」、「子類別」，以及「錯誤嚴重程度」。
+   - 按下「保存並繼續標記當前資料」即可臨時儲存。
+   - 若錯誤超過五處，請使用「過多錯誤」按鈕(標註為 Major/Non-translation)。
+   - 若無任何錯誤，可直接按「完全正確」。
+   - **系統將在畫面下方顯示錯誤紀錄**，避免重複標記或遺漏。
+3. **評分** (0–100)
+   - 0分：幾乎無法理解，大部分意思遺失。
+   - 33分：保留部分原文意思，有明顯遺漏，句子不流暢或文法差。
+   - 66分：大部分原文意思保留，僅部分文法瑕疵或不一致。
+   - 100分：完全保留原文意思，語句通順無誤。
+   - 註：就算選擇「完全正確」，也可酌情給分，例如 90 或 100。
+4. **建議翻譯**
+   - 若您有更好的譯文想法，可在「建議翻譯」輸入框提供，利於後續改進翻譯品質。
+5. **送出與查看進度**
+   - 按「保存並顯示下一筆」後，系統會進行保存並顯示下一筆資料。
+   - 在畫面下方之「當前狀態」會顯示目前進度，例如「已完成第 X 筆 / 共 Y 筆」。
+6. **注意**
+   - 若需要跳至其他索引，可在「選擇索引」裡選擇。請留意：存檔後才會保留當前標記與評分。
+   - 此平台暫無法動態調整全部字體大小；若有視覺需要，可放大瀏覽器或按下 Ctrl + 滑鼠滾輪。
+以上說明若有不足，請直接留言反饋。
 """
 with gr.Blocks(css="""
+    /* 提高整體字體大小 (部分瀏覽器可能需自行縮放) */
+    * {
+        font-size: 16px;
+    }
+    /* 分區的樣式調整 */
+    .panel {
+        border: 1px solid #ccc;
+        padding: 10px;
+    }
+    /* 按鈕樣式區分 */
     #correct_button {
+        background-color: #4CAF50; /* 綠色 */
         color: white;
+        font-size: 14px;
         padding: 5px 5px;
         border-radius: 5px;
         min-height: 0px;
+        margin-bottom: 10px;
     }
     #too_many_errors_button {
+        background-color: #f44336; /* 紅色 */
         color: white;
+        font-size: 14px;
         padding: 5px 5px;
         border-radius: 5px;
         min-height: 0px;
+        margin-bottom: 10px;
+    }
+    /* 優化下拉清單字體 */
+    label, select {
+        font-size: 16px;
     }
 """) as demo:
     gr.Markdown(DEMO_EXPLANATION)
+    with gr.Tab("標記工具"):
+        # ------------------- 上方: 檔案與索引選擇 -------------------
         with gr.Row():
             with gr.Column(scale=1):
+                rater_selector = gr.Dropdown(
+                    label="標註人員",
+                    choices=["rater1", "rater2", "rater3", "rater4", "rater5", "rater6", "rater7"],
+                    value="rater1"
+                )
+                file_selector = gr.Dropdown(
+                    label="選擇檔案",
+                    choices=csv_files,
+                    value="test.csv"
+                )
+                index_selector = gr.Dropdown(
+                    label="選擇索引 (顯示: id-原文前10字)",
+                    choices=get_all_ids(),
+                    value=f"{data.loc[current_index, 'id']}-{str(data.loc[current_index, 'source'])[:10]}"
+                )
+                current_index_display = gr.Textbox(
+                    label="當前索引(id)",
+                    value=str(data.loc[current_index, "id"]),
+                    interactive=False
+                )
+            # ------------------- 中間: 原文 & 右側: 機翻 -------------------
             with gr.Column(scale=8):
+                source = gr.Textbox(label="原始文本", lines=6, interactive=False)
             with gr.Column(scale=8):
+                target = gr.Textbox(label="翻譯文本", lines=6, interactive=False)
+        # ------------------- 顯示錯誤高亮 -------------------
+        with gr.Row():
+            # 高亮後的翻譯文本(只讀, HTML 顯示)
+            with gr.Column(scale=8):
+                highlighted_target = gr.HTML(label="錯誤高亮顯示(僅供參考)")
+        # ------------------- 中段: 錯誤標註相關 -------------------
         with gr.Row(variant='panel', equal_height=True):
             with gr.Column(scale=3):
+                error_span = gr.Textbox(
+                    label="錯誤區間 (請直接複製『翻譯文本』文字貼上)",
+                    lines=3,
+                    placeholder="如無錯誤，可按『完全正確』"
+                )
             with gr.Column(scale=3):
                 with gr.Row(equal_height=True):
+                    category = gr.Dropdown(
+                        label="錯誤類別",
+                        choices=list(categories_display.keys()),
+                        value="正確性"
+                    )
+                    subcategory = gr.Dropdown(
+                        label="子類別",
+                        choices=categories_display["正確性"],
+                        value="誤譯"
+                    )
+                with gr.Row(equal_height=True):
+                    other = gr.Textbox(label="其他子類別", placeholder="若無法歸類，請填寫")
+                    severity = gr.Dropdown(
+                        label="錯誤嚴重程度",
+                        choices=severity_choices_display,
+                        value="輕微 (Minor)"
+                    )
+                with gr.Row():
                     save_current_button = gr.Button("保存並繼續標記當前資料")
+            with gr.Column(scale=2):
                 correct_button = gr.Button("✔ 完全正確", elem_id="correct_button")
                 too_many_errors_button = gr.Button("✖ 過多錯誤", elem_id="too_many_errors_button")
+            # ----------------- 錯誤紀錄表格 -----------------
+            with gr.Column(scale=4):
+                error_table = gr.Dataframe(
+                    headers=["text", "severity", "start", "end", "category"],
+                    label="當前句子錯誤紀錄",
+                    datatype=["str", "str", "number", "number", "str"],
+                    interactive=False
+                )
+        # ------------------- 建議翻譯與評分 -------------------
         with gr.Row(variant='panel', equal_height=True):
+            with gr.Column(scale=4):
+                alternative_translation = gr.Textbox(
+                    label="建議翻譯 (如有更適合的譯文，可在此提供)",
+                    lines=2
+                )
+            with gr.Column(scale=4):
+                score = gr.Slider(
+                    label="翻譯評分 (0=最差, 100=最好)",
+                    minimum=0,
+                    maximum=100,
+                    step=1,
+                    value=66
+                )
             with gr.Column(scale=1):
                 save_next_button = gr.Button("保存並顯示下一筆")
+        # ------------------- 最下方: 狀態列 -------------------
         status = gr.Textbox(label="當前狀態", lines=1, interactive=False)
+        # ------------------- 事件處理 -------------------
         def update_subcategories(selected_category):
+            subcategories = categories_display[selected_category]
             if subcategories:
                 return gr.update(choices=subcategories, value=subcategories[0])
             else:
                 return gr.update(choices=[], value=None)
+        file_selector.change(
+            update_file_selection,
+            inputs=[file_selector],
+            outputs=[
+                source, target, error_span,
+                index_selector, current_index_display,
+                status, error_table, highlighted_target
+            ]
+        )
+        index_selector.change(
+            update_index_selection,
+            inputs=[index_selector],
+            outputs=[
+                source, target,
+                current_index_display, status,
+                error_table, highlighted_target
+            ]
+        )
+        category.change(
+            update_subcategories,
+            inputs=[category],
+            outputs=[subcategory]
+        )
+        correct_button.click(
+            mark_as_correct,
+            outputs=[error_span, status, error_table]
+        )
+        too_many_errors_button.click(
+            mark_as_too_many_errors,
+            outputs=[error_span, status, error_table]
+        )
+        save_current_button.click(
+            save_current,
+            inputs=[source, target, rater_selector, error_span, category, subcategory, severity, other],
+            outputs=[error_span, status]
+        )
+        # 當保存當前錯誤後，也要更新錯誤表格和高亮
+        save_current_button.click(
+            fn=lambda tgt: (get_error_dataframe(), highlight_errors_in_text(tgt, current_errors)),
+            inputs=[target],
+            outputs=[error_table, highlighted_target]
+        )
+        save_next_button.click(
+            save_and_next,
+            inputs=[source, target, score, rater_selector, alternative_translation],
+            outputs=[
+                source, target, error_span,
+                current_index_display, status,
+                error_table, highlighted_target
+            ]
+        )
+        # 預設載入時顯示
+        initial_src, initial_tgt = get_current_text()
+        source.value = initial_src
+        target.value = initial_tgt
+        error_table.value = pd.DataFrame(columns=["text","severity","start","end","category"])
+        highlighted_target.value = highlight_errors_in_text(initial_tgt, [])
+demo.launch()