Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -192,6 +192,7 @@ def get_error_dataframe():
|
|
192 |
main_cat, sub_cat = cat_str.split("/", 1)
|
193 |
main_cat_zh = category_display_map.get(main_cat, main_cat)
|
194 |
# sub_cat -> e.g. "Mistranslation", "Addition", "Omission", ...
|
|
|
195 |
if sub_cat == "Mistranslation":
|
196 |
sub_cat_zh = "誤譯"
|
197 |
elif sub_cat == "Addition":
|
@@ -292,7 +293,7 @@ def save_and_next(source, target, score, rater_selector, alternative_translation
|
|
292 |
|
293 |
if not rater_selector:
|
294 |
return (
|
295 |
-
source, target, "",
|
296 |
str(data.loc[current_index, "id"]),
|
297 |
"請先選擇標註人員!",
|
298 |
get_error_dataframe(),
|
@@ -334,10 +335,10 @@ def save_and_next(source, target, score, rater_selector, alternative_translation
|
|
334 |
|
335 |
if current_index >= len(data):
|
336 |
return (
|
337 |
-
"已完成所有文本標記",
|
338 |
-
"已完成所有文本標記",
|
339 |
-
"",
|
340 |
-
"",
|
341 |
f"標記完成並儲存到 {annotations_file.name}!(共 {len(data)} 筆)",
|
342 |
pd.DataFrame(columns=["錯誤文字", "嚴重度", "分類"]),
|
343 |
""
|
@@ -401,154 +402,159 @@ def update_index_selection(selected_display):
|
|
401 |
)
|
402 |
|
403 |
DEMO_EXPLANATION = """
|
404 |
-
##
|
405 |
-
|
406 |
-
|
|
|
|
|
|
|
|
|
|
|
407 |
"""
|
408 |
|
409 |
with gr.Blocks(css="""
|
410 |
-
/*
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
#
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
#
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
#
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
#
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
}
|
447 |
-
|
448 |
-
|
449 |
-
#
|
450 |
-
|
451 |
-
|
452 |
-
|
453 |
-
}
|
454 |
-
#highlight_panel {
|
455 |
-
border: 1px solid #aaa;
|
456 |
-
padding: 10px;
|
457 |
-
min-height: 80px;
|
458 |
-
}
|
459 |
""") as demo:
|
460 |
gr.Markdown(DEMO_EXPLANATION)
|
461 |
|
462 |
-
# ------------------- 頂部:
|
463 |
-
with gr.Group(elem_id="outer_panel"):
|
464 |
-
gr.Markdown("### 基本設定")
|
465 |
-
with gr.Row():
|
466 |
-
with gr.Column(scale=1):
|
467 |
-
rater_selector = gr.Dropdown(
|
468 |
-
label="標註人員",
|
469 |
-
choices=["rater1", "rater2", "rater3", "rater4", "rater5", "rater6", "rater7"],
|
470 |
-
value="rater1"
|
471 |
-
)
|
472 |
-
file_selector = gr.Dropdown(
|
473 |
-
label="選擇檔案",
|
474 |
-
choices=csv_files,
|
475 |
-
value="test.csv"
|
476 |
-
)
|
477 |
-
index_selector = gr.Dropdown(
|
478 |
-
label="選擇索引 (id-原文前10字)",
|
479 |
-
choices=get_all_ids(),
|
480 |
-
value=f"{data.loc[current_index, 'id']}-{str(data.loc[current_index, 'source'])[:10]}"
|
481 |
-
)
|
482 |
-
current_index_display = gr.Textbox(
|
483 |
-
label="當前索引(id)",
|
484 |
-
value=str(data.loc[current_index, "id"]),
|
485 |
-
interactive=False
|
486 |
-
)
|
487 |
-
with gr.Column(scale=4):
|
488 |
-
source = gr.Textbox(label="原始文本", lines=4, interactive=False)
|
489 |
-
with gr.Column(scale=4):
|
490 |
-
target = gr.Textbox(label="翻譯文本", lines=4, interactive=False)
|
491 |
-
|
492 |
-
# ------------------- 中段: 螢光標記區 + 錯誤表格 -------------------
|
493 |
with gr.Row():
|
494 |
-
with gr.Column(scale=
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
headers=["錯誤文字", "嚴重度", "分類"],
|
500 |
-
label="當前句子錯誤紀錄 (中文顯示)",
|
501 |
-
datatype=["str", "str", "str"],
|
502 |
-
interactive=False
|
503 |
)
|
504 |
-
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
with gr.Row():
|
509 |
-
error_span = gr.Textbox(label="錯誤區間 (可複製『翻譯文本』貼上)", lines=2)
|
510 |
-
category = gr.Dropdown(
|
511 |
-
label="錯誤類別",
|
512 |
-
choices=list(categories_display.keys()),
|
513 |
-
value="正確性"
|
514 |
)
|
515 |
-
|
516 |
-
label="
|
517 |
-
choices=
|
518 |
-
value="
|
519 |
)
|
520 |
-
|
521 |
-
|
522 |
-
|
523 |
-
|
524 |
-
value="輕微 (Minor)"
|
525 |
)
|
526 |
|
527 |
-
|
528 |
-
|
529 |
-
|
530 |
-
|
|
|
531 |
|
532 |
-
|
533 |
-
|
534 |
-
gr.Markdown("### 評分與提交")
|
535 |
with gr.Row():
|
536 |
-
|
537 |
-
|
538 |
-
|
539 |
-
)
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
548 |
|
|
|
549 |
status = gr.Textbox(label="當前狀態", lines=1, interactive=False)
|
550 |
|
551 |
-
# -------------------
|
552 |
def update_subcats(selected_category):
|
553 |
subcats = categories_display[selected_category]
|
554 |
return gr.update(choices=subcats, value=subcats[0])
|
@@ -585,19 +591,20 @@ body {
|
|
585 |
outputs=[error_span, status, error_table]
|
586 |
)
|
587 |
|
588 |
-
#
|
589 |
save_current_button.click(
|
590 |
save_current,
|
591 |
inputs=[source, target, rater_selector, error_span, category, subcategory, severity, other],
|
592 |
outputs=[error_span, status]
|
593 |
)
|
|
|
594 |
save_current_button.click(
|
595 |
fn=lambda tgt: (get_error_dataframe(), highlight_errors_in_text(tgt, current_errors)),
|
596 |
inputs=[target],
|
597 |
outputs=[error_table, highlighted_target]
|
598 |
)
|
599 |
|
600 |
-
#
|
601 |
save_next_button.click(
|
602 |
save_and_next,
|
603 |
inputs=[source, target, score, rater_selector, alternative_translation],
|
@@ -608,7 +615,7 @@ body {
|
|
608 |
]
|
609 |
)
|
610 |
|
611 |
-
#
|
612 |
init_src, init_tgt = get_current_text()
|
613 |
source.value = init_src
|
614 |
target.value = init_tgt
|
|
|
192 |
main_cat, sub_cat = cat_str.split("/", 1)
|
193 |
main_cat_zh = category_display_map.get(main_cat, main_cat)
|
194 |
# sub_cat -> e.g. "Mistranslation", "Addition", "Omission", ...
|
195 |
+
# 這裡可逐一對照,略示如下:
|
196 |
if sub_cat == "Mistranslation":
|
197 |
sub_cat_zh = "誤譯"
|
198 |
elif sub_cat == "Addition":
|
|
|
293 |
|
294 |
if not rater_selector:
|
295 |
return (
|
296 |
+
source, target, "", # return empty error_span
|
297 |
str(data.loc[current_index, "id"]),
|
298 |
"請先選擇標註人員!",
|
299 |
get_error_dataframe(),
|
|
|
335 |
|
336 |
if current_index >= len(data):
|
337 |
return (
|
338 |
+
"已完成所有文本標記", # source
|
339 |
+
"已完成所有文本標記", # target
|
340 |
+
"", # error_span
|
341 |
+
"", # current_index_display
|
342 |
f"標記完成並儲存到 {annotations_file.name}!(共 {len(data)} 筆)",
|
343 |
pd.DataFrame(columns=["錯誤文字", "嚴重度", "分類"]),
|
344 |
""
|
|
|
402 |
)
|
403 |
|
404 |
DEMO_EXPLANATION = """
|
405 |
+
## 翻譯標記工具:階段性操作流程
|
406 |
+
|
407 |
+
### 操作步驟
|
408 |
+
1. **先選擇標註人員與檔案**,並在「索引」下拉中挑選要標註的句子。
|
409 |
+
2. 在「步驟 1:錯誤標註」中,若翻譯文本有錯,請輸入「錯誤區間」、選擇「錯誤類別/子類別/嚴重度」並點「保存並繼續標記」。
|
410 |
+
- 多個錯誤可重複此步驟;若無錯誤則可直接點「完全正確」。
|
411 |
+
3. 錯誤標完後,在「步驟 2:評分與提交」中,拉動滑桿給分,若有更好譯文,可在「建議翻譯」填入。
|
412 |
+
4. 按「保存並顯示下一筆」送出本句標註並進入下一句。
|
413 |
"""
|
414 |
|
415 |
with gr.Blocks(css="""
|
416 |
+
/* 整體字體與行距 */
|
417 |
+
* {
|
418 |
+
font-size: 15px;
|
419 |
+
line-height: 1.4;
|
420 |
+
}
|
421 |
+
/* 按鈕分色 */
|
422 |
+
#correct_button {
|
423 |
+
background-color: #4CAF50; /* 綠 */
|
424 |
+
color: white;
|
425 |
+
font-size: 14px;
|
426 |
+
margin-bottom: 5px;
|
427 |
+
}
|
428 |
+
#too_many_errors_button {
|
429 |
+
background-color: #f44336; /* 紅 */
|
430 |
+
color: white;
|
431 |
+
font-size: 14px;
|
432 |
+
margin-bottom: 5px;
|
433 |
+
}
|
434 |
+
#save_current_button {
|
435 |
+
background-color: #1565C0; /* 藍 */
|
436 |
+
color: white;
|
437 |
+
font-size: 14px;
|
438 |
+
margin-bottom: 5px;
|
439 |
+
}
|
440 |
+
#save_next_button {
|
441 |
+
background-color: #1565C0; /* 藍 */
|
442 |
+
color: white;
|
443 |
+
font-size: 14px;
|
444 |
+
margin-bottom: 5px;
|
445 |
+
}
|
446 |
+
/* 模擬帶框風格 */
|
447 |
+
#highlight_box_group {
|
448 |
+
border: 1px solid #aaa;
|
449 |
+
padding: 10px;
|
450 |
+
margin-bottom: 10px;
|
451 |
+
min-height: 80px;
|
452 |
+
}
|
453 |
+
/* 讓「步驟區塊」顯示類似面板效果 */
|
454 |
+
#step1_box, #step2_box {
|
455 |
+
border: 1px solid #ccc;
|
456 |
+
padding: 10px;
|
457 |
+
margin-bottom: 10px;
|
458 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
459 |
""") as demo:
|
460 |
gr.Markdown(DEMO_EXPLANATION)
|
461 |
|
462 |
+
# ------------------- 頂部: 檔案 & 索引控制 -------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
463 |
with gr.Row():
|
464 |
+
with gr.Column(scale=1):
|
465 |
+
rater_selector = gr.Dropdown(
|
466 |
+
label="標註人員",
|
467 |
+
choices=["rater_test", "rater1", "rater2", "rater3", "rater4", "rater5", "rater6", "rater7"],
|
468 |
+
value="rater_test"
|
|
|
|
|
|
|
|
|
469 |
)
|
470 |
+
file_selector = gr.Dropdown(
|
471 |
+
label="選擇檔案",
|
472 |
+
choices=csv_files,
|
473 |
+
value="test.csv"
|
|
|
|
|
|
|
|
|
|
|
|
|
474 |
)
|
475 |
+
index_selector = gr.Dropdown(
|
476 |
+
label="選擇索引 (id-原文前10字)",
|
477 |
+
choices=get_all_ids(),
|
478 |
+
value=f"{data.loc[current_index, 'id']}-{str(data.loc[current_index, 'source'])[:10]}"
|
479 |
)
|
480 |
+
current_index_display = gr.Textbox(
|
481 |
+
label="當前索引(id)",
|
482 |
+
value=str(data.loc[current_index, "id"]),
|
483 |
+
interactive=False
|
|
|
484 |
)
|
485 |
|
486 |
+
# 左: 原始文本 / 右: 翻譯文本
|
487 |
+
with gr.Column(scale=4):
|
488 |
+
source = gr.Textbox(label="原始文本", lines=4, interactive=False)
|
489 |
+
with gr.Column(scale=4):
|
490 |
+
target = gr.Textbox(label="翻譯文本", lines=4, interactive=False)
|
491 |
|
492 |
+
with gr.Tab("錯誤標註"):
|
493 |
+
# ------------------- 螢光標記區(用 Group + elem_id)&錯誤紀錄表 -------------------
|
|
|
494 |
with gr.Row():
|
495 |
+
with gr.Column(scale=5):
|
496 |
+
with gr.Group(elem_id="highlight_box_group"):
|
497 |
+
highlighted_target = gr.HTML(value="", label="螢光標示區 (已標註的錯誤)")
|
498 |
+
with gr.Column(scale=5):
|
499 |
+
error_table = gr.Dataframe(
|
500 |
+
headers=["錯誤文字", "嚴重度", "分類"],
|
501 |
+
label="當前句子錯誤紀錄 (中文顯示)",
|
502 |
+
datatype=["str", "str", "str"],
|
503 |
+
interactive=False
|
504 |
+
)
|
505 |
+
|
506 |
+
# ------------------- 步驟1:錯誤標註 -------------------
|
507 |
+
# with gr.Group(elem_id="step1_box"):
|
508 |
+
with gr.Row():
|
509 |
+
gr.Markdown("### 步驟 1:錯誤標註")
|
510 |
+
|
511 |
+
with gr.Row():
|
512 |
+
error_span = gr.Textbox(label="錯誤區間 (可複製『翻譯文本』貼上)", lines=2)
|
513 |
+
category = gr.Dropdown(
|
514 |
+
label="錯誤類別",
|
515 |
+
choices=list(categories_display.keys()),
|
516 |
+
value="正確性"
|
517 |
+
)
|
518 |
+
subcategory = gr.Dropdown(
|
519 |
+
label="子類別",
|
520 |
+
choices=categories_display["正確性"],
|
521 |
+
value="誤譯"
|
522 |
+
)
|
523 |
+
other = gr.Textbox(label="其他子類別(如選『其他』則填寫)")
|
524 |
+
severity = gr.Dropdown(
|
525 |
+
label="嚴重度",
|
526 |
+
choices=severity_choices_display,
|
527 |
+
value="輕微 (Minor)"
|
528 |
+
)
|
529 |
+
|
530 |
+
with gr.Row():
|
531 |
+
correct_button = gr.Button("✔ 完全正確", elem_id="correct_button")
|
532 |
+
too_many_errors_button = gr.Button("✖ 過多錯誤", elem_id="too_many_errors_button")
|
533 |
+
# elem_id ties this button to the "#save_current_button" rule declared in the
# Blocks CSS; without it the blue button styling defined there never applies.
save_current_button = gr.Button("保存並繼續標記當前資料", elem_id="save_current_button")
|
534 |
+
|
535 |
+
with gr.Tab("評分與提交"):
|
536 |
+
# ------------------- 步驟2:評分與提交 -------------------
|
537 |
+
# with gr.Group(elem_id="step2_box"):
|
538 |
+
with gr.Row():
|
539 |
+
gr.Markdown("### 步驟 2:評分與提交")
|
540 |
+
with gr.Row():
|
541 |
+
alternative_translation = gr.Textbox(
|
542 |
+
label="建議翻譯(如有更好譯法可填)",
|
543 |
+
lines=2
|
544 |
+
)
|
545 |
+
score = gr.Slider(
|
546 |
+
label="翻譯評分 (0=最差, 100=最好)",
|
547 |
+
minimum=0,
|
548 |
+
maximum=100,
|
549 |
+
step=1,
|
550 |
+
value=66
|
551 |
+
)
|
552 |
+
# elem_id ties this button to the "#save_next_button" rule declared in the
# Blocks CSS; without it the blue button styling defined there never applies.
save_next_button = gr.Button("保存並顯示下一筆", elem_id="save_next_button")
|
553 |
|
554 |
+
# ------------------- 當前狀態 -------------------
|
555 |
status = gr.Textbox(label="當前狀態", lines=1, interactive=False)
|
556 |
|
557 |
+
# ------------------- 邏輯綁定 -------------------
|
558 |
def update_subcats(selected_category):
    """Refresh the sub-category dropdown for the chosen main category.

    Args:
        selected_category: Display label currently selected in the category
            dropdown. May be ``None`` or an unknown label if the dropdown
            was cleared.

    Returns:
        A ``gr.update`` payload that replaces the sub-category choices and
        selects the first one, or clears the selection when the category has
        no sub-categories (or is not a key of ``categories_display``).
    """
    # .get() + "or []" keeps a cleared/unknown category from raising KeyError
    # and an empty sub-category list from raising IndexError on [0].
    subcats = categories_display.get(selected_category) or []
    default_subcat = subcats[0] if subcats else None
    return gr.update(choices=subcats, value=default_subcat)
|
|
|
591 |
outputs=[error_span, status, error_table]
|
592 |
)
|
593 |
|
594 |
+
# 按「保存並繼續標記」 -> 在同一句上加錯誤
|
595 |
# Save the error first, THEN refresh the table and the highlighted text.
# The refresh step reads the module-level current_errors that save_current
# updates; registering it as a second independent click listener gave no
# ordering guarantee, so the table/highlight could be rendered from stale
# data. Chaining with .then() runs it only after save_current has finished.
save_current_button.click(
    save_current,
    inputs=[source, target, rater_selector, error_span, category, subcategory, severity, other],
    outputs=[error_span, status]
).then(
    fn=lambda tgt: (get_error_dataframe(), highlight_errors_in_text(tgt, current_errors)),
    inputs=[target],
    outputs=[error_table, highlighted_target]
)
|
606 |
|
607 |
+
# 按「保存並顯示下一筆」 -> 送出當前整句標註 & 進下一句
|
608 |
save_next_button.click(
|
609 |
save_and_next,
|
610 |
inputs=[source, target, score, rater_selector, alternative_translation],
|
|
|
615 |
]
|
616 |
)
|
617 |
|
618 |
+
# 初始化介面
|
619 |
init_src, init_tgt = get_current_text()
|
620 |
source.value = init_src
|
621 |
target.value = init_tgt
|