Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -192,7 +192,7 @@ def get_error_dataframe():
|
|
192 |
main_cat, sub_cat = cat_str.split("/", 1)
|
193 |
main_cat_zh = category_display_map.get(main_cat, main_cat)
|
194 |
# sub_cat -> e.g. "Mistranslation", "Addition", "Omission", ...
|
195 |
-
#
|
196 |
if sub_cat == "Mistranslation":
|
197 |
sub_cat_zh = "誤譯"
|
198 |
elif sub_cat == "Addition":
|
@@ -293,7 +293,7 @@ def save_and_next(source, target, score, rater_selector, alternative_translation
|
|
293 |
|
294 |
if not rater_selector:
|
295 |
return (
|
296 |
-
source, target, "",
|
297 |
str(data.loc[current_index, "id"]),
|
298 |
"請先選擇標註人員!",
|
299 |
get_error_dataframe(),
|
@@ -335,10 +335,10 @@ def save_and_next(source, target, score, rater_selector, alternative_translation
|
|
335 |
|
336 |
if current_index >= len(data):
|
337 |
return (
|
338 |
-
"已完成所有文本標記",
|
339 |
-
"已完成所有文本標記",
|
340 |
-
"",
|
341 |
-
"",
|
342 |
f"標記完成並儲存到 {annotations_file.name}!(共 {len(data)} 筆)",
|
343 |
pd.DataFrame(columns=["錯誤文字", "嚴重度", "分類"]),
|
344 |
""
|
@@ -402,53 +402,71 @@ def update_index_selection(selected_display):
|
|
402 |
)
|
403 |
|
404 |
DEMO_EXPLANATION = """
|
405 |
-
##
|
406 |
-
|
407 |
-
|
408 |
-
1.
|
409 |
-
2.
|
410 |
-
-
|
411 |
-
|
412 |
-
|
413 |
"""
|
414 |
|
415 |
with gr.Blocks(css="""
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
|
427 |
-
|
428 |
-
|
429 |
-
|
430 |
-
|
431 |
-
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
#
|
436 |
-
|
437 |
-
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
/*
|
442 |
-
|
443 |
-
|
444 |
-
|
445 |
-
|
446 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
447 |
""") as demo:
|
448 |
gr.Markdown(DEMO_EXPLANATION)
|
449 |
|
450 |
-
|
451 |
-
|
|
|
452 |
with gr.Row():
|
453 |
with gr.Column(scale=1):
|
454 |
rater_selector = gr.Dropdown(
|
@@ -472,31 +490,31 @@ with gr.Blocks(css="""
|
|
472 |
interactive=False
|
473 |
)
|
474 |
|
475 |
-
# 左: 原始文本 / 右: 翻譯文本
|
476 |
with gr.Column(scale=4):
|
477 |
source = gr.Textbox(label="原始文本", lines=4, interactive=False)
|
478 |
with gr.Column(scale=4):
|
479 |
target = gr.Textbox(label="翻譯文本", lines=4, interactive=False)
|
480 |
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
490 |
-
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
|
|
|
|
498 |
with gr.Row():
|
499 |
-
error_span = gr.Textbox(label="錯誤區間 (可複製『翻譯文本』貼上)", lines=2)
|
500 |
category = gr.Dropdown(
|
501 |
label="錯誤類別",
|
502 |
choices=list(categories_display.keys()),
|
@@ -507,103 +525,102 @@ with gr.Blocks(css="""
|
|
507 |
choices=categories_display["正確性"],
|
508 |
value="誤譯"
|
509 |
)
|
510 |
-
other = gr.Textbox(label="其他子類別(如選『其他』則填寫)")
|
511 |
severity = gr.Dropdown(
|
512 |
label="嚴重度",
|
513 |
choices=severity_choices_display,
|
514 |
value="輕微 (Minor)"
|
515 |
)
|
|
|
516 |
|
517 |
with gr.Row():
|
518 |
-
save_current_button = gr.Button("保存並繼續標記當前資料")
|
519 |
correct_button = gr.Button("✔ 完全正確", elem_id="correct_button")
|
520 |
too_many_errors_button = gr.Button("✖ 過多錯誤", elem_id="too_many_errors_button")
|
521 |
|
522 |
-
#
|
523 |
-
with gr.
|
524 |
-
gr.Markdown("
|
525 |
-
|
526 |
-
|
527 |
-
|
528 |
-
|
529 |
-
|
530 |
-
|
531 |
-
|
532 |
-
|
533 |
-
|
534 |
-
|
535 |
-
|
536 |
-
|
537 |
-
|
538 |
-
|
539 |
-
|
540 |
-
|
541 |
-
|
542 |
-
|
543 |
-
|
544 |
-
|
545 |
-
|
546 |
-
|
547 |
-
|
548 |
-
|
549 |
-
|
550 |
-
|
551 |
-
|
552 |
-
|
553 |
-
|
554 |
-
|
555 |
-
|
556 |
-
|
557 |
-
|
558 |
-
|
559 |
-
|
560 |
-
|
561 |
-
|
562 |
-
|
563 |
-
|
564 |
-
|
565 |
-
|
566 |
-
|
567 |
-
|
568 |
|
569 |
-
|
570 |
-
|
571 |
-
|
572 |
-
|
573 |
-
|
574 |
-
|
575 |
-
|
576 |
-
|
577 |
|
578 |
-
|
579 |
-
|
580 |
-
|
581 |
-
|
582 |
-
|
583 |
-
|
584 |
-
|
585 |
-
|
586 |
-
|
587 |
-
|
588 |
-
|
589 |
-
)
|
590 |
|
591 |
-
|
592 |
-
|
593 |
-
|
594 |
-
|
595 |
-
|
596 |
-
|
597 |
-
|
598 |
-
|
599 |
-
|
600 |
-
|
601 |
|
602 |
-
|
603 |
-
|
604 |
-
|
605 |
-
|
606 |
-
|
607 |
-
|
608 |
|
609 |
-
demo.launch()
|
|
|
192 |
main_cat, sub_cat = cat_str.split("/", 1)
|
193 |
main_cat_zh = category_display_map.get(main_cat, main_cat)
|
194 |
# sub_cat -> e.g. "Mistranslation", "Addition", "Omission", ...
|
195 |
+
# 這裡可逐一對照
|
196 |
if sub_cat == "Mistranslation":
|
197 |
sub_cat_zh = "誤譯"
|
198 |
elif sub_cat == "Addition":
|
|
|
293 |
|
294 |
if not rater_selector:
|
295 |
return (
|
296 |
+
source, target, "",
|
297 |
str(data.loc[current_index, "id"]),
|
298 |
"請先選擇標註人員!",
|
299 |
get_error_dataframe(),
|
|
|
335 |
|
336 |
if current_index >= len(data):
|
337 |
return (
|
338 |
+
"已完成所有文本標記",
|
339 |
+
"已完成所有文本標記",
|
340 |
+
"",
|
341 |
+
"",
|
342 |
f"標記完成並儲存到 {annotations_file.name}!(共 {len(data)} 筆)",
|
343 |
pd.DataFrame(columns=["錯誤文字", "嚴重度", "分類"]),
|
344 |
""
|
|
|
402 |
)
|
403 |
|
404 |
DEMO_EXPLANATION = """
|
405 |
+
## 翻譯標記工具:兩個 Tab
|
406 |
+
|
407 |
+
**操作步驟:**
|
408 |
+
1. 在「基本設定」中,選擇要標註的檔案、標註人員,以及句子索引。
|
409 |
+
2. 切換到「錯誤標註」標籤頁 (Tab),若發現翻譯有錯,於該頁面中輸入「錯誤區間」並選擇類別/嚴重度等,最後點擊「保存並繼續標記當前資料」。
|
410 |
+
- 若整句完全正確,可點擊「完全正確」
|
411 |
+
- 若錯誤太多超過五處,可點擊「過多錯誤」
|
412 |
+
3. 切換到「評分與提交」標籤頁,於此頁拉動滑桿給分,若有更好的譯文可在「建議翻譯」中輸入,最後按「保存並顯示下一筆」送出本句並進入下一筆。
|
413 |
"""
|
414 |
|
415 |
with gr.Blocks(css="""
|
416 |
+
/* 整體字體與行距 */
|
417 |
+
body {
|
418 |
+
background-color: #FAFAFA;
|
419 |
+
color: #333;
|
420 |
+
margin: 0;
|
421 |
+
padding: 0;
|
422 |
+
}
|
423 |
+
* {
|
424 |
+
font-size: 15px;
|
425 |
+
line-height: 1.4;
|
426 |
+
}
|
427 |
+
/* 按鈕分色,改用更柔和或深色系 */
|
428 |
+
#correct_button {
|
429 |
+
background-color: #2E7D32; /* 深綠 */
|
430 |
+
color: white;
|
431 |
+
font-size: 14px;
|
432 |
+
margin-bottom: 5px;
|
433 |
+
}
|
434 |
+
#too_many_errors_button {
|
435 |
+
background-color: #C62828; /* 深紅 */
|
436 |
+
color: white;
|
437 |
+
font-size: 14px;
|
438 |
+
margin-bottom: 5px;
|
439 |
+
}
|
440 |
+
#save_current_button {
|
441 |
+
background-color: #1565C0; /* 藍 */
|
442 |
+
color: white;
|
443 |
+
font-size: 14px;
|
444 |
+
margin-bottom: 5px;
|
445 |
+
}
|
446 |
+
#save_next_button {
|
447 |
+
background-color: #6D4C41; /* 棕 */
|
448 |
+
color: white;
|
449 |
+
font-size: 14px;
|
450 |
+
margin-bottom: 5px;
|
451 |
+
}
|
452 |
+
/* 螢光標示區的外框 */
|
453 |
+
#highlight_box_group {
|
454 |
+
border: 1px solid #aaa;
|
455 |
+
padding: 10px;
|
456 |
+
min-height: 80px;
|
457 |
+
}
|
458 |
+
/* 小面板 */
|
459 |
+
.panel {
|
460 |
+
border: 1px solid #ccc;
|
461 |
+
padding: 10px;
|
462 |
+
margin-bottom: 10px;
|
463 |
+
}
|
464 |
""") as demo:
|
465 |
gr.Markdown(DEMO_EXPLANATION)
|
466 |
|
467 |
+
# ------------------- 頂部:基本設定 -------------------
|
468 |
+
with gr.Box():
|
469 |
+
gr.Markdown("### 基本設定")
|
470 |
with gr.Row():
|
471 |
with gr.Column(scale=1):
|
472 |
rater_selector = gr.Dropdown(
|
|
|
490 |
interactive=False
|
491 |
)
|
492 |
|
|
|
493 |
with gr.Column(scale=4):
|
494 |
source = gr.Textbox(label="原始文本", lines=4, interactive=False)
|
495 |
with gr.Column(scale=4):
|
496 |
target = gr.Textbox(label="翻譯文本", lines=4, interactive=False)
|
497 |
|
498 |
+
# ------------------- 顯示螢光標示 & 錯誤紀錄 -------------------
|
499 |
+
with gr.Row():
|
500 |
+
with gr.Column(scale=5):
|
501 |
+
with gr.Group(elem_id="highlight_box_group"):
|
502 |
+
highlighted_target = gr.HTML(label="螢光標示區 (已標註的錯誤)")
|
503 |
+
with gr.Column(scale=5):
|
504 |
+
error_table = gr.Dataframe(
|
505 |
+
headers=["錯誤文字", "嚴重度", "分類"],
|
506 |
+
label="當前句子錯誤紀錄 (中文顯示)",
|
507 |
+
datatype=["str", "str", "str"],
|
508 |
+
interactive=False
|
509 |
+
)
|
510 |
+
|
511 |
+
# ------------------- 分成兩個 Tab -------------------
|
512 |
+
with gr.Tabs():
|
513 |
+
# ---- Tab 1: 錯誤標註 ----
|
514 |
+
with gr.Tab(label="錯誤標註"):
|
515 |
+
gr.Markdown("#### 在此頁標註錯誤區間,或按「完全正確」「過多錯誤」")
|
516 |
+
error_span = gr.Textbox(label="錯誤區間 (可複製『翻譯文本』貼上)", lines=2)
|
517 |
with gr.Row():
|
|
|
518 |
category = gr.Dropdown(
|
519 |
label="錯誤類別",
|
520 |
choices=list(categories_display.keys()),
|
|
|
525 |
choices=categories_display["正確性"],
|
526 |
value="誤譯"
|
527 |
)
|
|
|
528 |
severity = gr.Dropdown(
|
529 |
label="嚴重度",
|
530 |
choices=severity_choices_display,
|
531 |
value="輕微 (Minor)"
|
532 |
)
|
533 |
+
other = gr.Textbox(label="其他子類別(如選『其他』則在此填)")
|
534 |
|
535 |
with gr.Row():
|
536 |
+
save_current_button = gr.Button("保存並繼續標記當前資料", elem_id="save_current_button")
|
537 |
correct_button = gr.Button("✔ 完全正確", elem_id="correct_button")
|
538 |
too_many_errors_button = gr.Button("✖ 過多錯誤", elem_id="too_many_errors_button")
|
539 |
|
540 |
+
# ---- Tab 2: 評分與提交 ----
|
541 |
+
with gr.Tab(label="評分與提交"):
|
542 |
+
gr.Markdown("#### 在此頁給分,若有更適合的譯文可提供建議,最後點『保存並顯示下一筆』")
|
543 |
+
alternative_translation = gr.Textbox(
|
544 |
+
label="建議翻譯(如有更好譯法可填)",
|
545 |
+
lines=2
|
546 |
+
)
|
547 |
+
score = gr.Slider(
|
548 |
+
label="翻譯評分 (0=最差, 100=最好)",
|
549 |
+
minimum=0,
|
550 |
+
maximum=100,
|
551 |
+
step=1,
|
552 |
+
value=66
|
553 |
+
)
|
554 |
+
save_next_button = gr.Button("保存並顯示下一筆", elem_id="save_next_button")
|
555 |
+
|
556 |
+
# ------------------- 最下方:狀態顯示 -------------------
|
557 |
+
status = gr.Textbox(label="當前狀態", lines=1, interactive=False)
|
558 |
+
|
559 |
+
# ------------------- 互動邏輯綁定 -------------------
|
560 |
+
def update_subcats(selected_category):
    """Refresh the sub-category dropdown after the main category changes.

    Args:
        selected_category: The value of the main-category dropdown; used as a
            key into ``categories_display``.

    Returns:
        A ``gr.update`` payload that replaces the dropdown choices with the
        sub-categories of ``selected_category`` and selects the first one.
        When the category is missing/unknown (e.g. the dropdown was cleared)
        or has no sub-categories, the choices are emptied and the selection
        is cleared instead of raising ``KeyError`` / ``IndexError``.
    """
    # `.get(...) or []` covers both an unknown key and an empty/None entry.
    subcats = categories_display.get(selected_category) or []
    first_choice = subcats[0] if subcats else None
    return gr.update(choices=subcats, value=first_choice)
|
563 |
+
|
564 |
+
file_selector.change(
|
565 |
+
update_file_selection,
|
566 |
+
inputs=[file_selector],
|
567 |
+
outputs=[
|
568 |
+
source, target, error_span,
|
569 |
+
index_selector, current_index_display,
|
570 |
+
status, error_table, highlighted_target
|
571 |
+
]
|
572 |
+
)
|
573 |
+
index_selector.change(
|
574 |
+
update_index_selection,
|
575 |
+
inputs=[index_selector],
|
576 |
+
outputs=[
|
577 |
+
source, target, current_index_display,
|
578 |
+
status, error_table, highlighted_target
|
579 |
+
]
|
580 |
+
)
|
581 |
+
category.change(
|
582 |
+
update_subcats,
|
583 |
+
inputs=[category],
|
584 |
+
outputs=[subcategory]
|
585 |
+
)
|
586 |
|
587 |
+
correct_button.click(
|
588 |
+
mark_as_correct,
|
589 |
+
outputs=[error_span, status, error_table]
|
590 |
+
)
|
591 |
+
too_many_errors_button.click(
|
592 |
+
mark_as_too_many_errors,
|
593 |
+
outputs=[error_span, status, error_table]
|
594 |
+
)
|
595 |
|
596 |
+
# "Save and keep annotating the current item" button.
# The save and the display refresh run in ONE handler: two separate click
# listeners on the same button are dispatched independently, so the refresh
# could run before save_current finished and render a stale error table /
# highlight. Calling them in sequence here removes that ordering dependency.
def _save_current_and_refresh(src, tgt, rater, span, cat, subcat, sev, other_text):
    """Save the annotated error, then rebuild the error table and highlight.

    Returns the values save_current produces for (error_span, status),
    followed by the refreshed error dataframe and highlighted target HTML.
    """
    # NOTE(review): assumes save_current returns a 2-item tuple/list matching
    # its previous outputs=[error_span, status] — confirm against its body.
    saved_outputs = save_current(src, tgt, rater, span, cat, subcat, sev, other_text)
    return (
        *saved_outputs,
        get_error_dataframe(),
        highlight_errors_in_text(tgt, current_errors),
    )


save_current_button.click(
    _save_current_and_refresh,
    inputs=[source, target, rater_selector, error_span, category, subcategory, severity, other],
    outputs=[error_span, status, error_table, highlighted_target]
)
|
|
|
607 |
|
608 |
+
# 當「保存並顯示下一筆」
|
609 |
+
save_next_button.click(
|
610 |
+
save_and_next,
|
611 |
+
inputs=[source, target, score, rater_selector, alternative_translation],
|
612 |
+
outputs=[
|
613 |
+
source, target, error_span,
|
614 |
+
current_index_display, status,
|
615 |
+
error_table, highlighted_target
|
616 |
+
]
|
617 |
+
)
|
618 |
|
619 |
+
# 初始化
|
620 |
+
init_src, init_tgt = get_current_text()
|
621 |
+
source.value = init_src
|
622 |
+
target.value = init_tgt
|
623 |
+
error_table.value = pd.DataFrame(columns=["錯誤文字","嚴重度","分類"])
|
624 |
+
highlighted_target.value = highlight_errors_in_text(init_tgt, [])
|
625 |
|
626 |
+
demo.launch()
|