350016z committed on
Commit
c0385ca
·
verified ·
1 Parent(s): c1bca1d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +146 -139
app.py CHANGED
@@ -192,6 +192,7 @@ def get_error_dataframe():
192
  main_cat, sub_cat = cat_str.split("/", 1)
193
  main_cat_zh = category_display_map.get(main_cat, main_cat)
194
  # sub_cat -> e.g. "Mistranslation", "Addition", "Omission", ...
 
195
  if sub_cat == "Mistranslation":
196
  sub_cat_zh = "誤譯"
197
  elif sub_cat == "Addition":
@@ -292,7 +293,7 @@ def save_and_next(source, target, score, rater_selector, alternative_translation
292
 
293
  if not rater_selector:
294
  return (
295
- source, target, "",
296
  str(data.loc[current_index, "id"]),
297
  "請先選擇標註人員!",
298
  get_error_dataframe(),
@@ -334,10 +335,10 @@ def save_and_next(source, target, score, rater_selector, alternative_translation
334
 
335
  if current_index >= len(data):
336
  return (
337
- "已完成所有文本標記",
338
- "已完成所有文本標記",
339
- "",
340
- "",
341
  f"標記完成並儲存到 {annotations_file.name}!(共 {len(data)} 筆)",
342
  pd.DataFrame(columns=["錯誤文字", "嚴重度", "分類"]),
343
  ""
@@ -401,154 +402,159 @@ def update_index_selection(selected_display):
401
  )
402
 
403
  DEMO_EXPLANATION = """
404
- ## 翻譯標記工具 (無 Box 元件)
405
-
406
- 此版本完全移除了 `gr.Box()`, 直接用 `gr.Group()`, `gr.Row()`, `gr.Column()` 加上簡單 CSS 來排版。
 
 
 
 
 
407
  """
408
 
409
  with gr.Blocks(css="""
410
- /* 整體字體與背景 */
411
- body {
412
- background-color: #F9F9F9;
413
- color: #333;
414
- margin: 0;
415
- padding: 0;
416
- }
417
- * {
418
- font-size: 15px;
419
- line-height: 1.4;
420
- }
421
-
422
- /* 自訂一些按鈕顏色 */
423
- #correct_button {
424
- background-color: #2E7D32; /* 綠 */
425
- color: white;
426
- font-size: 14px;
427
- margin-bottom: 5px;
428
- }
429
- #too_many_errors_button {
430
- background-color: #C62828; /* 紅 */
431
- color: white;
432
- font-size: 14px;
433
- margin-bottom: 5px;
434
- }
435
- #save_current_button {
436
- background-color: #1565C0; /* 藍 */
437
- color: white;
438
- font-size: 14px;
439
- margin-bottom: 5px;
440
- }
441
- #save_next_button {
442
- background-color: #6D4C41; /* 棕 */
443
- color: white;
444
- font-size: 14px;
445
- margin-bottom: 5px;
446
- }
447
-
448
- /* group 來替代 box 的外框效果 */
449
- #outer_panel {
450
- border: 1px solid #ccc;
451
- padding: 15px;
452
- margin-bottom: 15px;
453
- }
454
- #highlight_panel {
455
- border: 1px solid #aaa;
456
- padding: 10px;
457
- min-height: 80px;
458
- }
459
  """) as demo:
460
  gr.Markdown(DEMO_EXPLANATION)
461
 
462
- # ------------------- 頂部: 基本設定 + 文字顯示 -------------------
463
- with gr.Group(elem_id="outer_panel"):
464
- gr.Markdown("### 基本設定")
465
- with gr.Row():
466
- with gr.Column(scale=1):
467
- rater_selector = gr.Dropdown(
468
- label="標註人員",
469
- choices=["rater1", "rater2", "rater3", "rater4", "rater5", "rater6", "rater7"],
470
- value="rater1"
471
- )
472
- file_selector = gr.Dropdown(
473
- label="選擇檔案",
474
- choices=csv_files,
475
- value="test.csv"
476
- )
477
- index_selector = gr.Dropdown(
478
- label="選擇索引 (id-原文前10字)",
479
- choices=get_all_ids(),
480
- value=f"{data.loc[current_index, 'id']}-{str(data.loc[current_index, 'source'])[:10]}"
481
- )
482
- current_index_display = gr.Textbox(
483
- label="當前索引(id)",
484
- value=str(data.loc[current_index, "id"]),
485
- interactive=False
486
- )
487
- with gr.Column(scale=4):
488
- source = gr.Textbox(label="原始文本", lines=4, interactive=False)
489
- with gr.Column(scale=4):
490
- target = gr.Textbox(label="翻譯文本", lines=4, interactive=False)
491
-
492
- # ------------------- 中段: 螢光標記區 + 錯誤表格 -------------------
493
  with gr.Row():
494
- with gr.Column(scale=5):
495
- with gr.Group(elem_id="highlight_panel"):
496
- highlighted_target = gr.HTML(label="螢光標示區 (已標註的錯誤)")
497
- with gr.Column(scale=5):
498
- error_table = gr.Dataframe(
499
- headers=["錯誤文字", "嚴重度", "分類"],
500
- label="當前句子錯誤紀錄 (中文顯示)",
501
- datatype=["str", "str", "str"],
502
- interactive=False
503
  )
504
-
505
- # ------------------- 錯誤標註區 -------------------
506
- with gr.Group(elem_id="outer_panel"):
507
- gr.Markdown("### 錯誤標註")
508
- with gr.Row():
509
- error_span = gr.Textbox(label="錯誤區間 (可複製『翻譯文本』貼上)", lines=2)
510
- category = gr.Dropdown(
511
- label="錯誤類別",
512
- choices=list(categories_display.keys()),
513
- value="正確性"
514
  )
515
- subcategory = gr.Dropdown(
516
- label="子類別",
517
- choices=categories_display["正確性"],
518
- value="誤譯"
519
  )
520
- other = gr.Textbox(label="其他子類別(如選『其他』則填寫)")
521
- severity = gr.Dropdown(
522
- label="嚴重度",
523
- choices=severity_choices_display,
524
- value="輕微 (Minor)"
525
  )
526
 
527
- with gr.Row():
528
- save_current_button = gr.Button("保存並繼續標記當前資料", elem_id="save_current_button")
529
- correct_button = gr.Button("✔ 完全正確", elem_id="correct_button")
530
- too_many_errors_button = gr.Button("✖ 過多錯誤", elem_id="too_many_errors_button")
 
531
 
532
- # ------------------- 評分與提交 -------------------
533
- with gr.Group(elem_id="outer_panel"):
534
- gr.Markdown("### 評分與提交")
535
  with gr.Row():
536
- alternative_translation = gr.Textbox(
537
- label="建議翻譯(如有更好譯法可填)",
538
- lines=2
539
- )
540
- score = gr.Slider(
541
- label="翻譯評分 (0=最差, 100=最好)",
542
- minimum=0,
543
- maximum=100,
544
- step=1,
545
- value=66
546
- )
547
- save_next_button = gr.Button("保存並顯示下一筆", elem_id="save_next_button")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
548
 
 
549
  status = gr.Textbox(label="當前狀態", lines=1, interactive=False)
550
 
551
- # ------------------- 互動邏輯綁定 -------------------
552
  def update_subcats(selected_category):
553
  subcats = categories_display[selected_category]
554
  return gr.update(choices=subcats, value=subcats[0])
@@ -585,19 +591,20 @@ body {
585
  outputs=[error_span, status, error_table]
586
  )
587
 
588
- # 當「保存並繼續標記當前資料」
589
  save_current_button.click(
590
  save_current,
591
  inputs=[source, target, rater_selector, error_span, category, subcategory, severity, other],
592
  outputs=[error_span, status]
593
  )
 
594
  save_current_button.click(
595
  fn=lambda tgt: (get_error_dataframe(), highlight_errors_in_text(tgt, current_errors)),
596
  inputs=[target],
597
  outputs=[error_table, highlighted_target]
598
  )
599
 
600
- # 當「保存並顯示下一筆」
601
  save_next_button.click(
602
  save_and_next,
603
  inputs=[source, target, score, rater_selector, alternative_translation],
@@ -608,7 +615,7 @@ body {
608
  ]
609
  )
610
 
611
- # 初始化
612
  init_src, init_tgt = get_current_text()
613
  source.value = init_src
614
  target.value = init_tgt
 
192
  main_cat, sub_cat = cat_str.split("/", 1)
193
  main_cat_zh = category_display_map.get(main_cat, main_cat)
194
  # sub_cat -> e.g. "Mistranslation", "Addition", "Omission", ...
195
+ # 這裡可逐一對照,略示如下:
196
  if sub_cat == "Mistranslation":
197
  sub_cat_zh = "誤譯"
198
  elif sub_cat == "Addition":
 
293
 
294
  if not rater_selector:
295
  return (
296
+ source, target, "", # return empty error_span
297
  str(data.loc[current_index, "id"]),
298
  "請先選擇標註人員!",
299
  get_error_dataframe(),
 
335
 
336
  if current_index >= len(data):
337
  return (
338
+ "已完成所有文本標記", # source
339
+ "已完成所有文本標記", # target
340
+ "", # error_span
341
+ "", # current_index_display
342
  f"標記完成並儲存到 {annotations_file.name}!(共 {len(data)} 筆)",
343
  pd.DataFrame(columns=["錯誤文字", "嚴重度", "分類"]),
344
  ""
 
402
  )
403
 
404
  DEMO_EXPLANATION = """
405
+ ## 翻譯標記工具:階段性操作流程
406
+
407
+ ### 操作步驟
408
+ 1. **先選擇標註人員與檔案**,並在「索引」下拉中挑選要標註的句子。
409
+ 2. 在「步驟 1:錯誤標註」中,若翻譯文本有錯,請輸入「錯誤區間」、選擇「錯誤類別/子類別/嚴重度」並點「保存並繼續標記」。
410
+ - 多個錯誤可重複此步驟;若無錯誤則可直接點「完全正確」。
411
+ 3. 錯誤標完後,在「步驟 2:評分與提交」中,拉動滑桿給分,若有更好譯文,可在「建議翻譯」填入。
412
+ 4. 按「保存並顯示下一筆」送出本句標註並進入下一句。
413
  """
414
 
415
  with gr.Blocks(css="""
416
+ /* 整體字體與行距 */
417
+ * {
418
+ font-size: 15px;
419
+ line-height: 1.4;
420
+ }
421
+ /* 按鈕分色 */
422
+ #correct_button {
423
+ background-color: #4CAF50; /* 綠 */
424
+ color: white;
425
+ font-size: 14px;
426
+ margin-bottom: 5px;
427
+ }
428
+ #too_many_errors_button {
429
+ background-color: #f44336; /* 紅 */
430
+ color: white;
431
+ font-size: 14px;
432
+ margin-bottom: 5px;
433
+ }
434
+ #save_current_button {
435
+ background-color: #1565C0; /* 藍 */
436
+ color: white;
437
+ font-size: 14px;
438
+ margin-bottom: 5px;
439
+ }
440
+ #save_next_button {
441
+ background-color: #1565C0; /* 藍 */
442
+ color: white;
443
+ font-size: 14px;
444
+ margin-bottom: 5px;
445
+ }
446
+ /* 模擬帶框風格 */
447
+ #highlight_box_group {
448
+ border: 1px solid #aaa;
449
+ padding: 10px;
450
+ margin-bottom: 10px;
451
+ min-height: 80px;
452
+ }
453
+ /* 讓「步驟區塊」顯示類似面板效果 */
454
+ #step1_box, #step2_box {
455
+ border: 1px solid #ccc;
456
+ padding: 10px;
457
+ margin-bottom: 10px;
458
+ }
 
 
 
 
 
 
459
  """) as demo:
460
  gr.Markdown(DEMO_EXPLANATION)
461
 
462
+ # ------------------- 頂部: 檔案 & 索引控制 -------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
  with gr.Row():
464
+ with gr.Column(scale=1):
465
+ rater_selector = gr.Dropdown(
466
+ label="標註人員",
467
+ choices=["rater_test", "rater1", "rater2", "rater3", "rater4", "rater5", "rater6", "rater7"],
468
+ value="rater_test"
 
 
 
 
469
  )
470
+ file_selector = gr.Dropdown(
471
+ label="選擇檔案",
472
+ choices=csv_files,
473
+ value="test.csv"
 
 
 
 
 
 
474
  )
475
+ index_selector = gr.Dropdown(
476
+ label="選擇索引 (id-原文前10字)",
477
+ choices=get_all_ids(),
478
+ value=f"{data.loc[current_index, 'id']}-{str(data.loc[current_index, 'source'])[:10]}"
479
  )
480
+ current_index_display = gr.Textbox(
481
+ label="當前索引(id)",
482
+ value=str(data.loc[current_index, "id"]),
483
+ interactive=False
 
484
  )
485
 
486
+ # 左: 原始文本 / 右: 翻譯文本
487
+ with gr.Column(scale=4):
488
+ source = gr.Textbox(label="原始文本", lines=4, interactive=False)
489
+ with gr.Column(scale=4):
490
+ target = gr.Textbox(label="翻譯文本", lines=4, interactive=False)
491
 
492
+ with gr.Tab("錯誤標註"):
493
+ # ------------------- 螢光標記區(用 Group + elem_id)&錯誤紀錄表 -------------------
 
494
  with gr.Row():
495
+ with gr.Column(scale=5):
496
+ with gr.Group(elem_id="highlight_box_group"):
497
+ highlighted_target = gr.HTML(value="", label="螢光標示區 (已標註的錯誤)")
498
+ with gr.Column(scale=5):
499
+ error_table = gr.Dataframe(
500
+ headers=["錯誤文字", "嚴重度", "分類"],
501
+ label="當前句子錯誤紀錄 (中文顯示)",
502
+ datatype=["str", "str", "str"],
503
+ interactive=False
504
+ )
505
+
506
+ # ------------------- 步驟1:錯誤標註 -------------------
507
+ # with gr.Group(elem_id="step1_box"):
508
+ with gr.Row():
509
+ gr.Markdown("### 步驟 1:錯誤標註")
510
+
511
+ with gr.Row():
512
+ error_span = gr.Textbox(label="錯誤區間 (可複製『翻譯文本』貼上)", lines=2)
513
+ category = gr.Dropdown(
514
+ label="錯誤類別",
515
+ choices=list(categories_display.keys()),
516
+ value="正確性"
517
+ )
518
+ subcategory = gr.Dropdown(
519
+ label="子類別",
520
+ choices=categories_display["正確性"],
521
+ value="誤譯"
522
+ )
523
+ other = gr.Textbox(label="其他子類別(如選『其他』則填寫)")
524
+ severity = gr.Dropdown(
525
+ label="嚴重度",
526
+ choices=severity_choices_display,
527
+ value="輕微 (Minor)"
528
+ )
529
+
530
+ with gr.Row():
531
+ correct_button = gr.Button("✔ 完全正確", elem_id="correct_button")
532
+ too_many_errors_button = gr.Button("✖ 過多錯誤", elem_id="too_many_errors_button")
533
+ save_current_button = gr.Button("保存並繼續標記當前資料")
534
+
535
+ with gr.Tab("評分與提交"):
536
+ # ------------------- 步驟2:評分與提交 -------------------
537
+ # with gr.Group(elem_id="step2_box"):
538
+ with gr.Row():
539
+ gr.Markdown("### 步驟 2:評分與提交")
540
+ with gr.Row():
541
+ alternative_translation = gr.Textbox(
542
+ label="建議翻譯(如有更好譯法可填)",
543
+ lines=2
544
+ )
545
+ score = gr.Slider(
546
+ label="翻譯評分 (0=最差, 100=最好)",
547
+ minimum=0,
548
+ maximum=100,
549
+ step=1,
550
+ value=66
551
+ )
552
+ save_next_button = gr.Button("保存並顯示下一筆")
553
 
554
+ # ------------------- 當前狀態 -------------------
555
  status = gr.Textbox(label="當前狀態", lines=1, interactive=False)
556
 
557
+ # ------------------- 邏輯綁定 -------------------
558
  def update_subcats(selected_category):
559
  subcats = categories_display[selected_category]
560
  return gr.update(choices=subcats, value=subcats[0])
 
591
  outputs=[error_span, status, error_table]
592
  )
593
 
594
+ # 按「保存並繼續標記」 -> 在同一句上加錯誤
595
  save_current_button.click(
596
  save_current,
597
  inputs=[source, target, rater_selector, error_span, category, subcategory, severity, other],
598
  outputs=[error_span, status]
599
  )
600
+ # 再更新表格 & 高亮
601
  save_current_button.click(
602
  fn=lambda tgt: (get_error_dataframe(), highlight_errors_in_text(tgt, current_errors)),
603
  inputs=[target],
604
  outputs=[error_table, highlighted_target]
605
  )
606
 
607
+ # 按「保存並顯示下一筆」 -> 送出當前整句標註 & 進下一句
608
  save_next_button.click(
609
  save_and_next,
610
  inputs=[source, target, score, rater_selector, alternative_translation],
 
615
  ]
616
  )
617
 
618
+ # 初始化介面
619
  init_src, init_tgt = get_current_text()
620
  source.value = init_src
621
  target.value = init_tgt