Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -234,13 +234,29 @@ def get_error_dataframe():
|
|
234 |
display_df["分類"] = df["category"].apply(map_category)
|
235 |
return display_df
|
236 |
|
|
|
237 |
def save_current(source, target, rater_selector, error_span, category, subcategory, severity, other):
|
|
|
|
|
|
|
|
|
238 |
global current_index, data, current_errors
|
|
|
239 |
if len(current_errors) >= 5:
|
240 |
-
return
|
|
|
|
|
|
|
|
|
|
|
241 |
|
242 |
if error_span and error_span not in target:
|
243 |
-
return
|
|
|
|
|
|
|
|
|
|
|
244 |
|
245 |
# 轉英文
|
246 |
cat_val, subcat_val = subcategory_map.get((category, subcategory), (category_map.get(category, "Other"), "Other"))
|
@@ -250,9 +266,15 @@ def save_current(source, target, rater_selector, error_span, category, subcatego
|
|
250 |
start = target.find(error_span)
|
251 |
end = start + len(error_span)
|
252 |
|
|
|
253 |
for err in current_errors:
|
254 |
if err["start"] == start and err["end"] == end:
|
255 |
-
return
|
|
|
|
|
|
|
|
|
|
|
256 |
|
257 |
if subcat_val == "Other" and other.strip():
|
258 |
subcat_val = other.strip()
|
@@ -264,11 +286,23 @@ def save_current(source, target, rater_selector, error_span, category, subcatego
|
|
264 |
"end": end,
|
265 |
"category": f"{cat_val}/{subcat_val}"
|
266 |
})
|
267 |
-
|
268 |
else:
|
269 |
-
|
|
|
270 |
|
271 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
272 |
global current_errors
|
273 |
current_errors.append({
|
274 |
"text": "",
|
@@ -277,9 +311,18 @@ def mark_as_correct():
|
|
277 |
"end": 0,
|
278 |
"category": "No-error"
|
279 |
})
|
280 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
281 |
|
282 |
-
def mark_as_too_many_errors():
|
|
|
|
|
|
|
283 |
global current_errors
|
284 |
current_errors.append({
|
285 |
"text": "",
|
@@ -288,7 +331,68 @@ def mark_as_too_many_errors():
|
|
288 |
"end": 0,
|
289 |
"category": "Non-translation"
|
290 |
})
|
291 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
292 |
|
293 |
def save_and_next(source, target, score, rater_selector, alternative_translation):
|
294 |
global current_index, data, annotations_file, current_errors, annotation_history
|
@@ -347,7 +451,7 @@ def save_and_next(source, target, score, rater_selector, alternative_translation
|
|
347 |
)
|
348 |
|
349 |
next_source, next_target = get_current_text()
|
350 |
-
status_msg = f"
|
351 |
|
352 |
highlighted_next = highlight_errors_in_text(next_target, current_errors)
|
353 |
return (
|
@@ -556,7 +660,7 @@ with gr.Blocks(css="""
|
|
556 |
# ------------------- 當前狀態 -------------------
|
557 |
status = gr.Textbox(label="當前狀態", lines=1, interactive=False)
|
558 |
|
559 |
-
# -------------------
|
560 |
def update_subcats(selected_category):
|
561 |
subcats = categories_display[selected_category]
|
562 |
return gr.update(choices=subcats, value=subcats[0])
|
@@ -584,26 +688,44 @@ with gr.Blocks(css="""
|
|
584 |
outputs=[subcategory]
|
585 |
)
|
586 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
587 |
correct_button.click(
|
588 |
mark_as_correct,
|
589 |
-
|
|
|
590 |
)
|
591 |
too_many_errors_button.click(
|
592 |
mark_as_too_many_errors,
|
593 |
-
|
|
|
594 |
)
|
595 |
-
|
596 |
-
# 按「保存並繼續標記」 -> 在同一句上加錯誤
|
597 |
save_current_button.click(
|
598 |
save_current,
|
599 |
inputs=[source, target, rater_selector, error_span, category, subcategory, severity, other],
|
600 |
-
outputs=[error_span, status]
|
601 |
-
)
|
602 |
-
# 再更新表格 & 高亮
|
603 |
-
save_current_button.click(
|
604 |
-
fn=lambda tgt: (get_error_dataframe(), highlight_errors_in_text(tgt, current_errors)),
|
605 |
-
inputs=[target],
|
606 |
-
outputs=[error_table, highlighted_target]
|
607 |
)
|
608 |
|
609 |
# 按「保存並顯示下一筆」 -> 送出當前整句標註 & 進下一句
|
|
|
234 |
display_df["分類"] = df["category"].apply(map_category)
|
235 |
return display_df
|
236 |
|
237 |
+
# === 關鍵修正:把「保存並繼續標記」後,要同時更新表格與螢光區 ===
|
238 |
def save_current(source, target, rater_selector, error_span, category, subcategory, severity, other):
|
239 |
+
"""
|
240 |
+
原本的邏輯 + 一次回傳 error_span, status, error_table, highlighted_target,
|
241 |
+
使得按下按鈕後能同步更新介面。
|
242 |
+
"""
|
243 |
global current_index, data, current_errors
|
244 |
+
# 若已標記超過 5 處錯誤
|
245 |
if len(current_errors) >= 5:
|
246 |
+
return (
|
247 |
+
"", # error_span 清空
|
248 |
+
"您已標記超過 5 處錯誤,可直接按『過多錯誤』或繼續。",
|
249 |
+
get_error_dataframe(),
|
250 |
+
highlight_errors_in_text(target, current_errors)
|
251 |
+
)
|
252 |
|
253 |
if error_span and error_span not in target:
|
254 |
+
return (
|
255 |
+
"",
|
256 |
+
"錯誤區間不存在於翻譯文本,請檢查!",
|
257 |
+
get_error_dataframe(),
|
258 |
+
highlight_errors_in_text(target, current_errors)
|
259 |
+
)
|
260 |
|
261 |
# 轉英文
|
262 |
cat_val, subcat_val = subcategory_map.get((category, subcategory), (category_map.get(category, "Other"), "Other"))
|
|
|
266 |
start = target.find(error_span)
|
267 |
end = start + len(error_span)
|
268 |
|
269 |
+
# 檢查是否重複標記
|
270 |
for err in current_errors:
|
271 |
if err["start"] == start and err["end"] == end:
|
272 |
+
return (
|
273 |
+
"",
|
274 |
+
"此錯誤區間已標記過,請勿重複。",
|
275 |
+
get_error_dataframe(),
|
276 |
+
highlight_errors_in_text(target, current_errors)
|
277 |
+
)
|
278 |
|
279 |
if subcat_val == "Other" and other.strip():
|
280 |
subcat_val = other.strip()
|
|
|
286 |
"end": end,
|
287 |
"category": f"{cat_val}/{subcat_val}"
|
288 |
})
|
289 |
+
status_msg = f"已標記錯誤: {error_span} (範圍 {start}-{end})"
|
290 |
else:
|
291 |
+
# 未輸入錯誤區間
|
292 |
+
status_msg = "尚未輸入錯誤區間,如無錯誤請按『完全正確』"
|
293 |
|
294 |
+
return (
|
295 |
+
"",
|
296 |
+
status_msg,
|
297 |
+
get_error_dataframe(),
|
298 |
+
highlight_errors_in_text(target, current_errors)
|
299 |
+
)
|
300 |
+
|
301 |
+
|
302 |
+
def mark_as_correct(target):
|
303 |
+
"""
|
304 |
+
標記為完全正確 (No-error),同時更新表格 & 螢光區。
|
305 |
+
"""
|
306 |
global current_errors
|
307 |
current_errors.append({
|
308 |
"text": "",
|
|
|
311 |
"end": 0,
|
312 |
"category": "No-error"
|
313 |
})
|
314 |
+
return (
|
315 |
+
"", # error_span
|
316 |
+
"標註為完全正確!",
|
317 |
+
get_error_dataframe(),
|
318 |
+
highlight_errors_in_text(target, current_errors)
|
319 |
+
)
|
320 |
+
|
321 |
|
322 |
+
def mark_as_too_many_errors(target):
|
323 |
+
"""
|
324 |
+
標記為過多錯誤 (Non-translation),同時更新表格 & 螢光區。
|
325 |
+
"""
|
326 |
global current_errors
|
327 |
current_errors.append({
|
328 |
"text": "",
|
|
|
331 |
"end": 0,
|
332 |
"category": "Non-translation"
|
333 |
})
|
334 |
+
return (
|
335 |
+
"",
|
336 |
+
"已標註為過多錯誤!",
|
337 |
+
get_error_dataframe(),
|
338 |
+
highlight_errors_in_text(target, current_errors)
|
339 |
+
)
|
340 |
+
|
341 |
+
# def save_current(source, target, rater_selector, error_span, category, subcategory, severity, other):
|
342 |
+
# global current_index, data, current_errors
|
343 |
+
# if len(current_errors) >= 5:
|
344 |
+
# return "", "您已標記超過 5 處錯誤,可直接按『過多錯誤』或繼續。"
|
345 |
+
|
346 |
+
# if error_span and error_span not in target:
|
347 |
+
# return "", "錯誤區間不存在於翻譯文本,請檢查!"
|
348 |
+
|
349 |
+
# # 轉英文
|
350 |
+
# cat_val, subcat_val = subcategory_map.get((category, subcategory), (category_map.get(category, "Other"), "Other"))
|
351 |
+
# severity_val = severity_map.get(severity, "Minor")
|
352 |
+
|
353 |
+
# if error_span:
|
354 |
+
# start = target.find(error_span)
|
355 |
+
# end = start + len(error_span)
|
356 |
+
|
357 |
+
# for err in current_errors:
|
358 |
+
# if err["start"] == start and err["end"] == end:
|
359 |
+
# return "", "此錯誤區間已標記過,請勿重複。"
|
360 |
+
|
361 |
+
# if subcat_val == "Other" and other.strip():
|
362 |
+
# subcat_val = other.strip()
|
363 |
+
|
364 |
+
# current_errors.append({
|
365 |
+
# "text": error_span,
|
366 |
+
# "severity": severity_val,
|
367 |
+
# "start": start,
|
368 |
+
# "end": end,
|
369 |
+
# "category": f"{cat_val}/{subcat_val}"
|
370 |
+
# })
|
371 |
+
# return "", f"已標記錯誤: {error_span} (範圍 {start}-{end})"
|
372 |
+
# else:
|
373 |
+
# return "", "尚未輸入錯誤區間,如無錯誤請按『完全正確』"
|
374 |
+
|
375 |
+
# def mark_as_correct():
|
376 |
+
# global current_errors
|
377 |
+
# current_errors.append({
|
378 |
+
# "text": "",
|
379 |
+
# "severity": "No-error",
|
380 |
+
# "start": 0,
|
381 |
+
# "end": 0,
|
382 |
+
# "category": "No-error"
|
383 |
+
# })
|
384 |
+
# return "", "標註為完全正確!", get_error_dataframe()
|
385 |
+
|
386 |
+
# def mark_as_too_many_errors():
|
387 |
+
# global current_errors
|
388 |
+
# current_errors.append({
|
389 |
+
# "text": "",
|
390 |
+
# "severity": "Major",
|
391 |
+
# "start": 0,
|
392 |
+
# "end": 0,
|
393 |
+
# "category": "Non-translation"
|
394 |
+
# })
|
395 |
+
# return "", "已標註為過多錯誤!", get_error_dataframe()
|
396 |
|
397 |
def save_and_next(source, target, score, rater_selector, alternative_translation):
|
398 |
global current_index, data, annotations_file, current_errors, annotation_history
|
|
|
451 |
)
|
452 |
|
453 |
next_source, next_target = get_current_text()
|
454 |
+
status_msg = f"已提交!目前進度:已完成第 {current_index} 筆 (id={current_index-1}) / 共 {len(data)} 筆。"
|
455 |
|
456 |
highlighted_next = highlight_errors_in_text(next_target, current_errors)
|
457 |
return (
|
|
|
660 |
# ------------------- 當前狀態 -------------------
|
661 |
status = gr.Textbox(label="當前狀態", lines=1, interactive=False)
|
662 |
|
663 |
+
# ------------------- callback 綁定 -------------------
|
664 |
def update_subcats(selected_category):
|
665 |
subcats = categories_display[selected_category]
|
666 |
return gr.update(choices=subcats, value=subcats[0])
|
|
|
688 |
outputs=[subcategory]
|
689 |
)
|
690 |
|
691 |
+
# correct_button.click(
|
692 |
+
# mark_as_correct,
|
693 |
+
# outputs=[error_span, status, error_table]
|
694 |
+
# )
|
695 |
+
# too_many_errors_button.click(
|
696 |
+
# mark_as_too_many_errors,
|
697 |
+
# outputs=[error_span, status, error_table]
|
698 |
+
# )
|
699 |
+
|
700 |
+
# # 按「保存並繼續標記」 -> 在同一句上加錯誤
|
701 |
+
# save_current_button.click(
|
702 |
+
# save_current,
|
703 |
+
# inputs=[source, target, rater_selector, error_span, category, subcategory, severity, other],
|
704 |
+
# outputs=[error_span, status]
|
705 |
+
# )
|
706 |
+
# # 再更新表格 & 高亮
|
707 |
+
# save_current_button.click(
|
708 |
+
# fn=lambda tgt: (get_error_dataframe(), highlight_errors_in_text(tgt, current_errors)),
|
709 |
+
# inputs=[target],
|
710 |
+
# outputs=[error_table, highlighted_target]
|
711 |
+
# )
|
712 |
+
|
713 |
+
# === 以下三個按鈕,皆一次更新表格與螢光區 ===
|
714 |
+
# 按「保存並繼續標記」 -> 在同一句上加錯誤並更新表格 & 高亮
|
715 |
correct_button.click(
|
716 |
mark_as_correct,
|
717 |
+
inputs=[target],
|
718 |
+
outputs=[error_span, status, error_table, highlighted_target]
|
719 |
)
|
720 |
too_many_errors_button.click(
|
721 |
mark_as_too_many_errors,
|
722 |
+
inputs=[target],
|
723 |
+
outputs=[error_span, status, error_table, highlighted_target]
|
724 |
)
|
|
|
|
|
725 |
save_current_button.click(
|
726 |
save_current,
|
727 |
inputs=[source, target, rater_selector, error_span, category, subcategory, severity, other],
|
728 |
+
outputs=[error_span, status, error_table, highlighted_target]
|
|
|
|
|
|
|
|
|
|
|
|
|
729 |
)
|
730 |
|
731 |
# 按「保存並顯示下一筆」 -> 送出當前整句標註 & 進下一句
|