Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -69,7 +69,6 @@ data = pd.read_csv(data_path, dtype={"id": "Int64"}) # 確保 id 為標準 Pytho
|
|
69 |
|
70 |
current_index = 0
|
71 |
current_errors = []
|
72 |
-
current_others = []
|
73 |
|
74 |
annotations_file = DATASET_DIR / f"test-{uuid4()}.json"
|
75 |
# ---------------------------------------------------------------------------------------------------------------------
|
@@ -96,9 +95,6 @@ def save_to_json(entry: dict, json_file: Path):
|
|
96 |
|
97 |
def save_current(source, target, rater_selector, error_span, category, subcategory, severity, other):
|
98 |
global current_index, data, current_errors
|
99 |
-
|
100 |
-
if category == "No-error":
|
101 |
-
return "", "無錯誤,不需要保存錯誤區間。"
|
102 |
|
103 |
system = data.loc[current_index, "system"]
|
104 |
lp = data.loc[current_index, "lp"]
|
@@ -106,10 +102,10 @@ def save_current(source, target, rater_selector, error_span, category, subcatego
|
|
106 |
id = int(data.loc[current_index, "id"])
|
107 |
reference = data.loc[current_index, "reference"]
|
108 |
|
109 |
-
if
|
110 |
-
category_value = f"{category}/{subcategory}"
|
111 |
-
else:
|
112 |
-
category_value = category
|
113 |
|
114 |
if error_span and error_span in target:
|
115 |
start = target.find(error_span)
|
@@ -123,11 +119,7 @@ def save_current(source, target, rater_selector, error_span, category, subcatego
|
|
123 |
"severity": severity.lower(),
|
124 |
"start": start,
|
125 |
"end": end
|
126 |
-
})
|
127 |
-
|
128 |
-
current_others.append({
|
129 |
"category": category_value,
|
130 |
-
"others": other if other else "",
|
131 |
})
|
132 |
|
133 |
# [error_span, status]
|
@@ -135,7 +127,7 @@ def save_current(source, target, rater_selector, error_span, category, subcatego
|
|
135 |
|
136 |
|
137 |
def save_and_next(source, target, score, rater_selector):
|
138 |
-
global current_index, data, annotations_file, current_errors
|
139 |
|
140 |
system = data.loc[current_index, "system"]
|
141 |
lp = data.loc[current_index, "lp"]
|
@@ -152,15 +144,13 @@ def save_and_next(source, target, score, rater_selector):
|
|
152 |
"src": source,
|
153 |
"mt": target,
|
154 |
"ref": reference,
|
155 |
-
"
|
156 |
-
"
|
157 |
-
"others": current_others,
|
158 |
}
|
159 |
save_to_json(annotations_entry, annotations_file)
|
160 |
|
161 |
# 清空當前錯誤緩存
|
162 |
current_errors = []
|
163 |
-
current_others = []
|
164 |
|
165 |
current_index += 1
|
166 |
if current_index >= len(data):
|
@@ -195,16 +185,29 @@ def update_index_selection(selected_index):
|
|
195 |
return get_current_text() + (str(selected_index), f"已跳轉至 id: {selected_index}")
|
196 |
|
197 |
categories = {
|
198 |
-
"No-error": [],
|
199 |
"Accuracy": ["Mistranslation", "Addition", "Omission", "Other"],
|
200 |
"Fluency": ["Grammar", "Spelling", "Punctuation", "Inconsistency", "Register", "Other"],
|
201 |
"Terminology": ["Inappropriate", "Inconsistent", "Other"],
|
202 |
"Style": ["Awkward", "Other"],
|
203 |
"Locale": ["Currency format", "Time format", "Name format", "Date format", "Address format", "Other"],
|
204 |
-
"Non-translation": []
|
205 |
}
|
|
|
206 |
rater = ['rater1', 'rater2','rater3', 'rater4', 'rater5', 'rater6', 'rater7']
|
207 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
208 |
DEMO_EXPLANATION = """
|
209 |
## 翻譯標記工具
|
210 |
### 使用規則 [更多細節](https://huggingface.co/spaces/350016z/TranslationError_Gradio/blob/main/README.md)
|
@@ -221,9 +224,19 @@ DEMO_EXPLANATION = """
|
|
221 |
- 33:保留部分原文意思,但有明顯遺漏,敘述難以理解,文法可能很差。
|
222 |
- 66:保留大部分原文意思,有一些文法錯誤或輕微不一致。
|
223 |
- 100:原文意思和文法完全正確。
|
|
|
224 |
"""
|
225 |
|
226 |
-
with gr.Blocks(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
227 |
gr.Markdown(DEMO_EXPLANATION)
|
228 |
|
229 |
with gr.Tab("標記工具"):
|
@@ -241,6 +254,9 @@ with gr.Blocks() as demo:
|
|
241 |
with gr.Row(variant='panel', equal_height=True):
|
242 |
with gr.Column(scale=1):
|
243 |
error_span = gr.Textbox(label="錯誤區間 (💡可以直接複製「翻譯文本」欄位,並在此貼上)", lines=6, placeholder="請輸入翻譯中文本的錯誤區間 (如無錯誤則不需)")
|
|
|
|
|
|
|
244 |
with gr.Column(scale=1):
|
245 |
with gr.Row(equal_height=True):
|
246 |
category = gr.Dropdown(label="錯誤類別", choices=list(categories.keys()), value="No-error")
|
@@ -269,6 +285,10 @@ with gr.Blocks() as demo:
|
|
269 |
file_selector.change(update_file_selection, inputs=[file_selector], outputs=[source, target, error_span, index_selector, current_index_display, status])
|
270 |
index_selector.change(update_index_selection, inputs=[index_selector], outputs=[source, target, current_index_display, status])
|
271 |
category.change(update_subcategories, inputs=[category], outputs=[subcategory])
|
|
|
|
|
|
|
|
|
272 |
save_current_button.click(save_current, inputs=[source, target, rater_selector, error_span, category, subcategory, severity, other], outputs=[error_span, status])
|
273 |
save_next_button.click(save_and_next, inputs=[source, target, score, rater_selector], outputs=[source, target, error_span, current_index_display, status])
|
274 |
|
|
|
69 |
|
70 |
current_index = 0
|
71 |
current_errors = []
|
|
|
72 |
|
73 |
annotations_file = DATASET_DIR / f"test-{uuid4()}.json"
|
74 |
# ---------------------------------------------------------------------------------------------------------------------
|
|
|
95 |
|
96 |
def save_current(source, target, rater_selector, error_span, category, subcategory, severity, other):
|
97 |
global current_index, data, current_errors
|
|
|
|
|
|
|
98 |
|
99 |
system = data.loc[current_index, "system"]
|
100 |
lp = data.loc[current_index, "lp"]
|
|
|
102 |
id = int(data.loc[current_index, "id"])
|
103 |
reference = data.loc[current_index, "reference"]
|
104 |
|
105 |
+
# Build the stored category label as "<category>/<detail>".
# Prefer the selected subcategory, then the free-text `other` value, and
# finally fall back to the bare category so that `category_value` is always
# bound before it is written into the error record below.
# (`else if` is not valid Python; the keyword is `elif`.)
if subcategory:
    category_value = f"{category}/{subcategory}"
elif other:
    category_value = f"{category}/{other}"
else:
    category_value = category
|
109 |
|
110 |
if error_span and error_span in target:
|
111 |
start = target.find(error_span)
|
|
|
119 |
"severity": severity.lower(),
|
120 |
"start": start,
|
121 |
"end": end
|
|
|
|
|
|
|
122 |
"category": category_value,
|
|
|
123 |
})
|
124 |
|
125 |
# [error_span, status]
|
|
|
127 |
|
128 |
|
129 |
def save_and_next(source, target, score, rater_selector):
|
130 |
+
global current_index, data, annotations_file, current_errors
|
131 |
|
132 |
system = data.loc[current_index, "system"]
|
133 |
lp = data.loc[current_index, "lp"]
|
|
|
144 |
"src": source,
|
145 |
"mt": target,
|
146 |
"ref": reference,
|
147 |
+
"esa_score": score,
|
148 |
+
"esa_spans": current_errors,
|
|
|
149 |
}
|
150 |
save_to_json(annotations_entry, annotations_file)
|
151 |
|
152 |
# 清空當前錯誤緩存
|
153 |
current_errors = []
|
|
|
154 |
|
155 |
current_index += 1
|
156 |
if current_index >= len(data):
|
|
|
185 |
return get_current_text() + (str(selected_index), f"已跳轉至 id: {selected_index}")
|
186 |
|
187 |
# Error taxonomy: top-level category -> list of its subcategories.
# The keys populate the category dropdown (`choices=list(categories.keys())`).
# Every category ends with an "Other" subcategory.
categories = {
    "Accuracy": ["Mistranslation", "Addition", "Omission", "Other"],
    "Fluency": ["Grammar", "Spelling", "Punctuation", "Inconsistency", "Register", "Other"],
    "Terminology": ["Inappropriate", "Inconsistent", "Other"],
    "Style": ["Awkward", "Other"],
    "Locale": ["Currency format", "Time format", "Name format", "Date format", "Address format", "Other"],
}
# Severity labels; presumably the choices of the severity selector
# (records store the lower-cased value) -- TODO confirm against the UI code.
severity_choices = ["Minor", "Major"]
# Known rater identifiers; presumably the choices of the rater selector
# -- TODO confirm against the UI code.
rater = ['rater1', 'rater2', 'rater3', 'rater4', 'rater5', 'rater6', 'rater7']
|
196 |
|
197 |
+
def mark_as_correct():
    """Return the UI values shown when a translation is flagged as fully correct.

    Returns:
        tuple[str, str]: an empty string (used to clear the error-span
        textbox) and a fixed confirmation message for the status field.
    """
    # NOTE(review): this neither records anything nor clears previously
    # collected error spans; it only resets the textbox and reports a
    # status. Confirm that this is the intended behaviour.
    return "", "標註為完全正確,無錯誤!"
|
199 |
+
def mark_as_too_many_errors():
    """Flag the current translation as having too many errors to annotate.

    Appends one placeholder record to the module-level ``current_errors``
    list: empty text, zero-width span at offset 0, severity ``"major"``,
    category ``"Non-translation"``.

    Returns:
        tuple[str, str]: an empty string (used to clear the error-span
        textbox) and a fixed confirmation message for the status field.
    """
    global current_errors

    # NOTE(review): every call appends another record, so clicking the
    # button repeatedly stores duplicate "Non-translation" entries --
    # confirm whether that is acceptable.
    current_errors.append({
        "text": "",
        "severity": "major",
        "start": 0,
        "end": 0,
        "category": "Non-translation",
    })
    return "", "已標註為過多錯誤!"
|
210 |
+
|
211 |
DEMO_EXPLANATION = """
|
212 |
## 翻譯標記工具
|
213 |
### 使用規則 [更多細節](https://huggingface.co/spaces/350016z/TranslationError_Gradio/blob/main/README.md)
|
|
|
224 |
- 33:保留部分原文意思,但有明顯遺漏,敘述難以理解,文法可能很差。
|
225 |
- 66:保留大部分原文意思,有一些文法錯誤或輕微不一致。
|
226 |
- 100:原文意思和文法完全正確。
|
227 |
+
(即使選擇 **No-error(沒有錯誤)**,分數也不一定需要評100分)
|
228 |
"""
|
229 |
|
230 |
+
with gr.Blocks(css="""
|
231 |
+
#correct_button {
|
232 |
+
background-color: #4CAF50;
|
233 |
+
color: white;
|
234 |
+
}
|
235 |
+
#too_many_errors_button {
|
236 |
+
background-color: #f44336;
|
237 |
+
color: white;
|
238 |
+
}
|
239 |
+
""") as demo:
|
240 |
gr.Markdown(DEMO_EXPLANATION)
|
241 |
|
242 |
with gr.Tab("標記工具"):
|
|
|
254 |
with gr.Row(variant='panel', equal_height=True):
|
255 |
with gr.Column(scale=1):
|
256 |
error_span = gr.Textbox(label="錯誤區間 (💡可以直接複製「翻譯文本」欄位,並在此貼上)", lines=6, placeholder="請輸入翻譯中文本的錯誤區間 (如無錯誤則不需)")
|
257 |
+
with gr.Column(scale=1):
|
258 |
+
correct_button = gr.Button("✔ 完全正確", elem_id="correct_button")
|
259 |
+
too_many_errors_button = gr.Button("✖ 過多錯誤", elem_id="too_many_errors_button")
|
260 |
with gr.Column(scale=1):
|
261 |
with gr.Row(equal_height=True):
|
262 |
category = gr.Dropdown(label="錯誤類別", choices=list(categories.keys()), value="No-error")
|
|
|
285 |
file_selector.change(update_file_selection, inputs=[file_selector], outputs=[source, target, error_span, index_selector, current_index_display, status])
|
286 |
index_selector.change(update_index_selection, inputs=[index_selector], outputs=[source, target, current_index_display, status])
|
287 |
category.change(update_subcategories, inputs=[category], outputs=[subcategory])
|
288 |
+
|
289 |
+
correct_button.click(mark_as_correct, outputs=[error_span, status])
|
290 |
+
too_many_errors_button.click(mark_as_too_many_errors, outputs=[error_span, status])
|
291 |
+
|
292 |
save_current_button.click(save_current, inputs=[source, target, rater_selector, error_span, category, subcategory, severity, other], outputs=[error_span, status])
|
293 |
save_next_button.click(save_and_next, inputs=[source, target, score, rater_selector], outputs=[source, target, error_span, current_index_display, status])
|
294 |
|