350016z committed on
Commit
77fceba
·
verified ·
1 Parent(s): 06d8d2d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -20
app.py CHANGED
@@ -69,7 +69,6 @@ data = pd.read_csv(data_path, dtype={"id": "Int64"}) # 確保 id 為標準 Pytho
69
 
70
  current_index = 0
71
  current_errors = []
72
- current_others = []
73
 
74
  annotations_file = DATASET_DIR / f"test-{uuid4()}.json"
75
  # ---------------------------------------------------------------------------------------------------------------------
@@ -96,9 +95,6 @@ def save_to_json(entry: dict, json_file: Path):
96
 
97
  def save_current(source, target, rater_selector, error_span, category, subcategory, severity, other):
98
  global current_index, data, current_errors
99
-
100
- if category == "No-error":
101
- return "", "無錯誤,不需要保存錯誤區間。"
102
 
103
  system = data.loc[current_index, "system"]
104
  lp = data.loc[current_index, "lp"]
@@ -106,10 +102,10 @@ def save_current(source, target, rater_selector, error_span, category, subcatego
106
  id = int(data.loc[current_index, "id"])
107
  reference = data.loc[current_index, "reference"]
108
 
109
- if category != "Non-translation":
110
- category_value = f"{category}/{subcategory}"
111
- else:
112
- category_value = category
113
 
114
  if error_span and error_span in target:
115
  start = target.find(error_span)
@@ -123,11 +119,7 @@ def save_current(source, target, rater_selector, error_span, category, subcatego
123
  "severity": severity.lower(),
124
  "start": start,
125
  "end": end
126
- })
127
-
128
- current_others.append({
129
  "category": category_value,
130
- "others": other if other else "",
131
  })
132
 
133
  # [error_span, status]
@@ -135,7 +127,7 @@ def save_current(source, target, rater_selector, error_span, category, subcatego
135
 
136
 
137
  def save_and_next(source, target, score, rater_selector):
138
- global current_index, data, annotations_file, current_errors, current_others
139
 
140
  system = data.loc[current_index, "system"]
141
  lp = data.loc[current_index, "lp"]
@@ -152,15 +144,13 @@ def save_and_next(source, target, score, rater_selector):
152
  "src": source,
153
  "mt": target,
154
  "ref": reference,
155
- "sentence_score": score,
156
- "errors": current_errors,
157
- "others": current_others,
158
  }
159
  save_to_json(annotations_entry, annotations_file)
160
 
161
  # 清空當前錯誤緩存
162
  current_errors = []
163
- current_others = []
164
 
165
  current_index += 1
166
  if current_index >= len(data):
@@ -195,16 +185,29 @@ def update_index_selection(selected_index):
195
  return get_current_text() + (str(selected_index), f"已跳轉至 id: {selected_index}")
196
 
197
  categories = {
198
- "No-error": [],
199
  "Accuracy": ["Mistranslation", "Addition", "Omission", "Other"],
200
  "Fluency": ["Grammar", "Spelling", "Punctuation", "Inconsistency", "Register", "Other"],
201
  "Terminology": ["Inappropriate", "Inconsistent", "Other"],
202
  "Style": ["Awkward", "Other"],
203
  "Locale": ["Currency format", "Time format", "Name format", "Date format", "Address format", "Other"],
204
- "Non-translation": []
205
  }
 
206
  rater = ['rater1', 'rater2','rater3', 'rater4', 'rater5', 'rater6', 'rater7']
207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  DEMO_EXPLANATION = """
209
  ## 翻譯標記工具
210
  ### 使用規則 [更多細節](https://huggingface.co/spaces/350016z/TranslationError_Gradio/blob/main/README.md)
@@ -221,9 +224,19 @@ DEMO_EXPLANATION = """
221
  - 33:保留部分原文意思,但有明顯遺漏,敘述難以理解,文法可能很差。
222
  - 66:保留大部分原文意思,有一些文法錯誤或輕微不一致。
223
  - 100:原文意思和文法完全正確。
 
224
  """
225
 
226
- with gr.Blocks() as demo:
 
 
 
 
 
 
 
 
 
227
  gr.Markdown(DEMO_EXPLANATION)
228
 
229
  with gr.Tab("標記工具"):
@@ -241,6 +254,9 @@ with gr.Blocks() as demo:
241
  with gr.Row(variant='panel', equal_height=True):
242
  with gr.Column(scale=1):
243
  error_span = gr.Textbox(label="錯誤區間 (💡可以直接複製「翻譯文本」欄位,並在此貼上)", lines=6, placeholder="請輸入翻譯中文本的錯誤區間 (如無錯誤則不需)")
 
 
 
244
  with gr.Column(scale=1):
245
  with gr.Row(equal_height=True):
246
  category = gr.Dropdown(label="錯誤類別", choices=list(categories.keys()), value="No-error")
@@ -269,6 +285,10 @@ with gr.Blocks() as demo:
269
  file_selector.change(update_file_selection, inputs=[file_selector], outputs=[source, target, error_span, index_selector, current_index_display, status])
270
  index_selector.change(update_index_selection, inputs=[index_selector], outputs=[source, target, current_index_display, status])
271
  category.change(update_subcategories, inputs=[category], outputs=[subcategory])
 
 
 
 
272
  save_current_button.click(save_current, inputs=[source, target, rater_selector, error_span, category, subcategory, severity, other], outputs=[error_span, status])
273
  save_next_button.click(save_and_next, inputs=[source, target, score, rater_selector], outputs=[source, target, error_span, current_index_display, status])
274
 
 
69
 
70
  current_index = 0
71
  current_errors = []
 
72
 
73
  annotations_file = DATASET_DIR / f"test-{uuid4()}.json"
74
  # ---------------------------------------------------------------------------------------------------------------------
 
95
 
96
  def save_current(source, target, rater_selector, error_span, category, subcategory, severity, other):
97
  global current_index, data, current_errors
 
 
 
98
 
99
  system = data.loc[current_index, "system"]
100
  lp = data.loc[current_index, "lp"]
 
102
  id = int(data.loc[current_index, "id"])
103
  reference = data.loc[current_index, "reference"]
104
 
105
+ if subcategory:
106
+ category_value = f"{category}/{subcategory}"
107
+ else if other:
108
+ category_value = f"{category}/{other}"
109
 
110
  if error_span and error_span in target:
111
  start = target.find(error_span)
 
119
  "severity": severity.lower(),
120
  "start": start,
121
  "end": end
 
 
 
122
  "category": category_value,
 
123
  })
124
 
125
  # [error_span, status]
 
127
 
128
 
129
  def save_and_next(source, target, score, rater_selector):
130
+ global current_index, data, annotations_file, current_errors
131
 
132
  system = data.loc[current_index, "system"]
133
  lp = data.loc[current_index, "lp"]
 
144
  "src": source,
145
  "mt": target,
146
  "ref": reference,
147
+ "esa_score": score,
148
+ "esa_spans": current_errors,
 
149
  }
150
  save_to_json(annotations_entry, annotations_file)
151
 
152
  # 清空當前錯誤緩存
153
  current_errors = []
 
154
 
155
  current_index += 1
156
  if current_index >= len(data):
 
185
  return get_current_text() + (str(selected_index), f"已跳轉至 id: {selected_index}")
186
 
187
  categories = {
 
188
  "Accuracy": ["Mistranslation", "Addition", "Omission", "Other"],
189
  "Fluency": ["Grammar", "Spelling", "Punctuation", "Inconsistency", "Register", "Other"],
190
  "Terminology": ["Inappropriate", "Inconsistent", "Other"],
191
  "Style": ["Awkward", "Other"],
192
  "Locale": ["Currency format", "Time format", "Name format", "Date format", "Address format", "Other"],
 
193
  }
194
+ severity_choices = ["Minor", "Major"]
195
  rater = ['rater1', 'rater2','rater3', 'rater4', 'rater5', 'rater6', 'rater7']
196
 
197
+ def mark_as_correct():
198
+ return "", "標註為完全正確,無錯誤!"
199
+ def mark_as_too_many_errors():
200
+ global current_errors
201
+
202
+ current_errors.append({
203
+ "text": "",
204
+ "severity": "major",
205
+ "start": 0,
206
+ "end": 0,
207
+ "category": "Non-translation"
208
+ })
209
+ return "", "已標註為過多錯誤!"
210
+
211
  DEMO_EXPLANATION = """
212
  ## 翻譯標記工具
213
  ### 使用規則 [更多細節](https://huggingface.co/spaces/350016z/TranslationError_Gradio/blob/main/README.md)
 
224
  - 33:保留部分原文意思,但有明顯遺漏,敘述難以理解,文法可能很差。
225
  - 66:保留大部分原文意思,有一些文法錯誤或輕微不一致。
226
  - 100:原文意思和文法完全正確。
227
+ (即使選擇 **No-error(沒有錯誤)**,分數也不一定需要評100分)
228
  """
229
 
230
+ with gr.Blocks(css="""
231
+ #correct_button {
232
+ background-color: #4CAF50;
233
+ color: white;
234
+ }
235
+ #too_many_errors_button {
236
+ background-color: #f44336;
237
+ color: white;
238
+ }
239
+ """) as demo:
240
  gr.Markdown(DEMO_EXPLANATION)
241
 
242
  with gr.Tab("標記工具"):
 
254
  with gr.Row(variant='panel', equal_height=True):
255
  with gr.Column(scale=1):
256
  error_span = gr.Textbox(label="錯誤區間 (💡可以直接複製「翻譯文本」欄位,並在此貼上)", lines=6, placeholder="請輸入翻譯中文本的錯誤區間 (如無錯誤則不需)")
257
+ with gr.Column(scale=1):
258
+ correct_button = gr.Button("✔ 完全正確", elem_id="correct_button")
259
+ too_many_errors_button = gr.Button("✖ 過多錯誤", elem_id="too_many_errors_button")
260
  with gr.Column(scale=1):
261
  with gr.Row(equal_height=True):
262
  category = gr.Dropdown(label="錯誤類別", choices=list(categories.keys()), value="No-error")
 
285
  file_selector.change(update_file_selection, inputs=[file_selector], outputs=[source, target, error_span, index_selector, current_index_display, status])
286
  index_selector.change(update_index_selection, inputs=[index_selector], outputs=[source, target, current_index_display, status])
287
  category.change(update_subcategories, inputs=[category], outputs=[subcategory])
288
+
289
+ correct_button.click(mark_as_correct, outputs=[error_span, status])
290
+ too_many_errors_button.click(mark_as_too_many_errors, outputs=[error_span, status])
291
+
292
  save_current_button.click(save_current, inputs=[source, target, rater_selector, error_span, category, subcategory, severity, other], outputs=[error_span, status])
293
  save_next_button.click(save_and_next, inputs=[source, target, score, rater_selector], outputs=[source, target, error_span, current_index_display, status])
294