huckiyang committed on
Commit
0d06f36
·
1 Parent(s): 6821e8c
Files changed (1) hide show
  1. app.py +54 -1
app.py CHANGED
@@ -239,14 +239,67 @@ def get_wer_metrics(dataset):
239
  lm_ranking_row = {"Methods": "N-gram Ranking"}
240
  n_best_row = {"Methods": "Subwords Voting"}
241
 
 
 
 
 
 
 
242
  for source in all_sources + ["OVERALL"]:
243
  no_lm_row[source] = source_results[source]["No LM Baseline"]
244
  lm_ranking_row[source] = source_results[source]["N-best LM Ranking"]
245
  n_best_row[source] = source_results[source]["N-best Correction"]
246
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  rows.append(no_lm_row)
248
  rows.append(lm_ranking_row)
249
  rows.append(n_best_row)
 
 
 
250
 
251
  # Create DataFrame from rows
252
  result_df = pd.DataFrame(rows)
 
239
  lm_ranking_row = {"Methods": "N-gram Ranking"}
240
  n_best_row = {"Methods": "Subwords Voting"}
241
 
242
+ # Add the additional methods from the figure
243
+ llama_lora_row = {"Methods": "LLaMA-7B-LoRA"}
244
+ nb_oracle_row = {"Methods": "N-best Oracle (o_nb)"}
245
+ cp_oracle_row = {"Methods": "Compositional Oracle (o_cp)"}
246
+
247
+ # Populate the existing methods
248
  for source in all_sources + ["OVERALL"]:
249
  no_lm_row[source] = source_results[source]["No LM Baseline"]
250
  lm_ranking_row[source] = source_results[source]["N-best LM Ranking"]
251
  n_best_row[source] = source_results[source]["N-best Correction"]
252
+
253
+ # Add hardcoded values for the additional methods based on the figure
254
+ # Default to NaN for sources not in the figure
255
+ llama_lora_row[source] = np.nan
256
+ nb_oracle_row[source] = np.nan
257
+ cp_oracle_row[source] = np.nan
258
+
259
+ # Add hardcoded values from the figure for each source
260
+ # CHiME-4
261
+ if "test_chime4" in all_sources:
262
+ llama_lora_row["test_chime4"] = 6.6 / 100 # Convert from percentage
263
+ nb_oracle_row["test_chime4"] = 9.1 / 100
264
+ cp_oracle_row["test_chime4"] = 2.8 / 100
265
+
266
+ # Tedlium-3
267
+ if "test_td3" in all_sources:
268
+ llama_lora_row["test_td3"] = 4.6 / 100
269
+ nb_oracle_row["test_td3"] = 3.0 / 100
270
+ cp_oracle_row["test_td3"] = 0.7 / 100
271
+
272
+ # CommonVoice (CV-accent)
273
+ if "test_cv" in all_sources:
274
+ llama_lora_row["test_cv"] = 11.0 / 100
275
+ nb_oracle_row["test_cv"] = 11.4 / 100
276
+ cp_oracle_row["test_cv"] = 7.9 / 100
277
+
278
+ # SwitchBoard
279
+ if "test_swbd" in all_sources:
280
+ llama_lora_row["test_swbd"] = 14.1 / 100
281
+ nb_oracle_row["test_swbd"] = 12.6 / 100
282
+ cp_oracle_row["test_swbd"] = 4.2 / 100
283
+
284
+ # LRS2
285
+ if "test_lrs2" in all_sources:
286
+ llama_lora_row["test_lrs2"] = 8.8 / 100
287
+ nb_oracle_row["test_lrs2"] = 6.9 / 100
288
+ cp_oracle_row["test_lrs2"] = 2.6 / 100
289
+
290
+ # CORAAL
291
+ if "test_coraal" in all_sources:
292
+ llama_lora_row["test_coraal"] = 19.2 / 100
293
+ nb_oracle_row["test_coraal"] = 21.8 / 100
294
+ cp_oracle_row["test_coraal"] = 10.7 / 100
295
+
296
+ # Add rows in the desired order
297
  rows.append(no_lm_row)
298
  rows.append(lm_ranking_row)
299
  rows.append(n_best_row)
300
+ rows.append(llama_lora_row)
301
+ rows.append(nb_oracle_row)
302
+ rows.append(cp_oracle_row)
303
 
304
  # Create DataFrame from rows
305
  result_df = pd.DataFrame(rows)