refines
Browse files
app.py
CHANGED
@@ -239,14 +239,67 @@ def get_wer_metrics(dataset):
|
|
239 |
lm_ranking_row = {"Methods": "N-gram Ranking"}
|
240 |
n_best_row = {"Methods": "Subwords Voting"}
|
241 |
|
|
|
|
|
|
|
|
|
|
|
|
|
242 |
for source in all_sources + ["OVERALL"]:
|
243 |
no_lm_row[source] = source_results[source]["No LM Baseline"]
|
244 |
lm_ranking_row[source] = source_results[source]["N-best LM Ranking"]
|
245 |
n_best_row[source] = source_results[source]["N-best Correction"]
|
246 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
247 |
rows.append(no_lm_row)
|
248 |
rows.append(lm_ranking_row)
|
249 |
rows.append(n_best_row)
|
|
|
|
|
|
|
250 |
|
251 |
# Create DataFrame from rows
|
252 |
result_df = pd.DataFrame(rows)
|
|
|
239 |
lm_ranking_row = {"Methods": "N-gram Ranking"}
|
240 |
n_best_row = {"Methods": "Subwords Voting"}
|
241 |
|
242 |
+
# Add the additional methods from the figure
|
243 |
+
llama_lora_row = {"Methods": "LLaMA-7B-LoRA"}
|
244 |
+
nb_oracle_row = {"Methods": "N-best Oracle (o_nb)"}
|
245 |
+
cp_oracle_row = {"Methods": "Compositional Oracle (o_cp)"}
|
246 |
+
|
247 |
+
# Populate the existing methods
|
248 |
for source in all_sources + ["OVERALL"]:
|
249 |
no_lm_row[source] = source_results[source]["No LM Baseline"]
|
250 |
lm_ranking_row[source] = source_results[source]["N-best LM Ranking"]
|
251 |
n_best_row[source] = source_results[source]["N-best Correction"]
|
252 |
+
|
253 |
+
# Add hardcoded values for the additional methods based on the figure
|
254 |
+
# Default to NaN for sources not in the figure
|
255 |
+
llama_lora_row[source] = np.nan
|
256 |
+
nb_oracle_row[source] = np.nan
|
257 |
+
cp_oracle_row[source] = np.nan
|
258 |
+
|
259 |
+
# Add hardcoded values from the figure for each source
|
260 |
+
# CHiME-4
|
261 |
+
if "test_chime4" in all_sources:
|
262 |
+
llama_lora_row["test_chime4"] = 6.6 / 100 # Convert from percentage
|
263 |
+
nb_oracle_row["test_chime4"] = 9.1 / 100
|
264 |
+
cp_oracle_row["test_chime4"] = 2.8 / 100
|
265 |
+
|
266 |
+
# Tedlium-3
|
267 |
+
if "test_td3" in all_sources:
|
268 |
+
llama_lora_row["test_td3"] = 4.6 / 100
|
269 |
+
nb_oracle_row["test_td3"] = 3.0 / 100
|
270 |
+
cp_oracle_row["test_td3"] = 0.7 / 100
|
271 |
+
|
272 |
+
# CommonVoice (CV-accent)
|
273 |
+
if "test_cv" in all_sources:
|
274 |
+
llama_lora_row["test_cv"] = 11.0 / 100
|
275 |
+
nb_oracle_row["test_cv"] = 11.4 / 100
|
276 |
+
cp_oracle_row["test_cv"] = 7.9 / 100
|
277 |
+
|
278 |
+
# SwitchBoard
|
279 |
+
if "test_swbd" in all_sources:
|
280 |
+
llama_lora_row["test_swbd"] = 14.1 / 100
|
281 |
+
nb_oracle_row["test_swbd"] = 12.6 / 100
|
282 |
+
cp_oracle_row["test_swbd"] = 4.2 / 100
|
283 |
+
|
284 |
+
# LRS2
|
285 |
+
if "test_lrs2" in all_sources:
|
286 |
+
llama_lora_row["test_lrs2"] = 8.8 / 100
|
287 |
+
nb_oracle_row["test_lrs2"] = 6.9 / 100
|
288 |
+
cp_oracle_row["test_lrs2"] = 2.6 / 100
|
289 |
+
|
290 |
+
# CORAAL
|
291 |
+
if "test_coraal" in all_sources:
|
292 |
+
llama_lora_row["test_coraal"] = 19.2 / 100
|
293 |
+
nb_oracle_row["test_coraal"] = 21.8 / 100
|
294 |
+
cp_oracle_row["test_coraal"] = 10.7 / 100
|
295 |
+
|
296 |
+
# Add rows in the desired order
|
297 |
rows.append(no_lm_row)
|
298 |
rows.append(lm_ranking_row)
|
299 |
rows.append(n_best_row)
|
300 |
+
rows.append(llama_lora_row)
|
301 |
+
rows.append(nb_oracle_row)
|
302 |
+
rows.append(cp_oracle_row)
|
303 |
|
304 |
# Create DataFrame from rows
|
305 |
result_df = pd.DataFrame(rows)
|