Spaces:

GenSEC-LLM
/

Post-ASR-LLM-Transcription-Correction

Running

App Files Community

huckiyang committed on Mar 14

Commit

0d06f36

1 Parent(s): 6821e8c

refines

Browse files

Files changed (1) hide show

app.py +54 -1

app.py CHANGED Viewed

@@ -239,14 +239,67 @@ def get_wer_metrics(dataset):
     lm_ranking_row = {"Methods": "N-gram Ranking"}
     n_best_row = {"Methods": "Subwords Voting"}
     for source in all_sources + ["OVERALL"]:
         no_lm_row[source] = source_results[source]["No LM Baseline"]
         lm_ranking_row[source] = source_results[source]["N-best LM Ranking"]
         n_best_row[source] = source_results[source]["N-best Correction"]
     rows.append(no_lm_row)
     rows.append(lm_ranking_row)
     rows.append(n_best_row)
     # Create DataFrame from rows
     result_df = pd.DataFrame(rows)

     lm_ranking_row = {"Methods": "N-gram Ranking"}
     n_best_row = {"Methods": "Subwords Voting"}
+    # Add the additional methods from the figure
+    llama_lora_row = {"Methods": "LLaMA-7B-LoRA"}
+    nb_oracle_row = {"Methods": "N-best Oracle (o_nb)"}
+    cp_oracle_row = {"Methods": "Compositional Oracle (o_cp)"}
+    # Populate the existing methods
     for source in all_sources + ["OVERALL"]:
         no_lm_row[source] = source_results[source]["No LM Baseline"]
         lm_ranking_row[source] = source_results[source]["N-best LM Ranking"]
         n_best_row[source] = source_results[source]["N-best Correction"]
+        # Initialize the additional methods to NaN for every source;
+        # sources covered by the figure are overwritten with hardcoded values below
+        llama_lora_row[source] = np.nan
+        nb_oracle_row[source] = np.nan
+        cp_oracle_row[source] = np.nan
+    # Add hardcoded values from the figure for each source
+    # CHiME-4
+    if "test_chime4" in all_sources:
+        llama_lora_row["test_chime4"] = 6.6 / 100  # Convert from percentage
+        nb_oracle_row["test_chime4"] = 9.1 / 100
+        cp_oracle_row["test_chime4"] = 2.8 / 100
+    # Tedlium-3
+    if "test_td3" in all_sources:
+        llama_lora_row["test_td3"] = 4.6 / 100
+        nb_oracle_row["test_td3"] = 3.0 / 100
+        cp_oracle_row["test_td3"] = 0.7 / 100
+    # CommonVoice (CV-accent)
+    if "test_cv" in all_sources:
+        llama_lora_row["test_cv"] = 11.0 / 100
+        nb_oracle_row["test_cv"] = 11.4 / 100
+        cp_oracle_row["test_cv"] = 7.9 / 100
+    # SwitchBoard
+    if "test_swbd" in all_sources:
+        llama_lora_row["test_swbd"] = 14.1 / 100
+        nb_oracle_row["test_swbd"] = 12.6 / 100
+        cp_oracle_row["test_swbd"] = 4.2 / 100
+    # LRS2
+    if "test_lrs2" in all_sources:
+        llama_lora_row["test_lrs2"] = 8.8 / 100
+        nb_oracle_row["test_lrs2"] = 6.9 / 100
+        cp_oracle_row["test_lrs2"] = 2.6 / 100
+    # CORAAL
+    if "test_coraal" in all_sources:
+        llama_lora_row["test_coraal"] = 19.2 / 100
+        nb_oracle_row["test_coraal"] = 21.8 / 100
+        cp_oracle_row["test_coraal"] = 10.7 / 100
+    # Add rows in the desired order
     rows.append(no_lm_row)
     rows.append(lm_ranking_row)
     rows.append(n_best_row)
+    rows.append(llama_lora_row)
+    rows.append(nb_oracle_row)
+    rows.append(cp_oracle_row)
     # Create DataFrame from rows
     result_df = pd.DataFrame(rows)