Spaces:

GenSEC-LLM
/

Post-ASR-LLM-Transcription-Correction

Running

App Files Community

huckiyang committed 25 days ago

Commit

7ec068d

1 Parent(s): 3c6aeb7

optz the data loading

Browse files

Files changed (1) hide show

app.py +157 -73

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ from datasets import load_dataset
 import jiwer
 import numpy as np
 from functools import lru_cache
 # Cache the dataset loading to avoid reloading on refresh
 @lru_cache(maxsize=1)
@@ -15,89 +16,151 @@ def calculate_wer(examples):
     if not examples:
         return 0.0
-    # Filter valid examples in a single pass
-    valid_pairs = [(ex.get("transcription", "").strip(), ex.get("input1", "").strip())
-                  for ex in examples
-                  if ex.get("transcription") and ex.get("input1")]
-    if not valid_pairs:
         return np.nan
-    # Unzip the pairs in one operation
-    references, hypotheses = zip(*valid_pairs) if valid_pairs else ([], [])
-    # Calculate WER
-    return jiwer.wer(references, hypotheses)
 # Get WER metrics by source and split
 def get_wer_metrics(dataset):
-    # Pre-process the data to avoid repeated filtering
-    train_by_source = {}
-    test_by_source = {}
-    # Group examples by source in a single pass for each split
-    for ex in dataset["train"]:
-        source = ex["source"]
-        if source not in train_by_source:
-            train_by_source[source] = []
-        train_by_source[source].append(ex)
-    for ex in dataset["test"]:
-        source = ex["source"]
-        if source not in test_by_source:
-            test_by_source[source] = []
-        test_by_source[source].append(ex)
-    # Get all unique sources
-    all_sources = sorted(set(train_by_source.keys()) | set(test_by_source.keys()))
-    # Calculate metrics for each source
-    results = []
-    for source in all_sources:
-        train_examples = train_by_source.get(source, [])
-        test_examples = test_by_source.get(source, [])
-        train_count = len(train_examples)
-        test_count = len(test_examples)
-        train_wer = calculate_wer(train_examples) if train_count > 0 else np.nan
-        test_wer = calculate_wer(test_examples) if test_count > 0 else np.nan
-        results.append({
-            "Source": source,
-            "Train Count": train_count,
-            "Train WER": train_wer,
-            "Test Count": test_count,
-            "Test WER": test_wer
-        })
-    # Calculate overall metrics once
-    train_wer = calculate_wer(dataset["train"])
-    test_wer = calculate_wer(dataset["test"])
-    results.append({
-        "Source": "OVERALL",
-        "Train Count": len(dataset["train"]),
-        "Train WER": train_wer,
-        "Test Count": len(dataset["test"]),
-        "Test WER": test_wer
-    })
-    return pd.DataFrame(results)
 # Format the dataframe for display
 def format_dataframe(df):
-    # Use vectorized operations instead of apply
-    df = df.copy()
-    mask = df["Train WER"].notna()
-    df.loc[mask, "Train WER"] = df.loc[mask, "Train WER"].map(lambda x: f"{x:.4f}")
-    df.loc[~mask, "Train WER"] = "N/A"
-    mask = df["Test WER"].notna()
-    df.loc[mask, "Test WER"] = df.loc[mask, "Test WER"].map(lambda x: f"{x:.4f}")
-    df.loc[~mask, "Test WER"] = "N/A"
-    return df
 # Main function to create the leaderboard
 def create_leaderboard():
@@ -106,7 +169,9 @@ def create_leaderboard():
         metrics_df = get_wer_metrics(dataset)
         return format_dataframe(metrics_df)
     except Exception as e:
-        return pd.DataFrame({"Error": [str(e)]})
 # Create the Gradio interface
 with gr.Blocks(title="ASR Text Correction Leaderboard") as demo:
@@ -117,9 +182,28 @@ with gr.Blocks(title="ASR Text Correction Leaderboard") as demo:
         refresh_btn = gr.Button("Refresh Leaderboard")
     with gr.Row():
-        leaderboard = gr.DataFrame(create_leaderboard())
-    refresh_btn.click(create_leaderboard, outputs=leaderboard)
 if __name__ == "__main__":
     demo.launch()

 import jiwer
 import numpy as np
 from functools import lru_cache
+import traceback
 # Cache the dataset loading to avoid reloading on refresh
 @lru_cache(maxsize=1)
     if not examples:
         return 0.0
+    try:
+        # Filter valid examples in a single pass
+        valid_pairs = []
+        for ex in examples:
+            try:
+                transcription = ex.get("transcription", "")
+                input1 = ex.get("input1", "")
+                # Only add valid pairs
+                if transcription and input1:
+                    # Limit text length to avoid potential issues
+                    transcription = transcription.strip()[:1000]  # Limit to 1000 chars
+                    input1 = input1.strip()[:1000]
+                    valid_pairs.append((transcription, input1))
+            except Exception as ex_error:
+                # Skip problematic examples but continue processing
+                print(f"Error processing example: {str(ex_error)}")
+                continue
+        if not valid_pairs:
+            return np.nan
+        # Unzip the pairs in one operation
+        references, hypotheses = zip(*valid_pairs) if valid_pairs else ([], [])
+        # Calculate WER
+        return jiwer.wer(references, hypotheses)
+    except Exception as e:
+        print(f"Error in calculate_wer: {str(e)}")
+        print(traceback.format_exc())
         return np.nan
 # Get WER metrics by source and split
 def get_wer_metrics(dataset):
+    try:
+        # Pre-process the data to avoid repeated filtering
+        train_by_source = {}
+        test_by_source = {}
+        # Group examples by source in a single pass for each split
+        for ex in dataset["train"]:
+            try:
+                source = ex.get("source", "unknown")
+                if source not in train_by_source:
+                    train_by_source[source] = []
+                train_by_source[source].append(ex)
+            except Exception as e:
+                print(f"Error processing train example: {str(e)}")
+                continue
+        for ex in dataset["test"]:
+            try:
+                source = ex.get("source", "unknown")
+                if source not in test_by_source:
+                    test_by_source[source] = []
+                test_by_source[source].append(ex)
+            except Exception as e:
+                print(f"Error processing test example: {str(e)}")
+                continue
+        # Get all unique sources
+        all_sources = sorted(set(train_by_source.keys()) | set(test_by_source.keys()))
+        # Calculate metrics for each source
+        results = []
+        for source in all_sources:
+            try:
+                train_examples = train_by_source.get(source, [])
+                test_examples = test_by_source.get(source, [])
+                train_count = len(train_examples)
+                test_count = len(test_examples)
+                train_wer = calculate_wer(train_examples) if train_count > 0 else np.nan
+                test_wer = calculate_wer(test_examples) if test_count > 0 else np.nan
+                results.append({
+                    "Source": source,
+                    "Train Count": train_count,
+                    "Train WER": train_wer,
+                    "Test Count": test_count,
+                    "Test WER": test_wer
+                })
+            except Exception as e:
+                print(f"Error processing source {source}: {str(e)}")
+                results.append({
+                    "Source": source,
+                    "Train Count": 0,
+                    "Train WER": np.nan,
+                    "Test Count": 0,
+                    "Test WER": np.nan
+                })
+        # Calculate overall metrics once
+        try:
+            train_wer = calculate_wer(dataset["train"])
+            test_wer = calculate_wer(dataset["test"])
+            results.append({
+                "Source": "OVERALL",
+                "Train Count": len(dataset["train"]),
+                "Train WER": train_wer,
+                "Test Count": len(dataset["test"]),
+                "Test WER": test_wer
+            })
+        except Exception as e:
+            print(f"Error calculating overall metrics: {str(e)}")
+            results.append({
+                "Source": "OVERALL",
+                "Train Count": len(dataset["train"]),
+                "Train WER": np.nan,
+                "Test Count": len(dataset["test"]),
+                "Test WER": np.nan
+            })
+        return pd.DataFrame(results)
+    except Exception as e:
+        print(f"Error in get_wer_metrics: {str(e)}")
+        print(traceback.format_exc())
+        return pd.DataFrame([{"Error": str(e)}])
 # Format the dataframe for display
 def format_dataframe(df):
+    try:
+        # Use vectorized operations instead of apply
+        df = df.copy()
+        if "Train WER" in df.columns:
+            mask = df["Train WER"].notna()
+            df.loc[mask, "Train WER"] = df.loc[mask, "Train WER"].map(lambda x: f"{x:.4f}")
+            df.loc[~mask, "Train WER"] = "N/A"
+        if "Test WER" in df.columns:
+            mask = df["Test WER"].notna()
+            df.loc[mask, "Test WER"] = df.loc[mask, "Test WER"].map(lambda x: f"{x:.4f}")
+            df.loc[~mask, "Test WER"] = "N/A"
+        return df
+    except Exception as e:
+        print(f"Error in format_dataframe: {str(e)}")
+        print(traceback.format_exc())
+        return pd.DataFrame([{"Error": str(e)}])
 # Main function to create the leaderboard
 def create_leaderboard():
         metrics_df = get_wer_metrics(dataset)
         return format_dataframe(metrics_df)
     except Exception as e:
+        error_msg = f"Error creating leaderboard: {str(e)}\n{traceback.format_exc()}"
+        print(error_msg)
+        return pd.DataFrame([{"Error": error_msg}])
 # Create the Gradio interface
 with gr.Blocks(title="ASR Text Correction Leaderboard") as demo:
         refresh_btn = gr.Button("Refresh Leaderboard")
     with gr.Row():
+        error_output = gr.Textbox(label="Errors (if any)")
+    with gr.Row():
+        try:
+            initial_df = create_leaderboard()
+            leaderboard = gr.DataFrame(initial_df)
+        except Exception as e:
+            error_msg = f"Error initializing leaderboard: {str(e)}\n{traceback.format_exc()}"
+            print(error_msg)
+            error_output.update(value=error_msg)
+            leaderboard = gr.DataFrame(pd.DataFrame([{"Error": error_msg}]))
+    def refresh_and_report():
+        try:
+            df = create_leaderboard()
+            return df, ""
+        except Exception as e:
+            error_msg = f"Error refreshing leaderboard: {str(e)}\n{traceback.format_exc()}"
+            print(error_msg)
+            return pd.DataFrame([{"Error": error_msg}]), error_msg
+    refresh_btn.click(refresh_and_report, outputs=[leaderboard, error_output])
 if __name__ == "__main__":
     demo.launch()