Spaces:

OUTCOMESAI
/

WER

Sleeping

App Files Files Community

piyushmaharana committed on Feb 12

Commit

a7e5c6a

verified ·

1 Parent(s): 4c80028

Update app.py

Browse files

Files changed (1) hide show

app.py +157 -2

app.py CHANGED Viewed

@@ -1,4 +1,159 @@
 import streamlit as st
-x = st.slider('Select a value')
-st.write(x, 'squared is', x * x)

 import streamlit as st
+import jiwer
+import pandas as pd
+from typing import List, Optional
+def calculate_wer_metrics(
+    hypothesis: str,
+    reference: str,
+    normalize: bool = True,
+    words_to_filter: Optional[List[str]] = None
+) -> dict:
+    """
+    Calculate WER metrics between hypothesis and reference texts.
+    Args:
+        hypothesis (str): The hypothesis text
+        reference (str): The reference text
+        normalize (bool): Whether to lowercase the texts and remove punctuation
+            before comparison
+        words_to_filter (List[str], optional): Words to drop from both texts.
+            Only applied when ``normalize`` is True; matching is then
+            case-insensitive because the texts are lowercased first.
+            Blank entries are ignored.
+    Returns:
+        dict: The measures returned by ``jiwer.compute_measures``
+    """
+    steps = []
+    if normalize:
+        steps.append(jiwer.ToLowerCase())
+        steps.append(jiwer.RemovePunctuation())
+        # Lowercase the filter words once so they match the lowercased text;
+        # longest first so a multi-word entry is removed before its parts.
+        blocked = sorted(
+            {w.strip().lower() for w in (words_to_filter or []) if w and w.strip()},
+            key=lambda w: (-len(w), w)
+        )
+        if blocked:
+            steps.append(jiwer.RemoveSpecificWords(blocked))
+    # Collapse whitespace last: removing punctuation or words can leave gaps.
+    steps.append(jiwer.RemoveMultipleSpaces())
+    steps.append(jiwer.Strip())
+    # A custom jiwer pipeline has to finish by splitting each sentence into a
+    # list of words; without this step jiwer rejects the transformed text.
+    steps.append(jiwer.ReduceToListOfListOfWords())
+    transformation = jiwer.Compose(steps)
+    # The texts are passed positionally (reference first) because
+    # compute_measures names its first parameter ``truth``, not ``reference``.
+    return jiwer.compute_measures(
+        reference,
+        hypothesis,
+        truth_transform=transformation,
+        hypothesis_transform=transformation
+    )
+def _load_example():
+    """Button callback: place the example sentences into the two text areas."""
+    st.session_state["reference_text"] = "the quick brown fox jumps over the lazy dog"
+    st.session_state["hypothesis_text"] = "the quick brown fox jumped over lazy dog"
+def main():
+    """
+    Render the WER evaluation page.
+    Collects a reference text, a hypothesis text and the comparison options,
+    and shows the metrics from ``calculate_wer_metrics`` when the user clicks
+    "Calculate WER".
+    """
+    st.set_page_config(
+        page_title="WER Evaluation Tool",
+        page_icon="🎯",
+        layout="wide"
+    )
+    st.title("Word Error Rate (WER) Evaluation Tool")
+    st.markdown("""
+    This tool helps you evaluate the Word Error Rate (WER) between a reference text and a hypothesis text.
+    WER is commonly used in speech recognition and machine translation evaluation.
+    """)
+    # Example button. The callback writes into session state so the example
+    # text survives the rerun triggered by a later "Calculate WER" click;
+    # st.button() itself only returns True for the single rerun after a click.
+    st.button("Load Example", on_click=_load_example)
+    # Input fields (keyed so their content lives in st.session_state)
+    col1, col2 = st.columns(2)
+    with col1:
+        reference = st.text_area(
+            "Reference Text",
+            key="reference_text",
+            height=150,
+            placeholder="Enter the reference text here..."
+        )
+    with col2:
+        hypothesis = st.text_area(
+            "Hypothesis Text",
+            key="hypothesis_text",
+            height=150,
+            placeholder="Enter the hypothesis text here..."
+        )
+    # Options
+    normalize = st.checkbox("Normalize text (lowercase, remove punctuation)", value=True)
+    words_to_filter_input = st.text_input(
+        "Words to filter (comma-separated)",
+        placeholder="e.g., um, uh, ah"
+    )
+    # Drop blank entries such as the one produced by a trailing comma.
+    words_to_filter = [
+        word.strip() for word in words_to_filter_input.split(",") if word.strip()
+    ] or None
+    # Calculate button
+    if st.button("Calculate WER"):
+        # Whitespace-only input counts as missing.
+        if not reference.strip() or not hypothesis.strip():
+            st.error("Please provide both reference and hypothesis texts.")
+            return
+        # UI boundary: report any failure in the page instead of a traceback.
+        try:
+            measures = calculate_wer_metrics(
+                hypothesis=hypothesis,
+                reference=reference,
+                normalize=normalize,
+                words_to_filter=words_to_filter
+            )
+            # Display results
+            col1, col2 = st.columns(2)
+            with col1:
+                st.subheader("Main Metrics")
+                metrics_df = pd.DataFrame({
+                    'Metric': ['WER', 'MER', 'WIL', 'WIP'],
+                    'Value': [
+                        f"{measures['wer']:.3f}",
+                        f"{measures['mer']:.3f}",
+                        f"{measures['wil']:.3f}",
+                        f"{measures['wip']:.3f}"
+                    ]
+                })
+                st.table(metrics_df)
+            with col2:
+                st.subheader("Error Analysis")
+                error_df = pd.DataFrame({
+                    'Metric': ['Substitutions', 'Deletions', 'Insertions', 'Hits'],
+                    'Count': [
+                        measures['substitutions'],
+                        measures['deletions'],
+                        measures['insertions'],
+                        measures['hits']
+                    ]
+                })
+                st.table(error_df)
+            # Add explanations
+            st.markdown("""
+            ### Metrics Explanation:
+            - **WER (Word Error Rate)**: The percentage of words that were incorrectly predicted
+            - **MER (Match Error Rate)**: The percentage of words that were incorrectly matched
+            - **WIL (Word Information Lost)**: The percentage of word information that was lost
+            - **WIP (Word Information Preserved)**: The percentage of word information that was preserved
+            """)
+        except Exception as e:
+            st.error(f"Error calculating WER: {e}")
+# Build the page only when this file is run as a script, not when it is imported.
+if __name__ == "__main__":
+    main()