Spaces:

adalat-ai
/

wer-analysis

Sleeping

App Files Community

janaab committed on Oct 19, 2024

Commit

0b203ab

verified ·

1 Parent(s): 20116ed

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -18

app.py CHANGED Viewed

@@ -14,27 +14,30 @@ def highlight_errors(ground_truth, hypothesis):
     gt_index = 0
     hyp_index = 0
-    for op in measures['ops']:
-        for chunk in op:
-            if chunk.type == 'equal':
                 # Add equal words without highlighting
-                highlighted_hyp.extend(gt_words[gt_index:gt_index + (chunk.ref_end_idx - chunk.ref_start_idx)])
-                gt_index += (chunk.ref_end_idx - chunk.ref_start_idx)
-                hyp_index += (chunk.hyp_end_idx - chunk.hyp_start_idx)
-            elif chunk.type == 'insert':
                 # Highlight inserted words in green
-                highlighted_hyp.append(f'<span style="color:green;">{hyp_words[hyp_index]}</span>')
-                hyp_index += 1
-            elif chunk.type == 'sub':
-                # Highlight substituted words in purple
                 highlighted_hyp.append(f'<span style="color:purple;">{hyp_words[hyp_index]}</span>')
-                highlighted_hyp.append(f'<span style="color:red; text-decoration:line-through;">{gt_words[gt_index]}</span>')  # Strikethrough for substitution
-                gt_index += 1  # Move in ground truth
-                hyp_index += 1  # Move in hypothesis
-            elif chunk.type == 'delete':
                 # Highlight deleted words in red with strikethrough
                 highlighted_hyp.append(f'<span style="color:red; text-decoration:line-through;">{gt_words[gt_index]}</span>')
-                gt_index += 1  # Move in ground truth
     # Handle any remaining words in hypothesis as insertions
     while hyp_index < len(hyp_words):
@@ -70,13 +73,13 @@ interface = gr.Interface(
     fn=highlight_errors,
     inputs=["text", "text"],
     outputs=[
-        gr.Markdown(label="Highlighted Transcript with Legend"),
         gr.Number(label="Word Error Rate"),
         gr.Number(label="Substitutions"),
         gr.Number(label="Insertions"),
         gr.Number(label="Deletions")
     ],
-    title="WER Calculator with Error Highlighting and Legend"
 )
 interface.launch()

     gt_index = 0
     hyp_index = 0
+    # Process each chunk of alignment (e.g., equal, insert, substitute, delete)
+    for chunk in measures['ops']:
+        for alignment in chunk:
+            if alignment.type == 'equal':
                 # Add equal words without highlighting
+                for i in range(alignment.ref_start_idx, alignment.ref_end_idx):
+                    highlighted_hyp.append(gt_words[i])  # Add ground truth word as is
+                gt_index = alignment.ref_end_idx
+                hyp_index = alignment.hyp_end_idx
+            elif alignment.type == 'insert':
                 # Highlight inserted words in green
+                for i in range(alignment.hyp_start_idx, alignment.hyp_end_idx):
+                    highlighted_hyp.append(f'<span style="color:green;">{hyp_words[i]}</span>')
+                hyp_index = alignment.hyp_end_idx
+            elif alignment.type == 'sub':
+                # Highlight substituted words in purple and strikethrough for ground truth in red
                 highlighted_hyp.append(f'<span style="color:purple;">{hyp_words[hyp_index]}</span>')
+                highlighted_hyp.append(f'<span style="color:red; text-decoration:line-through;">{gt_words[gt_index]}</span>')
+                gt_index += 1
+                hyp_index += 1
+            elif alignment.type == 'delete':
                 # Highlight deleted words in red with strikethrough
                 highlighted_hyp.append(f'<span style="color:red; text-decoration:line-through;">{gt_words[gt_index]}</span>')
+                gt_index += 1
     # Handle any remaining words in hypothesis as insertions
     while hyp_index < len(hyp_words):
     fn=highlight_errors,
     inputs=["text", "text"],
     outputs=[
+        gr.Markdown(label="Highlighted Transcript"),
         gr.Number(label="Word Error Rate"),
         gr.Number(label="Substitutions"),
         gr.Number(label="Insertions"),
         gr.Number(label="Deletions")
     ],
+    title="WER Analysis"
 )
 interface.launch()