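# Spaces: Sleeping
# (The line above appears to be a hosting-page status banner captured with the
# source; it is not part of the program.)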
import html

import gradio as gr
from jiwer import wer, compute_measures
# Function to highlight errors
def highlight_errors(ground_truth, hypothesis):
    """Mark up the word-level differences between a hypothesis and its ground truth.

    The two texts are aligned with ``jiwer.compute_measures`` and the alignment
    is rendered as an HTML fragment:

    * matching words are emitted unchanged,
    * inserted hypothesis words are green,
    * substituted hypothesis words are purple, each followed by the ground-truth
      word it replaced in red strikethrough,
    * deleted ground-truth words are red strikethrough.

    A colour legend is appended after the highlighted transcript.

    Args:
        ground_truth: Reference transcript as a plain string.
        hypothesis: Transcript to evaluate as a plain string.

    Returns:
        A 5-tuple ``(html_fragment, word_error_rate, substitutions,
        insertions, deletions)``; the last four values are taken from the
        jiwer measures dictionary.
    """
    insert_style = "color:green;"
    substitute_style = "color:purple;"
    delete_style = "color:red; text-decoration:line-through;"

    def styled(word, style):
        # Words come straight from user input, so escape them before they are
        # embedded in markup.
        return f'<span style="{style}">{html.escape(word)}</span>'

    measures = compute_measures(ground_truth, hypothesis)

    # Split the ground truth and hypothesis into words
    gt_words = ground_truth.split()
    hyp_words = hypothesis.split()

    highlighted_hyp = []
    gt_covered = 0   # highest ground-truth index consumed by any chunk
    hyp_covered = 0  # highest hypothesis index consumed by any chunk

    for sentence_chunks in measures['ops']:
        for chunk in sentence_chunks:
            # A chunk may span several words, so slice by its own indices
            # rather than advancing one word at a time.
            ref_part = gt_words[chunk.ref_start_idx:chunk.ref_end_idx]
            hyp_part = hyp_words[chunk.hyp_start_idx:chunk.hyp_end_idx]

            if chunk.type == 'equal':
                highlighted_hyp.extend(html.escape(word) for word in ref_part)
            elif chunk.type == 'insert':
                highlighted_hyp.extend(
                    styled(word, insert_style) for word in hyp_part
                )
            elif chunk.type == 'substitute':
                # jiwer names this chunk type 'substitute' (not 'sub').
                for ref_word, hyp_word in zip(ref_part, hyp_part):
                    highlighted_hyp.append(styled(hyp_word, substitute_style))
                    highlighted_hyp.append(styled(ref_word, delete_style))
            elif chunk.type == 'delete':
                highlighted_hyp.extend(
                    styled(word, delete_style) for word in ref_part
                )

            gt_covered = max(gt_covered, chunk.ref_end_idx)
            hyp_covered = max(hyp_covered, chunk.hyp_end_idx)

    # Safety net: anything the alignment did not cover is shown as an
    # insertion (hypothesis side) or a deletion (ground-truth side).
    highlighted_hyp.extend(
        styled(word, insert_style) for word in hyp_words[hyp_covered:]
    )
    highlighted_hyp.extend(
        styled(word, delete_style) for word in gt_words[gt_covered:]
    )

    highlighted_hyp_str = ' '.join(highlighted_hyp)

    # The measures dictionary already carries the WER, so no second alignment
    # pass is needed.
    error_rate = measures['wer']

    # Color Legend HTML
    legend_html = (
        '\n'
        '<div style="margin-top: 10px;">\n'
        '<strong>Legend:</strong><br>\n'
        '<span style="color:green;">Insertion</span>: Words in green<br>\n'
        '<span style="color:purple;">Substitution</span>: Words in purple<br>\n'
        '<span style="color:red; text-decoration:line-through;">Deletion</span>: Words in red with strikethrough<br>\n'
        '</div>\n'
    )

    # Combine highlighted output and legend
    combined_output = f"{highlighted_hyp_str}<br>{legend_html}"
    return (
        combined_output,
        error_rate,
        measures['substitutions'],
        measures['insertions'],
        measures['deletions'],
    )
# Gradio Interface
# Two free-text inputs feed highlight_errors; its five return values are shown
# as one Markdown panel followed by four numeric fields, in return order.
interface = gr.Interface(
    title="WER Calculator with Error Highlighting and Legend",
    fn=highlight_errors,
    inputs=["text", "text"],
    outputs=[
        gr.Markdown(label="Highlighted Transcript with Legend"),
        *(
            gr.Number(label=caption)
            for caption in (
                "Word Error Rate",
                "Substitutions",
                "Insertions",
                "Deletions",
            )
        ),
    ],
)

# Start the web app as soon as the script runs.
interface.launch()