Spaces:
Sleeping
Sleeping
File size: 3,380 Bytes
d82751e 20116ed d82751e 20116ed d1aae39 20116ed d1aae39 20116ed d1aae39 0b203ab d1aae39 20116ed 0b203ab 20116ed d82751e 20116ed d82751e 20116ed d82751e 20116ed d82751e d1aae39 d82751e d1aae39 d82751e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
import html

import gradio as gr
from jiwer import wer, compute_measures
# Function to highlight errors
def highlight_errors(ground_truth, hypothesis):
measures = compute_measures(ground_truth, hypothesis)
highlighted_hyp = []
# Split the ground truth and hypothesis into words
gt_words = ground_truth.split()
hyp_words = hypothesis.split()
gt_index = 0
hyp_index = 0
# Process each alignment operation in measures
for alignment in measures['ops']:
for chunk in alignment:
print(chunk)
if chunk.type == 'equal':
# Add equal words without highlighting
highlighted_hyp.extend(gt_words[chunk.ref_start_idx:chunk.ref_end_idx])
gt_index = chunk.ref_end_idx
hyp_index = chunk.hyp_end_idx
elif chunk.type == 'insert':
# Highlight inserted words in green
highlighted_hyp.append(f'<span style="color:green;">{hyp_words[hyp_index]}</span>')
hyp_index += 1
elif chunk.type == 'substitute':
# Highlight substitutions: hypothesis in purple, ground truth in red
highlighted_hyp.append(f'<span style="color:purple;">{hyp_words[hyp_index]}</span>') # Hypothesis word
highlighted_hyp.append(f'<span style="color:red; text-decoration:line-through;">{gt_words[gt_index]}</span>') # Ground truth word
gt_index += 1
hyp_index += 1
elif chunk.type == 'delete':
# Highlight deleted words in red with strikethrough
highlighted_hyp.append(f'<span style="color:red; text-decoration:line-through;">{gt_words[gt_index]}</span>')
gt_index += 1
# Handle any remaining words in hypothesis as insertions
while hyp_index < len(hyp_words):
highlighted_hyp.append(f'<span style="color:green;">{hyp_words[hyp_index]}</span>')
hyp_index += 1
# Handle any remaining words in ground truth that were not matched
while gt_index < len(gt_words):
highlighted_hyp.append(f'<span style="color:red; text-decoration:line-through;">{gt_words[gt_index]}</span>')
gt_index += 1
highlighted_hyp_str = ' '.join(highlighted_hyp)
error_rate = wer(ground_truth, hypothesis)
# Color Legend HTML
legend_html = """
<div style="margin-top: 10px;">
<strong>Legend:</strong><br>
<span style="color:green;">Insertion</span>: Words in green<br>
<span style="color:purple;">Substitution</span>: Words in purple<br>
<span style="color:red; text-decoration:line-through;">Deletion</span>: Words in red with strikethrough<br>
</div>
"""
# Combine highlighted output and legend
combined_output = f"{highlighted_hyp_str}<br>{legend_html}"
return combined_output, error_rate, measures['substitutions'], measures['insertions'], measures['deletions']
# Gradio app wiring: two free-text inputs are passed to highlight_errors, and
# its five return values are shown in the output components below, in order.
result_components = [
    gr.HTML(label="Highlighted Transcript with Legend"),
    gr.Number(label="Word Error Rate"),
    gr.Number(label="Substitutions"),
    gr.Number(label="Insertions"),
    gr.Number(label="Deletions"),
]

interface = gr.Interface(
    fn=highlight_errors,
    inputs=["text", "text"],
    outputs=result_components,
    title="WER Calculator with Error Highlighting and Legend",
)

# Start serving the UI.
interface.launch()
|