File size: 3,380 Bytes
d82751e
 
 
 
 
 
20116ed
 
 
 
d82751e
 
 
20116ed
 
 
d1aae39
 
 
 
 
20116ed
d1aae39
 
 
 
20116ed
d1aae39
 
 
 
 
 
0b203ab
 
d1aae39
20116ed
 
0b203ab
20116ed
 
 
 
 
 
 
 
 
 
 
 
d82751e
 
20116ed
d82751e
 
 
 
20116ed
 
 
d82751e
 
 
20116ed
 
 
 
d82751e
 
 
 
 
 
d1aae39
d82751e
 
 
 
 
d1aae39
d82751e
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import html

import gradio as gr
from jiwer import wer, compute_measures

# Function to highlight errors
def highlight_errors(ground_truth, hypothesis):
    measures = compute_measures(ground_truth, hypothesis)

    highlighted_hyp = []
    
    # Split the ground truth and hypothesis into words
    gt_words = ground_truth.split()
    hyp_words = hypothesis.split()

    gt_index = 0
    hyp_index = 0

    # Process each alignment operation in measures
    for alignment in measures['ops']:
        for chunk in alignment:
            print(chunk)
            if chunk.type == 'equal':
                # Add equal words without highlighting
                highlighted_hyp.extend(gt_words[chunk.ref_start_idx:chunk.ref_end_idx])
                gt_index = chunk.ref_end_idx
                hyp_index = chunk.hyp_end_idx
            elif chunk.type == 'insert':
                # Highlight inserted words in green
                highlighted_hyp.append(f'<span style="color:green;">{hyp_words[hyp_index]}</span>')
                hyp_index += 1
            elif chunk.type == 'substitute':
                # Highlight substitutions: hypothesis in purple, ground truth in red
                highlighted_hyp.append(f'<span style="color:purple;">{hyp_words[hyp_index]}</span>')  # Hypothesis word
                highlighted_hyp.append(f'<span style="color:red; text-decoration:line-through;">{gt_words[gt_index]}</span>')  # Ground truth word
                gt_index += 1
                hyp_index += 1
            elif chunk.type == 'delete':
                # Highlight deleted words in red with strikethrough
                highlighted_hyp.append(f'<span style="color:red; text-decoration:line-through;">{gt_words[gt_index]}</span>')
                gt_index += 1

    # Handle any remaining words in hypothesis as insertions
    while hyp_index < len(hyp_words):
        highlighted_hyp.append(f'<span style="color:green;">{hyp_words[hyp_index]}</span>')
        hyp_index += 1

    # Handle any remaining words in ground truth that were not matched
    while gt_index < len(gt_words):
        highlighted_hyp.append(f'<span style="color:red; text-decoration:line-through;">{gt_words[gt_index]}</span>')
        gt_index += 1

    highlighted_hyp_str = ' '.join(highlighted_hyp)
    
    error_rate = wer(ground_truth, hypothesis)

    # Color Legend HTML
    legend_html = """
    <div style="margin-top: 10px;">
        <strong>Legend:</strong><br>
        <span style="color:green;">Insertion</span>: Words in green<br>
        <span style="color:purple;">Substitution</span>: Words in purple<br>
        <span style="color:red; text-decoration:line-through;">Deletion</span>: Words in red with strikethrough<br>
    </div>
    """

    # Combine highlighted output and legend
    combined_output = f"{highlighted_hyp_str}<br>{legend_html}"

    return combined_output, error_rate, measures['substitutions'], measures['insertions'], measures['deletions']

# Gradio Interface
interface = gr.Interface(
    fn=highlight_errors,
    inputs=["text", "text"],
    outputs=[
        gr.HTML(label="Highlighted Transcript with Legend"),
        gr.Number(label="Word Error Rate"),
        gr.Number(label="Substitutions"),
        gr.Number(label="Insertions"),
        gr.Number(label="Deletions")
    ],
    title="WER Calculator with Error Highlighting and Legend"
)

interface.launch()