Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,47 +4,79 @@ from jiwer import wer, compute_measures
|
|
4 |
# Function to highlight errors
|
5 |
def highlight_errors(ground_truth, hypothesis):
|
6 |
measures = compute_measures(ground_truth, hypothesis)
|
|
|
|
|
|
|
|
|
7 |
gt_words = ground_truth.split()
|
8 |
hyp_words = hypothesis.split()
|
9 |
|
10 |
-
|
|
|
|
|
11 |
for op in measures['ops']:
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
-
highlighted_hyp = ' '.join(highlighted_hyp)
|
22 |
error_rate = wer(ground_truth, hypothesis)
|
23 |
-
|
24 |
# Color Legend HTML
|
25 |
legend_html = """
|
26 |
<div style="margin-top: 10px;">
|
27 |
<strong>Legend:</strong><br>
|
28 |
-
<span style="color:
|
29 |
-
<span style="color:
|
30 |
-
<span style="color:
|
31 |
</div>
|
32 |
"""
|
33 |
|
34 |
-
|
|
|
|
|
|
|
35 |
|
36 |
# Gradio Interface
|
37 |
interface = gr.Interface(
|
38 |
fn=highlight_errors,
|
39 |
inputs=["text", "text"],
|
40 |
outputs=[
|
41 |
-
gr.
|
42 |
gr.Number(label="Word Error Rate"),
|
43 |
gr.Number(label="Substitutions"),
|
44 |
gr.Number(label="Insertions"),
|
45 |
gr.Number(label="Deletions")
|
46 |
],
|
47 |
-
title="WER
|
48 |
)
|
49 |
|
50 |
interface.launch()
|
|
|
4 |
# Function to highlight errors
|
5 |
def highlight_errors(ground_truth, hypothesis):
    """Render the word-level alignment of *hypothesis* against *ground_truth*.

    The alignment comes from ``compute_measures``; every alignment chunk is
    rendered from the word ranges the chunk itself reports:

    * equal words are emitted unchanged,
    * inserted hypothesis words are shown in green,
    * substituted hypothesis words are shown in purple, each followed by the
      ground-truth word it replaced in red with strikethrough,
    * deleted ground-truth words are shown in red with strikethrough.

    Args:
        ground_truth: Reference transcript as a plain string.
        hypothesis: Transcript to evaluate as a plain string.

    Returns:
        A 5-tuple ``(html, wer, substitutions, insertions, deletions)`` where
        ``html`` is the highlighted transcript followed by a colour legend and
        the remaining items are taken from the ``compute_measures`` result.
    """
    # Function-scope import so this block does not depend on the file header.
    from html import escape

    measures = compute_measures(ground_truth, hypothesis)

    # Words are escaped up front because they are user-supplied text that is
    # embedded into HTML below.
    # NOTE(review): assumes whitespace splitting matches jiwer's default
    # tokenisation, so that chunk indices line up with these lists -- confirm
    # if custom transforms are ever passed to compute_measures.
    gt_words = [escape(word) for word in ground_truth.split()]
    hyp_words = [escape(word) for word in hypothesis.split()]

    inserted_css = "color:green;"
    substituted_css = "color:purple;"
    deleted_css = "color:red; text-decoration:line-through;"

    def span(css, word):
        # Wrap one (already escaped) word in a styled span.
        return f'<span style="{css}">{word}</span>'

    highlighted_hyp = []
    for sentence_chunks in measures['ops']:
        for chunk in sentence_chunks:
            # A chunk may cover several consecutive words, so take the whole
            # range instead of a single index; slicing also cannot raise
            # IndexError if the lists are shorter than expected.
            ref_span = gt_words[chunk.ref_start_idx:chunk.ref_end_idx]
            hyp_span = hyp_words[chunk.hyp_start_idx:chunk.hyp_end_idx]

            if chunk.type == 'equal':
                highlighted_hyp.extend(ref_span)
            elif chunk.type == 'insert':
                highlighted_hyp.extend(span(inserted_css, w) for w in hyp_span)
            elif chunk.type == 'substitute':
                # jiwer names this chunk type 'substitute' (not 'sub').
                for hyp_word, ref_word in zip(hyp_span, ref_span):
                    highlighted_hyp.append(span(substituted_css, hyp_word))
                    highlighted_hyp.append(span(deleted_css, ref_word))
            elif chunk.type == 'delete':
                highlighted_hyp.extend(span(deleted_css, w) for w in ref_span)

    highlighted_hyp_str = ' '.join(highlighted_hyp)

    # compute_measures already reports the word error rate, so the alignment
    # does not need to be computed a second time.
    error_rate = measures['wer']

    # Color Legend HTML
    legend_html = """
    <div style="margin-top: 10px;">
    <strong>Legend:</strong><br>
    <span style="color:green;">Insertion</span>: Words in green<br>
    <span style="color:purple;">Substitution</span>: Words in purple<br>
    <span style="color:red; text-decoration:line-through;">Deletion</span>: Words in red with strikethrough<br>
    </div>
    """

    # Combine highlighted output and legend
    combined_output = f"{highlighted_hyp_str}<br>{legend_html}"

    return (
        combined_output,
        error_rate,
        measures['substitutions'],
        measures['insertions'],
        measures['deletions'],
    )
|
67 |
|
68 |
# Gradio Interface
|
69 |
# Labels of the numeric outputs, in the order highlight_errors returns them
# after the highlighted transcript.
_metric_labels = ("Word Error Rate", "Substitutions", "Insertions", "Deletions")

# Two free-text inputs (ground truth, hypothesis) feed highlight_errors; its
# first return value is rendered as Markdown and the rest as numbers.
interface = gr.Interface(
    fn=highlight_errors,
    inputs=["text", "text"],
    outputs=[
        gr.Markdown(label="Highlighted Transcript with Legend"),
        *(gr.Number(label=metric) for metric in _metric_labels),
    ],
    title="WER Calculator with Error Highlighting and Legend",
)

interface.launch()
|