janaab committed on
Commit
20116ed
·
verified ·
1 Parent(s): a69fdf3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -17
app.py CHANGED
@@ -4,47 +4,79 @@ from jiwer import wer, compute_measures
4
  # Function to highlight errors
5
  def highlight_errors(ground_truth, hypothesis):
6
  measures = compute_measures(ground_truth, hypothesis)
 
 
 
 
7
  gt_words = ground_truth.split()
8
  hyp_words = hypothesis.split()
9
 
10
- highlighted_hyp = []
 
 
11
  for op in measures['ops']:
12
- if op[0] == 'hit':
13
- highlighted_hyp.append(f'<span>{op[1]}</span>')
14
- elif op[0] == 'sub':
15
- highlighted_hyp.append(f'<span style="color:orange;">{op[2]}</span>') # Substitution
16
- elif op[0] == 'ins':
17
- highlighted_hyp.append(f'<span style="color:red;">{op[2]}</span>') # Insertion
18
- elif op[0] == 'del':
19
- highlighted_hyp.append(f'<span style="color:blue; text-decoration:line-through;">{op[1]}</span>') # Deletion
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- highlighted_hyp = ' '.join(highlighted_hyp)
22
  error_rate = wer(ground_truth, hypothesis)
23
-
24
  # Color Legend HTML
25
  legend_html = """
26
  <div style="margin-top: 10px;">
27
  <strong>Legend:</strong><br>
28
- <span style="color:orange;">Substitution</span>: Words in orange<br>
29
- <span style="color:red;">Insertion</span>: Words in red<br>
30
- <span style="color:blue; text-decoration:line-through;">Deletion</span>: Words in blue with strikethrough<br>
31
  </div>
32
  """
33
 
34
- return highlighted_hyp + legend_html, error_rate, measures['substitutions'], measures['insertions'], measures['deletions']
 
 
 
35
 
36
  # Gradio Interface
37
  interface = gr.Interface(
38
  fn=highlight_errors,
39
  inputs=["text", "text"],
40
  outputs=[
41
- gr.HTML(label="Highlighted Transcript"),
42
  gr.Number(label="Word Error Rate"),
43
  gr.Number(label="Substitutions"),
44
  gr.Number(label="Insertions"),
45
  gr.Number(label="Deletions")
46
  ],
47
- title="WER Analysis"
48
  )
49
 
50
  interface.launch()
 
4
  # Function to highlight errors
5
  def highlight_errors(ground_truth, hypothesis):
6
  measures = compute_measures(ground_truth, hypothesis)
7
+
8
+ highlighted_hyp = []
9
+
10
+ # Split the ground truth and hypothesis into words
11
  gt_words = ground_truth.split()
12
  hyp_words = hypothesis.split()
13
 
14
+ gt_index = 0
15
+ hyp_index = 0
16
+
17
  for op in measures['ops']:
18
+ for chunk in op:
19
+ if chunk.type == 'equal':
20
+ # Add equal words without highlighting
21
+ highlighted_hyp.extend(gt_words[gt_index:gt_index + (chunk.ref_end_idx - chunk.ref_start_idx)])
22
+ gt_index += (chunk.ref_end_idx - chunk.ref_start_idx)
23
+ hyp_index += (chunk.hyp_end_idx - chunk.hyp_start_idx)
24
+ elif chunk.type == 'insert':
25
+ # Highlight inserted words in green
26
+ highlighted_hyp.append(f'<span style="color:green;">{hyp_words[hyp_index]}</span>')
27
+ hyp_index += 1
28
+ elif chunk.type == 'sub':
29
+ # Highlight substituted words in purple
30
+ highlighted_hyp.append(f'<span style="color:purple;">{hyp_words[hyp_index]}</span>')
31
+ highlighted_hyp.append(f'<span style="color:red; text-decoration:line-through;">{gt_words[gt_index]}</span>') # Strikethrough for substitution
32
+ gt_index += 1 # Move in ground truth
33
+ hyp_index += 1 # Move in hypothesis
34
+ elif chunk.type == 'delete':
35
+ # Highlight deleted words in red with strikethrough
36
+ highlighted_hyp.append(f'<span style="color:red; text-decoration:line-through;">{gt_words[gt_index]}</span>')
37
+ gt_index += 1 # Move in ground truth
38
+
39
+ # Handle any remaining words in hypothesis as insertions
40
+ while hyp_index < len(hyp_words):
41
+ highlighted_hyp.append(f'<span style="color:green;">{hyp_words[hyp_index]}</span>')
42
+ hyp_index += 1
43
+
44
+ # Handle any remaining words in ground truth that were not matched
45
+ while gt_index < len(gt_words):
46
+ highlighted_hyp.append(f'<span style="color:red; text-decoration:line-through;">{gt_words[gt_index]}</span>')
47
+ gt_index += 1
48
+
49
+ highlighted_hyp_str = ' '.join(highlighted_hyp)
50
 
 
51
  error_rate = wer(ground_truth, hypothesis)
52
+
53
  # Color Legend HTML
54
  legend_html = """
55
  <div style="margin-top: 10px;">
56
  <strong>Legend:</strong><br>
57
+ <span style="color:green;">Insertion</span>: Words in green<br>
58
+ <span style="color:purple;">Substitution</span>: Words in purple<br>
59
+ <span style="color:red; text-decoration:line-through;">Deletion</span>: Words in red with strikethrough<br>
60
  </div>
61
  """
62
 
63
+ # Combine highlighted output and legend
64
+ combined_output = f"{highlighted_hyp_str}<br>{legend_html}"
65
+
66
+ return combined_output, error_rate, measures['substitutions'], measures['insertions'], measures['deletions']
67
 
68
  # Gradio Interface
69
  interface = gr.Interface(
70
  fn=highlight_errors,
71
  inputs=["text", "text"],
72
  outputs=[
73
+ gr.Markdown(label="Highlighted Transcript with Legend"),
74
  gr.Number(label="Word Error Rate"),
75
  gr.Number(label="Substitutions"),
76
  gr.Number(label="Insertions"),
77
  gr.Number(label="Deletions")
78
  ],
79
+ title="WER Calculator with Error Highlighting and Legend"
80
  )
81
 
82
  interface.launch()