Spaces:

adalat-ai
/

wer-analysis

Sleeping

App Files Files Community

wer-analysis / app.py

janaab

Working prototype

d1aae39 verified 9 months ago

raw

history blame

3.38 kB

	import gradio as gr
	from jiwer import wer, compute_measures

	# Function to highlight errors
	def highlight_errors(ground_truth, hypothesis):
	    """Render a word-level diff of ``hypothesis`` against ``ground_truth`` as HTML.

	    The word alignment is taken from ``compute_measures``. Every alignment
	    chunk is rendered over its *whole* index range, so runs of several
	    inserted, substituted or deleted words are shown in full.

	    Colour scheme (also emitted as a legend below the transcript):
	      * insertion    -> hypothesis words in green
	      * substitution -> hypothesis words in purple, followed by the replaced
	                        ground-truth words in red with strikethrough
	      * deletion     -> ground-truth words in red with strikethrough

	    All words are HTML-escaped before being placed in the markup, because
	    both inputs are free text and the result is displayed as raw HTML.

	    Args:
	        ground_truth: Reference transcript as plain text.
	        hypothesis: Transcript to evaluate as plain text.

	    Returns:
	        A 5-tuple ``(combined_html, error_rate, substitutions, insertions,
	        deletions)``: the highlighted transcript plus legend, the word error
	        rate from ``wer``, and the three edit counts from ``compute_measures``.
	    """
	    # Function-scope import so the block does not rely on a file-level import.
	    import html

	    insert_style = "color:green;"
	    substitute_style = "color:purple;"
	    delete_style = "color:red; text-decoration:line-through;"

	    def _styled(words, style):
	        """Wrap each word in a styled <span>, escaping HTML metacharacters."""
	        return [f'<span style="{style}">{html.escape(word)}</span>' for word in words]

	    measures = compute_measures(ground_truth, hypothesis)

	    # Split the ground truth and hypothesis into words
	    gt_words = ground_truth.split()
	    hyp_words = hypothesis.split()

	    highlighted_hyp = []
	    gt_index = 0
	    hyp_index = 0

	    # Process each alignment operation in measures
	    for alignment in measures['ops']:
	        for chunk in alignment:
	            # A chunk describes index *ranges*, not single words; slicing
	            # renders the full range and cannot raise IndexError.
	            ref_span = gt_words[chunk.ref_start_idx:chunk.ref_end_idx]
	            hyp_span = hyp_words[chunk.hyp_start_idx:chunk.hyp_end_idx]

	            if chunk.type == 'equal':
	                # Matching words are shown without highlighting
	                highlighted_hyp.extend(html.escape(word) for word in ref_span)
	            elif chunk.type == 'insert':
	                highlighted_hyp.extend(_styled(hyp_span, insert_style))
	            elif chunk.type == 'substitute':
	                # Hypothesis words first, then the ground-truth words they replaced
	                highlighted_hyp.extend(_styled(hyp_span, substitute_style))
	                highlighted_hyp.extend(_styled(ref_span, delete_style))
	            elif chunk.type == 'delete':
	                highlighted_hyp.extend(_styled(ref_span, delete_style))

	            # Remember how far the alignment has consumed each word list
	            gt_index = chunk.ref_end_idx
	            hyp_index = chunk.hyp_end_idx

	    # Safety net: any words the alignment did not cover are still shown.
	    # Leftover hypothesis words are rendered as insertions, leftover
	    # ground-truth words as deletions.
	    highlighted_hyp.extend(_styled(hyp_words[hyp_index:], insert_style))
	    highlighted_hyp.extend(_styled(gt_words[gt_index:], delete_style))

	    highlighted_hyp_str = ' '.join(highlighted_hyp)

	    error_rate = wer(ground_truth, hypothesis)

	    # Color Legend HTML
	    legend_html = """
	<div style="margin-top: 10px;">
	<strong>Legend:</strong><br>
	<span style="color:green;">Insertion</span>: Words in green<br>
	<span style="color:purple;">Substitution</span>: Words in purple<br>
	<span style="color:red; text-decoration:line-through;">Deletion</span>: Words in red with strikethrough<br>
	</div>
	"""

	    # Combine highlighted output and legend
	    combined_output = f"{highlighted_hyp_str}<br>{legend_html}"

	    return (
	        combined_output,
	        error_rate,
	        measures['substitutions'],
	        measures['insertions'],
	        measures['deletions'],
	    )

	# Gradio UI: two free-text inputs feed highlight_errors, whose five return
	# values map one-to-one onto the output components listed below.
	result_components = [
	    gr.HTML(label="Highlighted Transcript with Legend"),
	    gr.Number(label="Word Error Rate"),
	    gr.Number(label="Substitutions"),
	    gr.Number(label="Insertions"),
	    gr.Number(label="Deletions"),
	]

	interface = gr.Interface(
	    fn=highlight_errors,
	    inputs=["text", "text"],
	    outputs=result_components,
	    title="WER Calculator with Error Highlighting and Legend",
	)

	# Start the web app
	interface.launch()