# see-asr-outputs / app.py
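"""Gradio app that displays JSONL files generated by ASR models as a dataframe."""
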
import sys
import re
from importlib.metadata import version
import polars as pl
import gradio as gr


# Config
concurrency_limit = 5
title = "See ASR Outputs"
# https://www.tablesgenerator.com/markdown_tables
authors_table = """
## Authors
Follow the author on social networks and **contact** him if you need any help or have any questions:
| <img src="https://avatars.githubusercontent.com/u/7875085?v=4" width="100"> **Yehor Smoliakov** |
|-------------------------------------------------------------------------------------------------|
| https://t.me/smlkw in Telegram |
| https://x.com/yehor_smoliakov at X |
| https://github.com/egorsmkv at GitHub |
| https://huggingface.co/Yehor at Hugging Face |
| or use [email protected] |
""".strip()
examples = [
["evaluation_results.jsonl", False],
["evaluation_results_batch.jsonl", True],
]
description_head = f"""
# {title}
## Overview
View JSONL files generated by ASR models as a dataframe.
""".strip()
description_foot = f"""
{authors_table}
""".strip()
metrics_value = """
Metrics will appear here.
""".strip()
tech_env = f"""
#### Environment
- Python: {sys.version}
""".strip()
tech_libraries = f"""
#### Libraries
- gradio: {version("gradio")}
- polars: {version("polars")}
""".strip()


def inference(file_name, batch_mode):
    """Read an ASR results JSONL file and return it as a Polars dataframe."""
    if not file_name:
        raise gr.Error("Please upload a JSONL file.")

    df = pl.read_ndjson(file_name)
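
    # Columns expected in a per-file (non-batch) results file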
required_columns = [
"filename",
"inference_start",
"inference_end",
"inference_total",
"duration",
"reference",
"prediction",
]
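
    # Columns expected in a batch results file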
required_columns_batch = [
"inference_start",
"inference_end",
"inference_total",
"filenames",
"durations",
"references",
"predictions",
]
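
    # Example records (illustrative values; batch fields are assumed to hold per-item lists):
    #   per-file mode:
    #     {"filename": "sample.wav", "inference_start": 0.0, "inference_end": 1.2,
    #      "inference_total": 1.2, "duration": 3.5, "reference": "...", "prediction": "..."}
    #   batch mode:
    #     {"inference_start": 0.0, "inference_end": 2.4, "inference_total": 2.4,
    #      "filenames": ["a.wav", "b.wav"], "durations": [3.5, 2.1],
    #      "references": ["...", "..."], "predictions": ["...", "..."]}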
    if batch_mode:
if not all(col in df.columns for col in required_columns_batch):
raise gr.Error(
f"Please provide a JSONL file with the following columns: {required_columns_batch}"
)
else:
if not all(col in df.columns for col in required_columns):
raise gr.Error(
f"Please provide a JSONL file with the following columns: {required_columns}"
)
    # Drop the timing endpoints and filename columns, which are not useful for display
    if batch_mode:
        df = df.drop(["inference_start", "inference_end", "filenames"])
    else:
        df = df.drop(["inference_start", "inference_end", "filename"])
# round "inference_total" field to 2 decimal places
df = df.with_columns(pl.col("inference_total").round(2))
return df


demo = gr.Blocks(
title=title,
analytics_enabled=False,
theme=gr.themes.Base(),
)
with demo:
gr.Markdown(description_head)
gr.Markdown("## Usage")
with gr.Row():
df = gr.DataFrame(
label="Dataframe",
)
with gr.Row():
with gr.Column():
jsonl_file = gr.File(label="A JSONL file")
batch_mode = gr.Checkbox(
label="Use batch mode",
)
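            # Load the chosen JSONL file into the dataframe on click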
gr.Button("Show").click(
inference,
concurrency_limit=concurrency_limit,
inputs=[jsonl_file, batch_mode],
outputs=df,
)
with gr.Row():
gr.Examples(
label="Choose an example",
inputs=[jsonl_file, batch_mode],
examples=examples,
)
gr.Markdown(description_foot)
gr.Markdown("### Gradio app uses:")
gr.Markdown(tech_env)
gr.Markdown(tech_libraries)


if __name__ == "__main__":
demo.queue()
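    # launch() serves the app on http://127.0.0.1:7860 by default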
demo.launch()