# see-asr-outputs / app.py
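"""Gradio app that displays JSONL files generated by ASR models as a dataframe."""
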
import sys
import re
from importlib.metadata import version
import polars as pl
import gradio as gr


# Config
concurrency_limit = 5
title = "See ASR Outputs"
# https://www.tablesgenerator.com/markdown_tables
authors_table = """
## Authors
Follow the author on social networks and **contact** him if you need any help or have any questions:
| <img src="https://avatars.githubusercontent.com/u/7875085?v=4" width="100"> **Yehor Smoliakov** |
|-------------------------------------------------------------------------------------------------|
| https://t.me/smlkw in Telegram |
| https://x.com/yehor_smoliakov at X |
| https://github.com/egorsmkv at GitHub |
| https://huggingface.co/Yehor at Hugging Face |
| or use [email protected] |
""".strip()
examples = [
["evaluation_results.jsonl", False],
["evaluation_results_batch.jsonl", True],
]
description_head = f"""
# {title}
## Overview
View JSONL files generated by ASR models as a dataframe.
""".strip()
description_foot = f"""
{authors_table}
""".strip()
metrics_value = """
Metrics will appear here.
""".strip()
tech_env = f"""
#### Environment
- Python: {sys.version}
""".strip()
tech_libraries = f"""
#### Libraries
- gradio: {version("gradio")}
- polars: {version("polars")}
""".strip()


def inference(file_name, batch_mode):
    """Read an ASR results JSONL file and return it as a Polars dataframe."""
    if not file_name:
        raise gr.Error("Please upload a JSONL file.")

    df = pl.read_ndjson(file_name)
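
    # Columns expected in a per-file (non-batch) results file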
required_columns = [
"filename",
"inference_start",
"inference_end",
"inference_total",
"duration",
"reference",
"prediction",
]
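
    # Columns expected in a batch results file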
required_columns_batch = [
"inference_start",
"inference_end",
"inference_total",
"filenames",
"durations",
"references",
"predictions",
]
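
    # Example records (illustrative values; batch fields are assumed to hold per-item lists):
    #   per-file mode:
    #     {"filename": "sample.wav", "inference_start": 0.0, "inference_end": 1.2,
    #      "inference_total": 1.2, "duration": 3.5, "reference": "...", "prediction": "..."}
    #   batch mode:
    #     {"inference_start": 0.0, "inference_end": 2.4, "inference_total": 2.4,
    #      "filenames": ["a.wav", "b.wav"], "durations": [3.5, 2.1],
    #      "references": ["...", "..."], "predictions": ["...", "..."]}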
    if batch_mode:
if not all(col in df.columns for col in required_columns_batch):
raise gr.Error(
f"Please provide a JSONL file with the following columns: {required_columns_batch}"
)
else:
if not all(col in df.columns for col in required_columns):
raise gr.Error(
f"Please provide a JSONL file with the following columns: {required_columns}"
)
    # Drop the timing endpoints and filename columns, which are not useful for display
    if batch_mode:
        df = df.drop(["inference_start", "inference_end", "filenames"])
    else:
        df = df.drop(["inference_start", "inference_end", "filename"])
# round "inference_total" field to 2 decimal places
df = df.with_columns(pl.col("inference_total").round(2))
return df


demo = gr.Blocks(
title=title,
analytics_enabled=False,
theme=gr.themes.Base(),
)
with demo:
gr.Markdown(description_head)
gr.Markdown("## Usage")
with gr.Row():
df = gr.DataFrame(
label="Dataframe",
)
with gr.Row():
with gr.Column():
jsonl_file = gr.File(label="A JSONL file")
batch_mode = gr.Checkbox(
label="Use batch mode",
)
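            # Load the chosen JSONL file into the dataframe on click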
gr.Button("Show").click(
inference,
concurrency_limit=concurrency_limit,
inputs=[jsonl_file, batch_mode],
outputs=df,
)
with gr.Row():
gr.Examples(
label="Choose an example",
inputs=[jsonl_file, batch_mode],
examples=examples,
)
gr.Markdown(description_foot)
gr.Markdown("### Gradio app uses:")
gr.Markdown(tech_env)
gr.Markdown(tech_libraries)


if __name__ == "__main__":
demo.queue()
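    # launch() serves the app on http://127.0.0.1:7860 by default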
demo.launch()