import pandas as pd
import gradio as gr
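
# Gradio app for the Fish Speech benchmark: it loads per-language WER/CER results
# for two model versions (1.5 and 1.4) from CSV files and renders a side-by-side
# comparison table plus overall averages as HTML.
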
def compare_csv_files(selected_languages, model_size):
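    """Build an HTML comparison of WER/CER between model 1.5 and model 1.4.

    selected_languages: languages to keep (an empty selection keeps all rows).
    model_size: "Base" or "Medium"; picks which pair of result CSVs to load.
    Returns an HTML string: an overall summary followed by a per-row table.
    """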
    max_num = 10
    # Construct file names dynamically based on model size
    file_1_5 = f"result_1.5_{model_size}.csv"
    file_1_4 = f"result_1.4_{model_size}.csv"
    # Load data
    df1 = pd.read_csv(file_1_5)
    df2 = pd.read_csv(file_1_4)
    # Merge on SourceText and Language
    merged_df = pd.merge(df1, df2, on=["SourceText", "Language"], suffixes=("_1.5", "_1.4"))
    # Filter by selected languages
    if selected_languages:
        merged_df = merged_df[merged_df["Language"].isin(selected_languages)]
    # Calculate differences
    merged_df["WordErrorRate_Diff"] = merged_df["WordErrorRate_1.5"] - merged_df["WordErrorRate_1.4"]
    merged_df["CharacterErrorRate_Diff"] = merged_df["CharacterErrorRate_1.5"] - merged_df["CharacterErrorRate_1.4"]
    # Add comparison columns
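    # Rows where |diff| exceeds max_num are flagged as outliers ("Ignored due to
    # large diff") and are also excluded from the overall averages further below.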
merged_df["WordErrorRate_Comparison"] = merged_df["WordErrorRate_Diff"].apply(
lambda x: "1.4 is the same as 1.5 (Ignored due to large diff)" if abs(x) > max_num else (
f"1.5 is stronger than 1.4 ({x:.8f})" if x < 0 else (
f"1.4 is stronger than 1.5 ({-x:.8f})" if x > 0 else "1.4 is the same as 1.5 (0)"
)
)
)
merged_df["CharacterErrorRate_Comparison"] = merged_df["CharacterErrorRate_Diff"].apply(
lambda x: "1.4 is the same as 1.5 (Ignored due to large diff)" if abs(x) > max_num else (
f"1.5 is stronger than 1.4 ({x:.8f})" if x < 0 else (
f"1.4 is stronger than 1.5 ({-x:.8f})" if x > 0 else "1.4 is the same as 1.5 (0)"
)
)
)
# Overall averages
avg_word_diff = merged_df["WordErrorRate_Diff"].loc[merged_df["WordErrorRate_Diff"].abs() <= max_num].mean()
    avg_char_diff = merged_df["CharacterErrorRate_Diff"].loc[merged_df["CharacterErrorRate_Diff"].abs() <= max_num].mean()
    overall_summary = f"""
    <h3>Overall Comparison:</h3>
    <p>Average WordErrorRate Difference (excluding large diffs): {f'1.5 is stronger ({avg_word_diff:.8f})' if avg_word_diff < 0 else f'1.4 is stronger ({0 - avg_word_diff:.8f})' if avg_word_diff > 0 else "1.4 is the same as 1.5 (0)"}</p>
    <p>Average CharacterErrorRate Difference (excluding large diffs): {f'1.5 is stronger ({avg_char_diff:.8f})' if avg_char_diff < 0 else f'1.4 is stronger ({0 - avg_char_diff:.8f})' if avg_char_diff > 0 else "1.4 is the same as 1.5 (0)"}</p>
"""
# Generate result HTML
result_html = overall_summary + merged_df[[
"Language",
"SourceText",
"WordErrorRate_1.5", "WordErrorRate_1.4", "WordErrorRate_Comparison",
"CharacterErrorRate_1.5", "CharacterErrorRate_1.4", "CharacterErrorRate_Comparison",
]].to_html(escape=False, index=False)
return result_html
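
# Assumed input schema (inferred from the columns referenced above): each
# result_<version>_<size>.csv should provide at least the columns "Language",
# "SourceText", "WordErrorRate" and "CharacterErrorRate".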
# Load unique languages from the data (defaulting to Base files for initialization)
df1 = pd.read_csv("result_1.5_Base.csv")
df2 = pd.read_csv("result_1.4_Base.csv")
languages = sorted(set(df1["Language"]).union(set(df2["Language"])))
gr.Interface(
    fn=compare_csv_files,
    inputs=[
        gr.CheckboxGroup(choices=languages, label="Select Languages to Compare"),
        gr.Dropdown(choices=["Base", "Medium"], label="Select Whisper Model Size", value="Base"),
    ],
    outputs="html",
    title="Fish Speech Benchmark",
    description="Select specific languages and model sizes (Base or Medium) to compare the results of WordErrorRate and CharacterErrorRate.",
).launch()
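
# To try this locally (assuming this file is saved as app.py and the result_*.csv
# files sit next to it): run `python app.py`. Gradio serves the UI on
# http://127.0.0.1:7860 by default; pass share=True to launch() for a public link.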