Spaces:
Sleeping
Sleeping
File size: 3,064 Bytes
bdee176 1c61313 fd604c4 1c61313 0ded70e bdee176 1c61313 811f643 1c61313 7d3a98a 811f643 1c61313 6e75d7c 05121a3 94661bc 6e75d7c 05121a3 94661bc 6e75d7c bdee176 1c61313 c6b858f 6f014a9 9c11640 6f014a9 1c61313 bd17ee0 1c61313 6e75d7c b4b3e6a bd17ee0 bdee176 bd17ee0 811f643 1c61313 bdee176 1c61313 bd17ee0 06d3610 1c61313 bdee176 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import pandas as pd
import gradio as gr
def _describe_row_diff(diff, max_num):
    """Label a single per-row error-rate difference (1.5 minus 1.4).

    A lower error rate is better, so a negative difference favours 1.5.
    Differences whose magnitude exceeds ``max_num`` are reported as ignored.
    """
    if abs(diff) > max_num:
        return "1.4 is the same as 1.5 (Ignored due to large diff)"
    if diff < 0:
        return f"1.5 is stronger than 1.4 ({diff:.8f})"
    if diff > 0:
        return f"1.4 is stronger than 1.5 ({-diff:.8f})"
    return "1.4 is the same as 1.5 (0)"


def _describe_average(avg):
    """Label the average difference shown in the overall summary."""
    # mean() of an empty selection is NaN; say so instead of printing "nan"
    # under a "1.4 is stronger" verdict.
    if pd.isna(avg):
        return "N/A (no rows to compare)"
    if avg < 0:
        return f"1.5 is stronger ({avg:.8f})"
    if avg > 0:
        return f"1.4 is stronger ({-avg:.8f})"
    return "1.4 is the same as 1.5 (0)"


def compare_csv_files(selected_languages):
    """Build an HTML report comparing the 1.5 and 1.4 benchmark results.

    Reads ``result_1.5.csv`` and ``result_1.4.csv`` from the current working
    directory, inner-joins them on ``SourceText`` and ``Language``, and
    compares the ``WordErrorRate`` and ``CharacterErrorRate`` columns.

    Args:
        selected_languages: Collection of ``Language`` values to keep. A
            falsy value (``None`` or an empty list) keeps every language.

    Returns:
        An HTML string: an overall summary followed by a per-row table.
    """
    # Differences with a magnitude above this are treated as outliers: they
    # are labelled as ignored per row and left out of the averages.
    max_num = 10

    # Load data
    df1 = pd.read_csv("result_1.5.csv")
    df2 = pd.read_csv("result_1.4.csv")

    # Merge with Language column
    merged_df = pd.merge(
        df1, df2, on=["SourceText", "Language"], suffixes=("_1.5", "_1.4")
    )

    # Filter by selected languages. Copy so the column assignments below
    # write to an independent frame rather than a slice of the merge result.
    if selected_languages:
        merged_df = merged_df[merged_df["Language"].isin(selected_languages)].copy()

    # Per-metric differences, row labels and outlier-free averages. Both
    # metrics use the same max_num threshold so the averages agree with the
    # "Ignored due to large diff" labels shown in the table.
    averages = {}
    for metric in ("WordErrorRate", "CharacterErrorRate"):
        diff = merged_df[f"{metric}_1.5"] - merged_df[f"{metric}_1.4"]
        merged_df[f"{metric}_Diff"] = diff
        merged_df[f"{metric}_Comparison"] = [
            _describe_row_diff(value, max_num) for value in diff
        ]
        averages[metric] = diff[diff.abs() <= max_num].mean()

    word_summary = _describe_average(averages["WordErrorRate"])
    char_summary = _describe_average(averages["CharacterErrorRate"])
    overall_summary = (
        "\n<h3>Overall Comparison:</h3>\n"
        "<p>Average WordErrorRate Difference (excluding large diffs): "
        f"{word_summary}</p>\n"
        "<p>Average CharacterErrorRate Difference (excluding large diffs): "
        f"{char_summary}</p>\n"
    )

    # Generate result HTML
    report_columns = [
        "Language",
        "SourceText",
        "WordErrorRate_1.5", "WordErrorRate_1.4", "WordErrorRate_Comparison",
        "CharacterErrorRate_1.5", "CharacterErrorRate_1.4", "CharacterErrorRate_Comparison",
    ]
    # NOTE(review): escape=False writes cell text verbatim, so any markup in
    # SourceText is rendered as HTML. Confirm the CSV contents are trusted.
    return overall_summary + merged_df[report_columns].to_html(escape=False, index=False)
# Gather every language that appears in either result file so the UI can
# offer them as filter choices.
df1 = pd.read_csv("result_1.5.csv")
df2 = pd.read_csv("result_1.4.csv")
languages = sorted(set(df1["Language"]) | set(df2["Language"]))

# Checkbox selector passed to compare_csv_files as its list of languages.
language_selector = gr.CheckboxGroup(
    choices=languages,
    label="Select Languages to Compare",
)

# Wire the comparison function to the selector and render its HTML output.
demo = gr.Interface(
    fn=compare_csv_files,
    inputs=language_selector,
    outputs="html",
    title="Fish Speech Benchmark",
    description="Select specific languages to compare the results of WordErrorRate and CharacterErrorRate.",
)
demo.launch()
|