Spaces:
Running
Running
import pandas as pd | |
import gradio as gr | |
import os | |
def _describe_diff(diff, max_num):
    """Return the per-row verdict text for one error-rate difference.

    ``diff`` is the 1.5 value minus the 1.4 value, so a negative number means
    1.5 produced the lower error rate.
    """
    if abs(diff) > max_num:
        return "1.4 is the same as 1.5 (Ignored due to large diff)"
    if diff < 0:
        return f"1.5 is stronger than 1.4 ({diff:.8f})"
    if diff > 0:
        return f"1.4 is stronger than 1.5 ({-diff:.8f})"
    return "1.4 is the same as 1.5 (0)"


def _describe_average(avg):
    """Return the summary verdict text for an averaged difference."""
    # The mean of an empty selection is NaN; every comparison against NaN is
    # False, so it must be handled before the sign checks.
    if pd.isna(avg):
        return "N/A (no rows within the threshold)"
    if avg < 0:
        return f"1.5 is stronger ({avg:.8f})"
    if avg > 0:
        return f"1.4 is stronger ({0 - avg:.8f})"
    return "1.4 is the same as 1.5 (0)"


def _audio_tags(uuid):
    """Return ``(html_1_5, html_1_4)`` audio players for one clip.

    Each element is an ``<audio>`` tag pointing at ``<model dir>/<uuid>.wav``
    or the text "Missing Audio" when that file does not exist.
    """
    tags = []
    for folder in ("fish-speech-1.5", "fish-speech-1.4"):
        wav_path = os.path.join(folder, f"{uuid}.wav")
        if os.path.exists(wav_path):
            tags.append(f'<audio controls src="{wav_path}"></audio>')
        else:
            tags.append("Missing Audio")
    return tags[0], tags[1]


def compare_csv_files(max_num):
    """Build an HTML report comparing Fish Speech 1.5 against 1.4.

    Reads ``fish-speech-1.5.csv`` and ``fish-speech-1.4.csv`` from the current
    working directory, inner-joins them on ``SourceText`` and compares the
    ``WordErrorRate`` and ``CharacterErrorRate`` columns row by row.

    Args:
        max_num: Largest absolute difference that is still taken into
            account. Rows whose difference exceeds it are labelled as
            ignored and are left out of the averages.

    Returns:
        An HTML string: a short summary followed by the comparison table.
    """
    df1 = pd.read_csv("fish-speech-1.5.csv")
    df2 = pd.read_csv("fish-speech-1.4.csv")
    merged_df = pd.merge(df1, df2, on="SourceText", suffixes=("_1.5", "_1.4"))

    averages = {}
    for metric in ("WordErrorRate", "CharacterErrorRate"):
        diff = merged_df[f"{metric}_1.5"] - merged_df[f"{metric}_1.4"]
        merged_df[f"{metric}_Diff"] = diff
        merged_df[f"{metric}_Comparison"] = diff.apply(
            lambda d: _describe_diff(d, max_num)
        )
        # Both metrics use the caller's threshold, so the averages exclude
        # exactly the rows that the table marks as ignored.
        averages[metric] = diff[diff.abs() <= max_num].mean()

    word_verdict = _describe_average(averages["WordErrorRate"])
    char_verdict = _describe_average(averages["CharacterErrorRate"])
    overall_summary = (
        "\n<h3>Overall Comparison:</h3>\n"
        "<p>Average WordErrorRate Difference (excluding large diffs): "
        f"{word_verdict}</p>\n"
        "<p>Average CharacterErrorRate Difference (excluding large diffs): "
        f"{char_verdict}</p>\n"
    )

    # NOTE(review): the wav lookup is keyed by the SourceText value; confirm
    # that the audio files are really named after that column.
    # Built as plain lists so an empty merge yields empty columns instead of
    # failing on tuple unpacking.
    audio_pairs = [_audio_tags(text) for text in merged_df["SourceText"]]
    merged_df["Audio_1.5"] = [pair[0] for pair in audio_pairs]
    merged_df["Audio_1.4"] = [pair[1] for pair in audio_pairs]

    report_columns = [
        "SourceText",
        "WordErrorRate_1.5", "WordErrorRate_1.4", "WordErrorRate_Comparison",
        "CharacterErrorRate_1.5", "CharacterErrorRate_1.4", "CharacterErrorRate_Comparison",
        "WhisperText_1.5", "WhisperText_1.4",
        "Audio_1.5", "Audio_1.4",
    ]
    # escape=False keeps the <audio> tags as markup; as a consequence the text
    # columns coming from the CSV files are emitted unescaped as well.
    result_html = overall_summary + merged_df[report_columns].to_html(
        escape=False, index=False
    )
    return result_html
# Numeric input for the "large diff" threshold handed to compare_csv_files;
# the form starts out with a value of 10.
max_num = gr.Number(value=10)

# Wire the comparison function to a single-input form that renders the
# returned string as HTML, then start the app.
demo = gr.Interface(
    fn=compare_csv_files,
    inputs=[max_num],
    outputs="html",
    title="Fish Speech Benchmark",
    description="This is a non-official model performance test from Fish Speech / Whisper Base / More data will be added later (not too much)",
)
demo.launch()