None1145's picture
Update app.py
a8dfc6b verified
raw
history blame
3.25 kB
import pandas as pd
import gradio as gr
import os
def _describe_diff(diff, max_num):
    """Return the per-row verdict text for one (1.5 minus 1.4) error-rate difference."""
    if abs(diff) > max_num:
        return "1.4 is the same as 1.5 (Ignored due to large diff)"
    if diff < 0:
        # A lower error rate for 1.5 means 1.5 did better on this row.
        return f"1.5 is stronger than 1.4 ({diff:.8f})"
    if diff > 0:
        return f"1.4 is stronger than 1.5 ({-diff:.8f})"
    return "1.4 is the same as 1.5 (0)"


def _describe_average(avg):
    """Return the summary text for an averaged (1.5 minus 1.4) error-rate difference."""
    if pd.isna(avg):
        # The mean of an empty selection is NaN; report that instead of a bogus winner.
        return "N/A (no rows within the threshold)"
    if avg < 0:
        return f"1.5 is stronger ({avg:.8f})"
    if avg > 0:
        return f"1.4 is stronger ({-avg:.8f})"
    return "1.4 is the same as 1.5 (0)"


def compare_csv_files(max_num):
    """Compare the fish-speech 1.5 and 1.4 result CSVs and build the report.

    Reads ``fish-speech-1.5.csv`` and ``fish-speech-1.4.csv`` from the current
    working directory, joins them on ``SourceText`` and compares the
    ``WordErrorRate`` and ``CharacterErrorRate`` columns row by row.

    Args:
        max_num: Largest absolute difference that is still taken into account.
            Rows whose difference exceeds it are labelled as ignored and are
            left out of the averages. The same threshold is applied to both
            metrics.

    Returns:
        A tuple whose first element is an HTML string (overall summary followed
        by the comparison table) and whose remaining elements are ``gr.Audio``
        components: one per merged row for the 1.5 clips, then one per merged
        row for the 1.4 clips.
    """
    df1 = pd.read_csv("fish-speech-1.5.csv")
    df2 = pd.read_csv("fish-speech-1.4.csv")
    merged_df = pd.merge(df1, df2, on="SourceText", suffixes=("_1.5", "_1.4"))

    averages = {}
    for metric in ("WordErrorRate", "CharacterErrorRate"):
        diff = merged_df[f"{metric}_1.5"] - merged_df[f"{metric}_1.4"]
        merged_df[f"{metric}_Diff"] = diff
        merged_df[f"{metric}_Comparison"] = [
            _describe_diff(value, max_num) for value in diff
        ]
        # Average only the rows that were not ignored above.
        averages[metric] = diff[diff.abs() <= max_num].mean()

    word_summary = _describe_average(averages["WordErrorRate"])
    char_summary = _describe_average(averages["CharacterErrorRate"])
    overall_summary = (
        "\n<h3>Overall Comparison:</h3>\n"
        "<p>Average WordErrorRate Difference (excluding large diffs): "
        f"{word_summary}</p>\n"
        "<p>Average CharacterErrorRate Difference (excluding large diffs): "
        f"{char_summary}</p>\n"
    )

    # NOTE(review): the SourceText value is used as the .wav base name in both
    # model directories -- confirm that this matches how the clips are named.
    names = list(merged_df["SourceText"])
    files_1_5 = [os.path.join("fish-speech-1.5", f"{name}.wav") for name in names]
    files_1_4 = [os.path.join("fish-speech-1.4", f"{name}.wav") for name in names]

    result = merged_df[[
        "SourceText",
        "WordErrorRate_1.5", "WordErrorRate_1.4", "WordErrorRate_Comparison",
        "CharacterErrorRate_1.5", "CharacterErrorRate_1.4", "CharacterErrorRate_Comparison",
        "WhisperText_1.5", "WhisperText_1.4",
    ]]

    # All 1.5 clips come first, then all 1.4 clips, in merged-row order.
    audio_columns = [gr.Audio(value=path) for path in files_1_5 + files_1_4]

    return (overall_summary + result.to_html(index=False), *audio_columns)
# Threshold input: differences with an absolute value above this are ignored.
max_num = gr.Number(value=10)

# The output components are fixed when the interface is built, so the number of
# rows shared by both CSVs has to be known here. compare_csv_files returns two
# audio values per merged row (the 1.5 clip and the 1.4 clip).
_merged_row_count = len(
    pd.merge(
        pd.read_csv("fish-speech-1.5.csv"),
        pd.read_csv("fish-speech-1.4.csv"),
        on="SourceText",
    )
)

gr.Interface(
    fn=compare_csv_files,
    inputs=[max_num],
    # One HTML report followed by one audio player per returned clip.
    outputs=["html"] + [gr.Audio() for _ in range(2 * _merged_row_count)],
    title="Fish Speech Benchmark",
    description="This is a non-official model performance test from Fish Speech / Whisper Base / More data will be added later (not too much)"
).launch()