Spaces:
Sleeping
Sleeping
File size: 3,245 Bytes
bdee176 a8dfc6b bdee176 05121a3 811f643 bdee176 7d3a98a 811f643 7d3a98a 811f643 6e75d7c 05121a3 94661bc 6e75d7c 05121a3 94661bc 6e75d7c bdee176 05121a3 94661bc 6f014a9 9c11640 6f014a9 a8dfc6b 6e75d7c b4b3e6a bdee176 a8dfc6b bdee176 a8dfc6b 811f643 71ff049 bdee176 05121a3 a8dfc6b 06d3610 a8dfc6b bdee176 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import pandas as pd
import gradio as gr
import os
def _describe_diff(diff, max_num):
    """Return the per-row verdict text for one error-rate difference.

    ``diff`` is the 1.5 value minus the 1.4 value, so a negative number means
    1.5 produced the lower error rate. Differences whose magnitude exceeds
    ``max_num`` are reported as ignored.
    """
    if abs(diff) > max_num:
        return "1.4 is the same as 1.5 (Ignored due to large diff)"
    if diff < 0:
        return f"1.5 is stronger than 1.4 ({diff:.8f})"
    if diff > 0:
        # The negated value is printed, matching the existing report format.
        return f"1.4 is stronger than 1.5 ({-diff:.8f})"
    return "1.4 is the same as 1.5 (0)"


def _describe_average(avg_diff):
    """Return the summary verdict text for an averaged difference."""
    # mean() of an empty selection is NaN; without this guard the comparison
    # below is False and the report would claim "1.4 is stronger (nan)".
    if pd.isna(avg_diff):
        return "no rows within the threshold"
    if avg_diff < 0:
        return f"1.5 is stronger ({avg_diff:.8f})"
    return f"1.4 is stronger ({0 - avg_diff:.8f})"


def compare_csv_files(max_num):
    """Compare the fish-speech 1.5 and 1.4 benchmark CSVs.

    Reads ``fish-speech-1.5.csv`` and ``fish-speech-1.4.csv`` from the working
    directory, inner-joins them on ``SourceText`` and labels, per row, which
    version has the lower word / character error rate.

    Args:
        max_num: Largest absolute difference still taken into account. Rows
            with a larger difference are labelled as ignored and are left out
            of the averages.

    Returns:
        A tuple whose first item is an HTML string (summary followed by the
        comparison table) and whose remaining items are ``gr.Audio``
        components: one per merged row for 1.5, then one per merged row
        for 1.4.
    """
    df_new = pd.read_csv("fish-speech-1.5.csv")
    df_old = pd.read_csv("fish-speech-1.4.csv")
    merged_df = pd.merge(df_new, df_old, on="SourceText", suffixes=("_1.5", "_1.4"))

    averages = {}
    for metric in ("WordErrorRate", "CharacterErrorRate"):
        diff = merged_df[f"{metric}_1.5"] - merged_df[f"{metric}_1.4"]
        merged_df[f"{metric}_Diff"] = diff
        merged_df[f"{metric}_Comparison"] = diff.apply(
            lambda value: _describe_diff(value, max_num)
        )
        # Both metrics use the caller's threshold; the character-rate average
        # used to be filtered with a hard-coded 1 instead of max_num.
        averages[metric] = diff.loc[diff.abs() <= max_num].mean()

    overall_summary = (
        "\n<h3>Overall Comparison:</h3>\n"
        "<p>Average WordErrorRate Difference (excluding large diffs): "
        f"{_describe_average(averages['WordErrorRate'])}</p>\n"
        "<p>Average CharacterErrorRate Difference (excluding large diffs): "
        f"{_describe_average(averages['CharacterErrorRate'])}</p>\n"
    )

    # NOTE(review): clip file names are built from the SourceText value; the
    # original code called this value "uuid" - confirm the .wav files really
    # are keyed by SourceText and not by a separate id column.
    keys = list(merged_df["SourceText"])
    files_1_5 = [os.path.join("fish-speech-1.5", f"{key}.wav") for key in keys]
    files_1_4 = [os.path.join("fish-speech-1.4", f"{key}.wav") for key in keys]

    result = merged_df[[
        "SourceText",
        "WordErrorRate_1.5", "WordErrorRate_1.4", "WordErrorRate_Comparison",
        "CharacterErrorRate_1.5", "CharacterErrorRate_1.4", "CharacterErrorRate_Comparison",
        "WhisperText_1.5", "WhisperText_1.4",
    ]]

    # Audio components for the Gradio interface: every 1.5 clip, then every
    # 1.4 clip, in merged-row order.
    audio_columns = [gr.Audio(value=path) for path in files_1_5 + files_1_4]

    return overall_summary + result.to_html(index=False), *audio_columns
# Gradio needs every output component declared before launch. The original
# expression used len(df1), but df1 only exists inside compare_csv_files, so
# the module raised NameError on import. The count is taken from the same
# inner merge the callback performs so it matches what the callback returns.
_merged_row_count = len(
    pd.merge(
        pd.read_csv("fish-speech-1.5.csv"),
        pd.read_csv("fish-speech-1.4.csv"),
        on="SourceText",
    )
)

max_num = gr.Number(value=10)
gr.Interface(
    fn=compare_csv_files,
    inputs=[max_num],
    # One HTML report, then a 1.5 player and a 1.4 player for each merged row.
    outputs=["html"] + [gr.Audio() for _ in range(2 * _merged_row_count)],
    title="Fish Speech Benchmark",
    description="This is a non-official model performance test from Fish Speech / Whisper Base / More data will be added later (not too much)"
).launch()
|