File size: 3,245 Bytes
bdee176
 
a8dfc6b
bdee176
05121a3
811f643
 
bdee176
7d3a98a
811f643
7d3a98a
811f643
 
6e75d7c
05121a3
94661bc
 
 
6e75d7c
 
 
05121a3
94661bc
 
 
6e75d7c
 
bdee176
05121a3
94661bc
6f014a9
 
9c11640
 
6f014a9
 
a8dfc6b
 
 
 
 
 
 
 
 
 
6e75d7c
 
 
b4b3e6a
 
bdee176
a8dfc6b
 
 
 
 
 
 
bdee176
a8dfc6b
811f643
71ff049
bdee176
 
05121a3
a8dfc6b
06d3610
a8dfc6b
bdee176
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import pandas as pd
import gradio as gr
import os

def _describe_diff(diff, max_num):
    """Return the per-row verdict for one error-rate difference (1.5 minus 1.4).

    A negative difference means 1.5 has the lower error rate. Differences
    whose magnitude exceeds ``max_num`` are labelled as ignored.
    """
    if abs(diff) > max_num:
        return "1.4 is the same as 1.5 (Ignored due to large diff)"
    if diff < 0:
        return f"1.5 is stronger than 1.4 ({diff:.8f})"
    if diff > 0:
        return f"1.4 is stronger than 1.5 ({-diff:.8f})"
    return "1.4 is the same as 1.5 (0)"


def _describe_average(avg):
    """Return the verdict text for an averaged difference (1.5 minus 1.4)."""
    # The mean of an empty selection is NaN; report that explicitly instead
    # of letting it fall through to the "1.4 is stronger" branch.
    if pd.isna(avg):
        return "n/a (no rows within the threshold)"
    if avg < 0:
        return f"1.5 is stronger ({avg:.8f})"
    if avg > 0:
        return f"1.4 is stronger ({-avg:.8f})"
    return "1.4 is the same as 1.5 (0)"


def compare_csv_files(max_num):
    """Compare the fish-speech 1.5 and 1.4 benchmark CSVs row by row.

    Reads ``fish-speech-1.5.csv`` and ``fish-speech-1.4.csv`` from the current
    working directory, joins them on ``SourceText`` and computes, per row, the
    difference (1.5 minus 1.4) of ``WordErrorRate`` and ``CharacterErrorRate``.

    Args:
        max_num: Threshold on the absolute difference. Rows whose difference
            exceeds it are labelled as ignored and are left out of the
            corresponding average.

    Returns:
        A tuple whose first element is an HTML string (overall summary
        followed by the comparison table) and whose remaining elements are
        ``gr.Audio`` components: one per merged row for the 1.5 audio,
        followed by one per merged row for the 1.4 audio.
    """
    df_new = pd.read_csv("fish-speech-1.5.csv")
    df_old = pd.read_csv("fish-speech-1.4.csv")
    merged_df = pd.merge(df_new, df_old, on="SourceText", suffixes=("_1.5", "_1.4"))

    averages = {}
    for metric in ("WordErrorRate", "CharacterErrorRate"):
        diffs = merged_df[f"{metric}_1.5"] - merged_df[f"{metric}_1.4"]
        merged_df[f"{metric}_Diff"] = diffs
        merged_df[f"{metric}_Comparison"] = diffs.apply(
            lambda d: _describe_diff(d, max_num)
        )
        # Both metrics use the same caller-supplied threshold, so the average
        # excludes exactly the rows that were labelled as ignored above.
        averages[metric] = diffs[diffs.abs() <= max_num].mean()

    word_verdict = _describe_average(averages["WordErrorRate"])
    char_verdict = _describe_average(averages["CharacterErrorRate"])
    overall_summary = (
        "\n    <h3>Overall Comparison:</h3>\n"
        f"    <p>Average WordErrorRate Difference (excluding large diffs): {word_verdict}</p>\n"
        f"    <p>Average CharacterErrorRate Difference (excluding large diffs): {char_verdict}</p>\n"
        "    "
    )

    result = merged_df[[
        "SourceText",
        "WordErrorRate_1.5", "WordErrorRate_1.4", "WordErrorRate_Comparison",
        "CharacterErrorRate_1.5", "CharacterErrorRate_1.4", "CharacterErrorRate_Comparison",
        "WhisperText_1.5", "WhisperText_1.4",
    ]]

    # NOTE(review): the audio file name is derived from the SourceText value;
    # confirm that this column really holds the clip identifier used on disk.
    keys = list(merged_df["SourceText"])
    paths_1_5 = [os.path.join("fish-speech-1.5", f"{key}.wav") for key in keys]
    paths_1_4 = [os.path.join("fish-speech-1.4", f"{key}.wav") for key in keys]

    # All 1.5 clips come first, then all 1.4 clips, in merged-row order.
    audio_columns = [gr.Audio(value=path) for path in paths_1_5 + paths_1_4]

    return (overall_summary + result.to_html(index=False), *audio_columns)

# Numeric input passed to compare_csv_files as its difference threshold.
max_num = gr.Number(value=10)

# compare_csv_files returns one HTML string followed by two audio components
# per merged row (the 1.5 clip and the 1.4 clip), so the interface needs that
# many audio outputs. The row count is computed here because the data frames
# built inside the function are not visible at module level.
_merged_row_count = len(
    pd.merge(
        pd.read_csv("fish-speech-1.5.csv"),
        pd.read_csv("fish-speech-1.4.csv"),
        on="SourceText",
        suffixes=("_1.5", "_1.4"),
    )
)

gr.Interface(
    fn=compare_csv_files,
    inputs=[max_num],
    outputs=["html"] + [gr.Audio() for _ in range(2 * _merged_row_count)],
    title="Fish Speech Benchmark",
    description="This is a non-official model performance test from Fish Speech / Whisper Base / More data will be added later (not too much)"
).launch()