File size: 3,529 Bytes
bdee176
 
 
486a085
fd604c4
 
486a085
 
 
 
1c61313
486a085
 
bdee176
1c61313
 
811f643
1c61313
 
 
 
 
7d3a98a
811f643
 
1c61313
6e75d7c
05121a3
94661bc
 
 
6e75d7c
 
 
05121a3
94661bc
 
 
6e75d7c
 
bdee176
1c61313
c6b858f
 
6f014a9
 
b28fe12
 
6f014a9
 
1c61313
bd17ee0
1c61313
6e75d7c
 
b4b3e6a
bd17ee0
bdee176
bd17ee0
811f643
486a085
 
 
1c61313
 
bdee176
 
486a085
 
 
 
bd17ee0
06d3610
486a085
bdee176
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import pandas as pd
import gradio as gr

def compare_csv_files(selected_languages, model_size):
    max_num = 10
    
    # Construct file names dynamically based on model size
    file_1_5 = f"result_1.5_{model_size}.csv"
    file_1_4 = f"result_1.4_{model_size}.csv"
    
    # Load data
    df1 = pd.read_csv(file_1_5)
    df2 = pd.read_csv(file_1_4)
    
    # Merge with Language column
    merged_df = pd.merge(df1, df2, on=["SourceText", "Language"], suffixes=("_1.5", "_1.4"))
    
    # Filter by selected languages
    if selected_languages:
        merged_df = merged_df[merged_df["Language"].isin(selected_languages)]
    
    # Calculate differences
    merged_df["WordErrorRate_Diff"] = merged_df["WordErrorRate_1.5"] - merged_df["WordErrorRate_1.4"]
    merged_df["CharacterErrorRate_Diff"] = merged_df["CharacterErrorRate_1.5"] - merged_df["CharacterErrorRate_1.4"]
    
    # Add comparison columns
    merged_df["WordErrorRate_Comparison"] = merged_df["WordErrorRate_Diff"].apply(
        lambda x: "1.4 is the same as 1.5 (Ignored due to large diff)" if abs(x) > max_num else (
            f"1.5 is stronger than 1.4 ({x:.8f})" if x < 0 else (
                f"1.4 is stronger than 1.5 ({-x:.8f})" if x > 0 else "1.4 is the same as 1.5 (0)"
            )
        )
    )
    merged_df["CharacterErrorRate_Comparison"] = merged_df["CharacterErrorRate_Diff"].apply(
        lambda x: "1.4 is the same as 1.5 (Ignored due to large diff)" if abs(x) > max_num else (
            f"1.5 is stronger than 1.4 ({x:.8f})" if x < 0 else (
                f"1.4 is stronger than 1.5 ({-x:.8f})" if x > 0 else "1.4 is the same as 1.5 (0)"
            )
        )
    )
    
    # Overall averages
    avg_word_diff = merged_df["WordErrorRate_Diff"].loc[merged_df["WordErrorRate_Diff"].abs() <= max_num].mean()
    avg_char_diff = merged_df["CharacterErrorRate_Diff"].loc[merged_df["CharacterErrorRate_Diff"].abs() <= 1].mean()
    overall_summary = f"""
    <h3>Overall Comparison:</h3>
    <p>Average WordErrorRate Difference (excluding large diffs): {f'1.5 is stronger ({avg_word_diff:.8f})' if avg_word_diff < 0 else f'1.4 is stronger ({0 - avg_word_diff:.8f})' if avg_word_diff > 0 else "1.4 is the same as 1.5 (0)"}</p>
    <p>Average CharacterErrorRate Difference (excluding large diffs): {f'1.5 is stronger ({avg_char_diff:.8f})' if avg_char_diff < 0 else f'1.4 is stronger ({0 - avg_char_diff:.8f})' if avg_word_diff > 0 else "1.4 is the same as 1.5 (0)"}</p>
    """
    
    # Generate result HTML
    result_html = overall_summary + merged_df[[
        "Language",
        "SourceText",
        "WordErrorRate_1.5", "WordErrorRate_1.4", "WordErrorRate_Comparison",
        "CharacterErrorRate_1.5", "CharacterErrorRate_1.4", "CharacterErrorRate_Comparison",
    ]].to_html(escape=False, index=False)
    
    return result_html

# Build the language choices from the Base result files; the model-size
# dropdown below also starts on "Base".
df1 = pd.read_csv("result_1.5_Base.csv")
df2 = pd.read_csv("result_1.4_Base.csv")
languages = sorted(set(df1["Language"]) | set(df2["Language"]))

# Input widgets, in the same order as compare_csv_files' parameters.
language_picker = gr.CheckboxGroup(
    choices=languages,
    label="Select Languages to Compare",
)
model_size_picker = gr.Dropdown(
    choices=["Base", "Medium"],
    label="Select Whisper Model Size",
    value="Base",
)

demo = gr.Interface(
    fn=compare_csv_files,
    inputs=[language_picker, model_size_picker],
    outputs="html",
    title="Fish Speech Benchmark",
    description="Select specific languages and model sizes (Base or Medium) to compare the results of WordErrorRate and CharacterErrorRate.",
)
demo.launch()