import html

import gradio as gr
import pandas as pd

def compare_csv_files(
    file1: str = "fish-speech-1.5.csv",
    file2: str = "fish-speech-1.4.csv",
) -> str:
    """Compare the error rates recorded in two result CSV files.

    Both files are inner-joined on their ``SourceText`` column. For every
    matched row the differences ``file1 - file2`` of ``WordErrorRate`` and
    ``CharacterErrorRate`` are added as ``*_Diff`` columns.

    Args:
        file1: Path of the first CSV; its columns receive the ``_1`` suffix.
        file2: Path of the second CSV; its columns receive the ``_2`` suffix.

    Returns:
        An HTML table with the side-by-side comparison, or a short message
        to show instead of the table when a file cannot be read, a required
        column is missing, or the files share no ``SourceText`` value.
    """
    # Every one of these must exist in BOTH files: pandas only applies the
    # merge suffixes to column names that overlap, so a column present in a
    # single file would keep its bare name and break the selection below.
    required = (
        "SourceText",
        "UUID",
        "WhisperText",
        "WordErrorRate",
        "CharacterErrorRate",
    )

    frames = []
    for path in (file1, file2):
        shown_path = html.escape(str(path))
        try:
            frame = pd.read_csv(path)
        except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
            # Report the problem as text, like the empty-merge case below,
            # instead of letting the exception escape the UI callback.
            return f"无法读取文件 {shown_path}:{html.escape(str(exc))}"

        missing = [col for col in required if col not in frame.columns]
        if missing:
            return f"文件 {shown_path} 缺少必要的列:{html.escape(', '.join(missing))}"
        frames.append(frame)

    df1, df2 = frames

    # Join on SourceText. NOTE: a SourceText value repeated inside a file
    # yields one output row per pairing (standard merge behaviour).
    merged_df = pd.merge(df1, df2, on="SourceText", suffixes=("_1", "_2"))

    if merged_df.empty:
        return "两个文件中没有相同的 SourceText,请检查数据。"

    # Differences are "first file minus second file".
    merged_df["WordErrorRate_Diff"] = (
        merged_df["WordErrorRate_1"] - merged_df["WordErrorRate_2"]
    )
    merged_df["CharacterErrorRate_Diff"] = (
        merged_df["CharacterErrorRate_1"] - merged_df["CharacterErrorRate_2"]
    )

    # Fixed column order for the rendered comparison table.
    comparison = merged_df[[
        "SourceText",
        "UUID_1", "WhisperText_1", "WordErrorRate_1", "CharacterErrorRate_1",
        "UUID_2", "WhisperText_2", "WordErrorRate_2", "CharacterErrorRate_2",
        "WordErrorRate_Diff", "CharacterErrorRate_Diff",
    ]]

    return comparison.to_html(index=False)

# Gradio UI: a single page with no input widgets whose output is rendered
# as HTML; the web server is started as soon as this module runs.
demo = gr.Interface(
    compare_csv_files,
    inputs=None,
    outputs="html",
    title="CSV 文件对比工具",
    description="自动加载目录下的 fish-speech-1.5.csv 和 fish-speech-1.4.csv,对比它们的 WordErrorRate 和 CharacterErrorRate 差异。",
)
demo.launch()