|
import os |
|
import re |
|
import sys |
|
import json |
|
import random |
|
|
|
import pandas as pd |
|
import numpy as np |
|
|
|
from app.content import * |
|
|
|
# Accumulates [model, dataset, metric, value] rows parsed from score files.
data_to_df = []

# Root directory containing one sub-directory of score files per model.
log_dir = "path/to/audiobench/log"

# Pre-compiled once (was rebuilt per file): "<dataset>_<metric>_score.json".
# Raw string and escaped dot fix the original non-raw, unescaped pattern.
score_file_pattern = re.compile(r"^(.*?)_(llama3_70b_judge|wer|bleu)_score\.json$")

all_evaluated_models = os.listdir(log_dir)

for model_name in all_evaluated_models:
    # Skip internal / experimental model runs.
    if "geyu_whisper" in model_name:
        continue

    if "activation_checkpointing" in model_name:
        continue

    model_dir = os.path.join(log_dir, model_name)

    if not os.path.isdir(model_dir):
        continue

    for log_file in os.listdir(model_dir):
        if not log_file.endswith("score.json"):
            continue

        match = score_file_pattern.match(log_file)
        if match is None:
            # BUG FIX: the original called .group() on a possible None,
            # crashing on any "*score.json" file whose metric is not one
            # of the three tracked ones (e.g. "*_meteor_score.json").
            continue

        ds_name = match.group(1)
        metrics = match.group(2)

        eval_path = os.path.join(model_dir, log_file)

        with open(eval_path, "r") as f:
            eval_data = json.load(f)

        # The judge metric nests its score one level deeper than wer/bleu.
        if metrics == "llama3_70b_judge":
            value = eval_data[metrics]["judge_score"]
        else:  # "wer" or "bleu" — guaranteed by the pattern above
            value = eval_data[metrics]

        data_to_df.append([model_name, ds_name, metrics, value])


eval_result_df = pd.DataFrame(data_to_df, columns=["model", "dataset", "metrics", "value"])
# Normalize the HF checkpoint name to its public display name.
eval_result_df["model"] = eval_result_df["model"].replace("MERaLiON_AudioLLM_v1_hf", "MERaLiON-AudioLLM-Whisper-SEA-LION")
|
|
|
|
|
# Directory holding previously published (archived) result CSVs that get
# merged with freshly evaluated rows.
archive_results_dir = "results_organized_archive"

# Directory where the merged per-metric CSV files are written.
output_results_dir = "results_organized"
|
|
|
|
|
def merge_results(display_datasets, metrics, result_sub_path=None):
    """Pivot evaluation results for one metric and merge with the archive.

    Parameters
    ----------
    display_datasets : list[str]
        Display names of the datasets to include; translated to raw
        dataset names via ``displayname2datasetname``.
    metrics : str
        Metric to select from ``eval_result_df`` (e.g. ``"wer"``,
        ``"bleu"``, ``"llama3_70b_judge"``).
    result_sub_path : str or None, optional
        Relative path of the archived CSV inside ``archive_results_dir``.
        When ``None``, no archive merge is attempted.

    Returns
    -------
    tuple[pandas.DataFrame, pandas.DataFrame]
        ``(new_result, combined_result)``: the freshly evaluated table,
        and the same table extended with archived rows (new rows win on
        duplicate model names). When no archive is found the two frames
        are the same object.
    """
    raw_ds_names = [displayname2datasetname[dis_name] for dis_name in display_datasets]

    # Keep only the requested datasets/metric, then pivot to one row per
    # model with one column per dataset.
    new_result = eval_result_df[eval_result_df["dataset"].isin(raw_ds_names) & (eval_result_df["metrics"] == metrics)]
    new_result = new_result.drop(columns=["metrics"])
    new_result = new_result.pivot(index="model", columns="dataset", values="value").reset_index()
    new_result = new_result.rename(columns={"model": "Model"})
    # Keep only models evaluated on every requested dataset.
    new_result = new_result.dropna(axis=0, how="any")

    # BUG FIX: the original joined result_sub_path unconditionally, so the
    # documented default of None raised TypeError in os.path.join.
    if result_sub_path is not None:
        archive_result_path = os.path.join(archive_results_dir, result_sub_path)
        if os.path.exists(archive_result_path):
            archive_result = pd.read_csv(archive_result_path)
            # Drop archived columns no longer part of this dataset group.
            archive_columns = [col for col in archive_result.columns if col in raw_ds_names]
            archive_result = archive_result[["Model"] + archive_columns]
            combined_result = pd.concat([archive_result, new_result], ignore_index=True)
            # keep="last": fresh evaluations override archived ones.
            combined_result = combined_result.drop_duplicates(subset=["Model"], keep="last", ignore_index=True)
            return new_result, combined_result

    return new_result, new_result
|
|
|
|
|
# Maps each output CSV, as a relative path "<metric>/<group>.csv", to the
# list of display dataset names belonging in that file. The metric is
# parsed back out of the path prefix by the driver loop below.
result_file_mapper = {
    "bleu/st.csv": speech_translation_datasets,
    "llama3_70b_judge/accent_recognition.csv": ar_datasets,
    "llama3_70b_judge/audio_captioning.csv": ac_datasets,
    "llama3_70b_judge/audio_scene_question_answering.csv": asqa_datasets,
    "llama3_70b_judge/emotion_recognition.csv": er_datasets,
    "llama3_70b_judge/gender_recognition.csv": gr_datasets,
    "llama3_70b_judge/music_understanding.csv": music_datasets,
    "llama3_70b_judge/sds_singlish.csv": sds_datasets,
    "llama3_70b_judge/speech_instruction.csv": si_datasets,
    "llama3_70b_judge/sqa_english.csv": speech_qa_english_datasets,
    "llama3_70b_judge/sqa_singlish.csv": speech_qa_singlish_datasets,
    "llama3_70b_judge/under_development_llama3_70b_judge.csv": non_wer_development_datasets,
    # NOTE(review): the score-file pattern used when building
    # eval_result_df only captures llama3_70b_judge/wer/bleu, so "meteor"
    # rows never reach eval_result_df and this file gets no new rows —
    # confirm whether meteor scores are collected elsewhere.
    "meteor/audio_captioning.csv": ac_datasets,
    "wer/asr_english.csv": asr_english_datasets,
    "wer/asr_singlish.csv": asr_singlish_datasets,
    "wer/asr_mandarin.csv": asr_mandarin_datasets,
    "wer/asr_malay.csv": asr_malay_datasets,
    "wer/asr_tamil.csv": asr_tamil_datasets,
    "wer/asr_indonesian.csv": asr_indonesian_datasets,
    "wer/asr_thai.csv": asr_thai_datasets,
    "wer/asr_vietnamese.csv": asr_vietnamese_datasets,
    "wer/asr_private.csv": asr_private_datasets,
    "wer/under_development_wer.csv": wer_development_datasets,
}
|
|
|
|
|
# Write one merged CSV per result group. The metric is the first path
# component of the key (e.g. "wer/asr_english.csv" -> "wer").
for sub_path, display_ds in result_file_mapper.items():
    metrics = sub_path.split("/")[0]
    # Only the archive-merged table is written; the fresh-only table is
    # unused here (original bound it to a dead local).
    _, combined_result = merge_results(display_ds, metrics, sub_path)

    output_path = os.path.join(output_results_dir, sub_path)
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    combined_result.to_csv(output_path, index=False)