Open_Voice_Cloning_Leaderboard / src /process_leaderboard_data.py
iwonachristop's picture
Add: round results
fbff9e6
import pandas as pd
import gradio as gr
class Leaderboard:
    """Load model results from a CSV file and build leaderboard tables.

    The CSV must provide the columns ``model``, ``dataset``, ``emotion`` and
    ``wavlm``; every other column is kept as an additional feature. Rows whose
    ``dataset`` or ``emotion`` value is ``'All'`` are shown in the leaderboard
    under the ``Average`` column.
    """

    def __init__(self, data_path: str = 'results.csv'):
        """Read the results file and cache the lists derived from it.

        Args:
            data_path (str): Path of the CSV file holding the results.
        """
        self.data_path = data_path
        self.models_performance_dataframe = self.initialize_models_performance_dataframe()
        self.link_dict = self.initialize_link_dict()
        self.dataset_list = sorted(set(self.models_performance_dataframe['dataset']))
        self.emotional_dataset_list = sorted(self.initialize_emotional_datasets())
        self.emotion_list = sorted(set(self.models_performance_dataframe['emotion']))

    def get_dataset_list(self) -> list:
        """Return the sorted list of all dataset names found in the results."""
        return self.dataset_list

    def get_emotional_dataset_list(self) -> list:
        """Return the sorted list of datasets that have per-emotion rows."""
        return self.emotional_dataset_list

    def get_emotion_list(self) -> list:
        """Return the sorted list of all emotion labels found in the results."""
        return self.emotion_list

    def get_models_performance_dataframe_column(self) -> pd.Index:
        """Return the column labels of the results dataframe (a ``pd.Index``)."""
        return self.models_performance_dataframe.columns

    def initialize_models_performance_dataframe(self) -> pd.DataFrame:
        """Load the results CSV with the key columns moved to the front.

        Returns:
            pd.DataFrame: The file content ordered as ``model``, ``dataset``,
            ``emotion``, ``wavlm`` followed by the remaining columns in file
            order.

        Raises:
            KeyError: If one of the four key columns is missing from the file.
        """
        model_performance_dataframe = pd.read_csv(self.data_path)
        initial_columns = ['model', 'dataset', 'emotion', 'wavlm']
        other_columns = [
            column for column in model_performance_dataframe.columns
            if column not in initial_columns
        ]
        return model_performance_dataframe[initial_columns + other_columns]

    def initialize_emotional_datasets(self) -> list:
        """Return the datasets that have at least one row with a specific emotion.

        A dataset qualifies when any of its rows has an ``emotion`` value other
        than ``'All'``.

        Returns:
            list: Unique dataset names (unsorted).
        """
        frame = self.models_performance_dataframe
        return frame.loc[frame['emotion'] != 'All', 'dataset'].unique().tolist()

    def initialize_link_dict(self) -> dict:
        """Return the mapping from model name to its project page URL."""
        return {
            'WhisperSpeech/WhisperSpeech': 'https://huggingface.co/WhisperSpeech/WhisperSpeech',
            'microsoft/speecht5_vc': 'https://huggingface.co/microsoft/speecht5_vc',
            'Plachtaa/VALL-E-X': 'https://github.com/Plachtaa/VALL-E-X',
            'coqui/XTTS-v2': 'https://huggingface.co/coqui/XTTS-v2',
            'OuteAI/OuteTTS-0.2-500M': 'https://huggingface.co/OuteAI/OuteTTS-0.2-500M',
        }

    def _format_model_link(self, model):
        """Render a model name as a Markdown link, or as plain text if no URL is known."""
        link = self.link_dict.get(model)
        return f'[{model}]({link})' if link else model

    def create_leaderboard_data(self, selected_emotion_or_dataset, selected_feature, emotion_or_dataset) -> pd.DataFrame:
        """Create leaderboard data.

        Args:
            selected_emotion_or_dataset (str): The selected emotion or dataset to filter the leaderboard data.
            selected_feature (str): The selected feature to display in the leaderboard.
            emotion_or_dataset (str): Indicates whether the selection is based on 'emotion' or 'dataset'.

        Returns:
            pd.DataFrame: One row per model with the columns ``Rank``,
            ``Model``, ``Average``, optionally ``LS Clean``, then the remaining
            breakdown columns in sorted order. Values are rounded to four
            decimals and rows are ordered by ``Average``, highest first. When
            nothing matches the selection the table is empty; when no ``'All'``
            row exists the ``Average`` column is filled with NaN.
        """
        frame = self.models_performance_dataframe
        # The dimension that was NOT selected becomes the table's columns.
        opposite_emotion_or_dataset = 'dataset' if emotion_or_dataset == 'emotion' else 'emotion'
        selection = frame[frame[emotion_or_dataset] == selected_emotion_or_dataset]

        # Collect, per model, the feature value for every breakdown entry.
        # A later row for the same (model, entry) pair overrides an earlier one.
        leaderboard_data = {}
        entries = zip(
            selection['model'],
            selection[opposite_emotion_or_dataset],
            selection[selected_feature],
        )
        for model, breakdown, score in entries:
            column = 'Average' if breakdown == 'All' else breakdown
            leaderboard_data.setdefault(model, {})[column] = score

        leaderboard_dataframe = pd.DataFrame(
            [{'Model': model, **scores} for model, scores in leaderboard_data.items()]
        )

        # Ensure specific columns appear first; add the others in sorted order.
        initial_columns = ['Model', 'Average']
        if 'LS Clean' in leaderboard_dataframe.columns:
            initial_columns.append('LS Clean')
        sorted_columns = initial_columns + sorted(
            column for column in leaderboard_dataframe.columns
            if column not in initial_columns
        )
        # reindex (rather than plain selection) creates 'Model'/'Average' when
        # they are absent, so an empty selection or a missing 'All' row yields
        # a well-formed table instead of a KeyError. round() only touches
        # numeric columns, leaving 'Model' as is.
        leaderboard_dataframe = leaderboard_dataframe.reindex(columns=sorted_columns).round(4)

        # Map model names to hyperlinks using the link_dict.
        leaderboard_dataframe['Model'] = leaderboard_dataframe['Model'].map(self._format_model_link)

        # mergesort is stable, so models with equal averages keep a
        # deterministic (first-seen) order; NaN averages are placed last.
        leaderboard_dataframe = leaderboard_dataframe.sort_values(
            by='Average', ascending=False, kind='mergesort'
        )
        leaderboard_dataframe.insert(0, 'Rank', range(1, len(leaderboard_dataframe) + 1))
        return leaderboard_dataframe

    def update_leaderboard_data_in_emotion_section(self, selected_emotion_or_dataset, emotion_or_dataset, leaderboard_table) -> tuple:
        """Update leaderboard data based on selected emotion or dataset in the emotion section.

        The table always shows the ``wavlm`` feature. Columns of datasets that
        have no emotion-specific rows are removed from it.

        Args:
            selected_emotion_or_dataset (str): The selected emotion or dataset to filter the leaderboard data.
            emotion_or_dataset (str): Indicates whether the selection is based on 'emotion' or 'dataset'.
            leaderboard_table (pd.DataFrame): Previous leaderboard data.

        Returns:
            tuple: ``(gradio update, pd.DataFrame)``. With a selection, the
            update carries ``value=None`` and the dataframe is the new table;
            without one, the update is empty and the previous table is passed
            through. NOTE(review): the update presumably clears the other
            selector in the UI - confirm against the Gradio wiring.
        """
        if selected_emotion_or_dataset is None:
            return gr.update(), leaderboard_table

        leaderboard_dataframe = self.create_leaderboard_data(selected_emotion_or_dataset, 'wavlm', emotion_or_dataset)
        # Drop dataset columns without emotion-based division.
        columns_to_drop = [
            dataset for dataset in self.dataset_list
            if dataset in leaderboard_dataframe.columns
            and dataset not in self.emotional_dataset_list
        ]
        leaderboard_dataframe = leaderboard_dataframe.drop(columns=columns_to_drop)
        return gr.update(value=None), leaderboard_dataframe

    def update_leaderboard_data_in_feature_section(self, selected_emotion_or_dataset, selected_feature, emotion_or_dataset, leaderboard_table) -> tuple:
        """Update leaderboard data based on selected emotion or dataset in the feature section.

        Args:
            selected_emotion_or_dataset (str): The selected emotion or dataset to filter the leaderboard data.
            selected_feature (str): The selected feature to display in the leaderboard.
            emotion_or_dataset (str): Indicates whether the selection is based on 'emotion' or 'dataset'.
            leaderboard_table (pd.DataFrame): Previous leaderboard data.

        Returns:
            tuple: ``(gradio update, pd.DataFrame)``. With a selection, the
            update carries ``value=None`` and the dataframe is the new table;
            without one, the update is empty and the previous table is passed
            through.
        """
        if selected_emotion_or_dataset is None:
            return gr.update(), leaderboard_table

        leaderboard_dataframe = self.create_leaderboard_data(selected_emotion_or_dataset, selected_feature, emotion_or_dataset)
        return gr.update(value=None), leaderboard_dataframe

    def update_leaderboard_data_by_feature(self, emotion, dataset, selected_feature) -> pd.DataFrame:
        """Update leaderboard data based on the selected feature.

        The emotion takes precedence: the dataset is only used when no emotion
        is selected.

        Args:
            emotion (str): Currently selected emotion to filter the leaderboard data.
            dataset (str): Currently selected dataset to filter the leaderboard data.
            selected_feature (str): The selected feature to display in the leaderboard.

        Returns:
            pd.DataFrame: A dataframe containing the leaderboard data.
        """
        if emotion is not None:
            return self.create_leaderboard_data(emotion, selected_feature, 'emotion')
        return self.create_leaderboard_data(dataset, selected_feature, 'dataset')