File size: 8,376 Bytes
33a4cab
 
 
 
 
d00169e
33a4cab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d00169e
 
 
 
 
33a4cab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fbff9e6
 
33a4cab
 
 
 
 
 
 
fbff9e6
 
 
 
 
33a4cab
 
3f602c8
d00169e
33a4cab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import pandas as pd
import gradio as gr


class Leaderboard():
    '''
    Build leaderboard tables from a CSV file of per-model evaluation results.

    The CSV must contain at least the columns 'model', 'dataset', 'emotion'
    and 'wavlm'; every additional column is kept and can be selected as a
    feature. A row whose 'dataset' (or 'emotion') value is 'All' is treated
    as the aggregate over that dimension and is shown as the 'Average'
    column of a leaderboard.
    '''

    def __init__(self, data_path: str = 'results.csv'):
        '''
        Load the results file and precompute the selectable values.

        Args:
            data_path (str): Path to the CSV file with the model results.
        '''
        self.data_path = data_path
        self.models_performance_dataframe = self.initialize_models_performance_dataframe()
        self.link_dict = self.initialize_link_dict()
        # Sorted unique values of the 'dataset' column.
        self.dataset_list = sorted(set(self.models_performance_dataframe['dataset']))
        # Sorted datasets that have at least one row with an emotion other than 'All'.
        self.emotional_dataset_list = sorted(self.initialize_emotional_datasets())
        # Sorted unique values of the 'emotion' column.
        self.emotion_list = sorted(set(self.models_performance_dataframe['emotion']))

    def get_dataset_list(self) -> list:
        '''Return the sorted list of dataset names found in the results.'''
        return self.dataset_list

    def get_emotional_dataset_list(self) -> list:
        '''Return the sorted list of datasets that have per-emotion rows.'''
        return self.emotional_dataset_list

    def get_emotion_list(self) -> list:
        '''Return the sorted list of emotion labels found in the results.'''
        return self.emotion_list

    def get_models_performance_dataframe_column(self) -> pd.Index:
        '''Return the column labels of the loaded results dataframe.'''
        return self.models_performance_dataframe.columns

    def initialize_models_performance_dataframe(self) -> pd.DataFrame:
        '''
        Load the models performance dataframe from the CSV file.

        Returns:
            pd.DataFrame: The file content with 'model', 'dataset', 'emotion'
            and 'wavlm' moved to the front; the remaining columns keep their
            original relative order.

        Raises:
            KeyError: If one of the four leading columns is missing.
        '''
        model_performance_dataframe = pd.read_csv(self.data_path)
        initial_columns = ['model', 'dataset', 'emotion', 'wavlm']
        remaining_columns = [
            column for column in model_performance_dataframe.columns
            if column not in initial_columns
        ]
        return model_performance_dataframe[initial_columns + remaining_columns]

    def initialize_emotional_datasets(self) -> list:
        '''
        Collect the datasets that have at least one emotion-specific row.

        Returns:
            list: Unique dataset names (in no particular order) taken from
            the rows whose 'emotion' value is not 'All'.
        '''
        frame = self.models_performance_dataframe
        has_specific_emotion = frame['emotion'] != 'All'
        return list(frame.loc[has_specific_emotion, 'dataset'].unique())

    def initialize_link_dict(self) -> dict:
        '''
        Return the mapping from model name to its project page.

        Returns:
            dict: Model name -> URL (Hugging Face or GitHub).
        '''
        return {
            'WhisperSpeech/WhisperSpeech': 'https://huggingface.co/WhisperSpeech/WhisperSpeech',
            'microsoft/speecht5_vc': 'https://huggingface.co/microsoft/speecht5_vc',
            'Plachtaa/VALL-E-X': 'https://github.com/Plachtaa/VALL-E-X',
            'coqui/XTTS-v2': 'https://huggingface.co/coqui/XTTS-v2',
            'OuteAI/OuteTTS-0.2-500M': 'https://huggingface.co/OuteAI/OuteTTS-0.2-500M'
        }

    def create_leaderboard_data(self, selected_emotion_or_dataset, selected_feature, emotion_or_dataset) -> pd.DataFrame:
        '''
        Create leaderboard data.

        The result has one row per model. Its columns are 'Rank', 'Model'
        (a markdown link), 'Average', 'LS Clean' (when present) and then the
        remaining values of the opposite dimension in sorted order. Rows are
        ordered by 'Average', best first.

        Args:
            selected_emotion_or_dataset (str): The selected emotion or dataset to filter the leaderboard data.
            selected_feature (str): The selected feature to display in the leaderboard.
            emotion_or_dataset (str): Indicates whether the selection is based on 'emotion' or 'dataset'.

        Returns:
            pd.DataFrame: A dataframe containing the leaderboard data. When
            nothing matches the selection the dataframe is empty but still
            has the 'Rank', 'Model' and 'Average' columns; when the selection
            has no 'All' row the 'Average' column is filled with NaN.
        '''
        # The dimension that becomes the columns of the leaderboard.
        opposite_emotion_or_dataset = 'dataset' if emotion_or_dataset == 'emotion' else 'emotion'

        # Boolean indexing returns a new dataframe, so the stored results are never modified.
        results = self.models_performance_dataframe
        selection = results[results[emotion_or_dataset] == selected_emotion_or_dataset]

        # Aggregate the selected feature per model: {model: {column label: value}}.
        leaderboard_data = {}
        per_row_values = zip(
            selection['model'],
            selection[opposite_emotion_or_dataset],
            selection[selected_feature],
        )
        for model, column_label, value in per_row_values:
            scores = leaderboard_data.setdefault(model, {})
            # The 'All' row holds the aggregate over the opposite dimension.
            scores['Average' if column_label == 'All' else column_label] = value

        leaderboard_dataframe = pd.DataFrame(
            [{'Model': model, **scores} for model, scores in leaderboard_data.items()]
        )

        # Ensure specific columns appear first in the dataframe.
        initial_columns = ['Model', 'Average']
        if 'LS Clean' in leaderboard_dataframe.columns:
            initial_columns.append('LS Clean')

        # Add other columns in sorted order. reindex (rather than plain column
        # selection) creates 'Model'/'Average' when they are absent instead of
        # raising KeyError, and returns a fresh dataframe that can be assigned
        # to without a SettingWithCopyWarning.
        other_columns = sorted(
            column for column in leaderboard_dataframe.columns
            if column not in initial_columns
        )
        leaderboard_dataframe = leaderboard_dataframe.reindex(columns=initial_columns + other_columns)

        # Round results (only numeric columns can be rounded).
        for column_name in leaderboard_dataframe.select_dtypes(include='number').columns:
            if column_name != 'Model':
                leaderboard_dataframe[column_name] = leaderboard_dataframe[column_name].round(4)

        # Map model names to hyperlinks using the link_dict.
        leaderboard_dataframe['Model'] = leaderboard_dataframe['Model'].map(
            lambda model: f"[{model}]({self.link_dict.get(model, '')})"
        )
        leaderboard_dataframe = leaderboard_dataframe.sort_values(by='Average', ascending=False)
        leaderboard_dataframe.insert(0, "Rank", range(1, len(leaderboard_dataframe) + 1))
        return leaderboard_dataframe

    def update_leaderboard_data_in_emotion_section(self, selected_emotion_or_dataset, emotion_or_dataset, leaderboard_table) -> tuple:
        '''
        Update leaderboard data based on selected emotion or dataset in the emotion section

        The leaderboard is always built from the 'wavlm' feature.

        Args:
            selected_emotion_or_dataset (str): The selected emotion or dataset to filter the leaderboard data.
            emotion_or_dataset (str): Indicates whether the selection is based on 'emotion' or 'dataset'.
            leaderboard_table (pd.Dataframe): previous leaderboard data.

        Returns:
            tuple: A Gradio update followed by the leaderboard dataframe. With
            a selection, the update sets the value of its target component to
            None (presumably the other selector - confirm against the UI
            wiring) and the dataframe is freshly built; without a selection
            the update is a no-op and the previous table is returned.
        '''
        if selected_emotion_or_dataset is None:
            return gr.update(), leaderboard_table

        leaderboard_dataframe = self.create_leaderboard_data(selected_emotion_or_dataset, 'wavlm', emotion_or_dataset)

        # Drop the columns of datasets without emotion-based division.
        non_emotional_columns = [
            dataset for dataset in self.dataset_list
            if dataset in leaderboard_dataframe.columns and dataset not in self.emotional_dataset_list
        ]
        leaderboard_dataframe = leaderboard_dataframe.drop(columns=non_emotional_columns)

        return gr.update(value=None), leaderboard_dataframe

    def update_leaderboard_data_in_feature_section(self, selected_emotion_or_dataset, selected_feature, emotion_or_dataset, leaderboard_table) -> tuple:
        '''
        Update leaderboard data based on selected emotion or dataset in the feature section

        Args:
            selected_emotion_or_dataset (str): The selected emotion or dataset to filter the leaderboard data.
            selected_feature (str): The selected feature to display in the leaderboard.
            emotion_or_dataset (str): Indicates whether the selection is based on 'emotion' or 'dataset'.
            leaderboard_table (pd.Dataframe): previous leaderboard data.

        Returns:
            tuple: A Gradio update followed by the leaderboard dataframe. With
            a selection, the update sets the value of its target component to
            None (presumably the other selector - confirm against the UI
            wiring) and the dataframe is freshly built; without a selection
            the update is a no-op and the previous table is returned.
        '''
        if selected_emotion_or_dataset is None:
            return gr.update(), leaderboard_table

        return gr.update(value=None), self.create_leaderboard_data(selected_emotion_or_dataset, selected_feature, emotion_or_dataset)

    def update_leaderboard_data_by_feature(self, emotion, dataset, selected_feature) -> pd.DataFrame:
        '''
        Update leaderboard data based on the selected feature

        The emotion takes precedence: the dataset is only used when no
        emotion is selected.

        Args:
            emotion (str): Currently selected emotion to filter the leaderboard data.
            dataset (str): Currently selected dataset to filter the leaderboard data
            selected_feature (str): The selected feature to display in the leaderboard.

        Returns:
            pd.DataFrame: A dataframe containing the leaderboard data.
        '''
        if emotion is not None:
            return self.create_leaderboard_data(emotion, selected_feature, 'emotion')
        return self.create_leaderboard_data(dataset, selected_feature, 'dataset')