import numpy as np
from random import sample
from sklearn.metrics import accuracy_score, f1_score, hamming_loss, roc_auc_score

class Metric(object):
    """Multi-label metrics over a predicted score matrix and a true binary label matrix."""

    def __init__(self, output, label):
        self.output = output  # predicted score matrix, shape (num_instances, num_labels)
        self.label = label    # true binary label matrix, same shape

    def accuracy_subset(self, threshold=0.5):
        """Subset (exact-match) accuracy: an instance counts only if every label is correct."""
        y_pred = np.where(self.output > threshold, 1, 0)
        return accuracy_score(self.label, y_pred)

    def accuracy(self, threshold=0.5):
        """Alias of accuracy_subset: sklearn's accuracy_score is exact-match on 2-D label matrices."""
        return self.accuracy_subset(threshold)
    
    def accuracy_multiclass(self):
        """Accuracy after reducing scores and one-hot labels to class indices via argmax."""
        return accuracy_score(np.argmax(self.label, 1), np.argmax(self.output, 1))
    
    def micfscore(self, threshold=0.5, average='micro'):
        """Micro-averaged F1 over the thresholded label matrix."""
        y_pred = np.where(self.output > threshold, 1, 0)
        return f1_score(self.label, y_pred, average=average)

    def macfscore(self, threshold=0.5, average='macro'):
        """Macro-averaged F1 over the thresholded label matrix."""
        y_pred = np.where(self.output > threshold, 1, 0)
        return f1_score(self.label, y_pred, average=average)
    
    def hamming_distance(self, threshold=0.5):
        """Hamming loss: the fraction of individual label entries that are wrong."""
        y_pred = np.where(self.output > threshold, 1, 0)
        return hamming_loss(self.label, y_pred)
    
    def fscore_class(self, average='micro'):
        """F1 after reducing scores and one-hot labels to class indices via argmax."""
        return f1_score(np.argmax(self.label, 1), np.argmax(self.output, 1), average=average)
    
    def auROC(self):
        """Per-label ROC AUC plus the mean over labels; degenerate labels fall back to 0.5."""
        y_pred = self.output
        y_true = self.label
        _, col = y_true.shape
        per_label = []
        for i in range(col):
            try:
                roc = roc_auc_score(y_true[:, i], y_pred[:, i])
            except ValueError:  # column i is all-positive or all-negative, AUC undefined
                roc = 0.5
            per_label.append(roc)
        return sum(per_label) / col, per_label
    
    def MacroAUC(self):
        """Macro-averaged AUC via pairwise comparison of positive and negative scores."""
        y_pred = self.output  # num_instance * num_label
        y_true = self.label   # num_instance * num_label
        num_instance, num_class = y_pred.shape
        auc = np.zeros((num_class, 1))  # per-label AUC
        count_invalid_label = 0
        for i in range(num_class):
            num_P = int(np.sum(y_true[:, i] == 1))  # positive instances for label i
            num_N = num_instance - num_P            # negative instances for label i
            # Skip labels on which all instances are positive or all are negative;
            # AUC is undefined there.
            if num_P == 0 or num_N == 0:
                auc[i, 0] = 0
                count_invalid_label += 1
                continue
            P_scores = y_pred[y_true[:, i] == 1, i]
            N_scores = y_pred[y_true[:, i] == 0, i]
            # Count pairs where a positive score exceeds a negative score (ties count 0.5).
            count = 0.0
            for m in range(num_P):
                for n in range(num_N):
                    if P_scores[m] > N_scores[n]:
                        count += 1
                    elif P_scores[m] == N_scores[n]:
                        count += 0.5
            auc[i, 0] = count / (num_P * num_N)
        macroAUC1 = np.sum(auc) / (num_class - count_invalid_label)
        return float(macroAUC1), auc


def bootstrap_auc(label, output, classes, bootstraps=5, fold_size=1000):
    """Mean auROC over repeated random folds, one row of statistics per class.

    Note: random.sample draws without replacement, so each fold is a subsample
    rather than a true bootstrap resample.
    """
    statistics = np.zeros((len(classes), bootstraps))
    pairs = [[output[k], label[k]] for k in range(len(label))]
    for c in range(len(classes)):
        for i in range(bootstraps):
            size = min(fold_size, len(pairs))  # cap the fold at the dataset size
            X = sample(pairs, size)
            Output = np.array([x[0] for x in X])
            Label = np.array([x[1] for x in X])
            myMetric = Metric(Output, Label)
            AUROC1, auc = myMetric.auROC()
            statistics[c][i] = AUROC1
    return statistics
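

# Minimal usage sketch on synthetic data; the shapes, the label-generation scheme,
# and the parameter values below are illustrative assumptions, not part of any dataset.
if __name__ == "__main__":
    rng = np.random.RandomState(0)
    scores = rng.rand(100, 4)                          # predicted scores: 100 instances x 4 labels
    labels = (rng.rand(100, 4) < scores).astype(int)   # synthetic labels correlated with the scores

    m = Metric(scores, labels)
    print("subset accuracy:", m.accuracy_subset())
    print("micro F1:       ", m.micfscore())
    print("macro F1:       ", m.macfscore())
    print("hamming loss:   ", m.hamming_distance())
    print("mean auROC:     ", m.auROC()[0])
    print("macro AUC:      ", m.MacroAUC()[0])

    stats = bootstrap_auc(labels, scores, classes=range(4), bootstraps=3, fold_size=50)
    print("fold auROC statistics:\n", stats)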