File size: 5,384 Bytes
092fe0d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
import sklearn
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import laplacian
from scipy.sparse.linalg import eigs
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import hamming_loss
from sklearn.metrics import roc_auc_score
import pandas as pd
from random import sample
# np.set_printoptions(threshold='nan')
class Metric(object):
    """Evaluation metrics for multi-label / multi-class predictions.

    ``output`` holds the predicted scores and ``label`` the ground-truth
    indicator matrix.  Both are used as 2-D ``(num_instance, num_label)``
    arrays by the methods below.
    """

    def __init__(self, output, label):
        self.output = output  # predicted score matrix
        self.label = label  # true label matrix

    def _binarize(self, threash):
        """Return the scores as a 0/1 matrix (1 where score > ``threash``)."""
        return np.where(np.asarray(self.output) > threash, 1, 0)

    def accuracy_subset(self, threash=0.5):
        """Return the subset accuracy of the thresholded predictions.

        For an indicator matrix ``accuracy_score`` counts a row as correct
        only when every label in that row matches.
        """
        return accuracy_score(self.label, self._binarize(threash))

    def accuracy(self, threash=0.5):
        """Return the same value as :meth:`accuracy_subset`.

        Kept as a separate entry point for backward compatibility; the
        original implementation performed the identical computation.
        """
        return self.accuracy_subset(threash)

    def accuracy_multiclass(self):
        """Return the accuracy of the arg-max class per row."""
        return accuracy_score(
            np.argmax(self.label, 1), np.argmax(self.output, 1)
        )

    def micfscore(self, threash=0.5, type='micro'):
        """Return the F1 score of the thresholded predictions.

        ``type`` is forwarded as sklearn's ``average`` (default ``'micro'``).
        The name shadows the builtin but is kept for caller compatibility.
        """
        # sklearn expects (y_true, y_pred); the order matters for
        # support-dependent averages such as 'weighted'.
        return f1_score(self.label, self._binarize(threash), average=type)

    def macfscore(self, threash=0.5, type='macro'):
        """Return the F1 score of the thresholded predictions.

        Same as :meth:`micfscore` but ``average`` defaults to ``'macro'``.
        """
        return f1_score(self.label, self._binarize(threash), average=type)

    def hamming_distance(self, threash=0.5):
        """Return the Hamming loss of the thresholded predictions."""
        return hamming_loss(self.label, self._binarize(threash))

    def fscore_class(self, type='micro'):
        """Return the F1 score of the arg-max class per row."""
        return f1_score(
            np.argmax(self.label, 1), np.argmax(self.output, 1), average=type
        )

    def auROC(self):
        """Return ``(mean_auc, per_label_auc)`` using ``roc_auc_score``.

        A label column whose AUC is undefined (only one class present, or
        sklearn rejects the input with ``ValueError``) is scored 0.5, i.e.
        chance level.  ``mean_auc`` is the plain mean over all columns and is
        ``nan`` when there are no columns.
        """
        y_true = np.asarray(self.label)
        y_pred = np.asarray(self.output)
        _, col = y_true.shape
        per_label = []
        for i in range(col):
            targets = y_true[:, i]
            if np.unique(targets).size < 2:
                # AUC is undefined with a single class; checked up front
                # because sklearn versions differ between raising and
                # returning nan here.
                score = 0.5
            else:
                try:
                    score = float(roc_auc_score(targets, y_pred[:, i]))
                except ValueError:
                    score = 0.5
            per_label.append(score)
        mean_auc = float(np.mean(per_label)) if per_label else float("nan")
        return mean_auc, per_label

    def MacroAUC(self):
        """Return ``(macro_auc, auc)`` computed by pairwise ranking.

        For each label the AUC is the fraction of (positive, negative)
        instance pairs where the positive scores higher, with ties counting
        one half.  An instance is positive when its label equals 1 and
        negative otherwise.  Labels with no positive or no negative instance
        get AUC 0 and are excluded from the macro average.

        Returns:
            macro_auc: mean AUC over the valid labels as a ``float``
                (``nan`` when no label is valid).
            auc: ``(num_label, 1)`` array with the per-label AUC.
        """
        y_pred = np.asarray(self.output)  # num_instance * num_label
        y_true = np.asarray(self.label)  # num_instance * num_label
        num_instance, num_class = y_pred.shape
        auc = np.zeros((num_class, 1))
        num_valid = 0
        for i in range(num_class):
            is_pos = y_true[:, i] == 1
            n_pos = int(np.count_nonzero(is_pos))
            n_neg = num_instance - n_pos
            if n_pos == 0 or n_neg == 0:
                continue  # AUC undefined for this label; leave it at 0
            num_valid += 1
            scores = y_pred[:, i].astype(float)
            pos = scores[is_pos]
            neg = scores[~is_pos]
            # NaN never compares greater or equal, so NaN scores win no
            # pair; drop them before the sorted search but keep them in the
            # denominator.
            pos = pos[~np.isnan(pos)]
            neg_sorted = np.sort(neg[~np.isnan(neg)])
            # For each positive: negatives strictly below it are wins,
            # negatives equal to it are ties worth one half.
            below = np.searchsorted(neg_sorted, pos, side='left')
            upto = np.searchsorted(neg_sorted, pos, side='right')
            wins = below.sum() + 0.5 * (upto - below).sum()
            auc[i, 0] = wins / (n_pos * n_neg)
        if num_valid == 0:
            return float("nan"), auc
        return float(auc.sum() / num_valid), auc
def bootstrap_auc(label, output, classes, bootstraps=5, fold_size=1000):
    """Return bootstrap estimates of the mean AUROC.

    For every entry of ``classes`` and every bootstrap round, up to
    ``fold_size`` rows are drawn without replacement (``random.sample``) from
    the paired ``output`` / ``label`` rows, and the first value returned by
    ``Metric.auROC`` on that subsample is stored.

    Args:
        label: true label rows, indexable along the first axis.
        output: predicted score rows, aligned with ``label``.
        classes: sequence whose length sets the number of result rows.
        bootstraps: number of resampling rounds per class.
        fold_size: rows drawn per round; capped at the number of rows.

    Returns:
        ``(len(classes), bootstraps)`` array of AUROC values.
    """
    labels = np.asarray(label)
    outputs = np.asarray(output)
    num_rows = len(labels)
    # Cap the draw size: random.sample raises when asked for more items than
    # the population holds.
    draw = min(fold_size, num_rows)
    statistics = np.zeros((len(classes), bootstraps))
    # NOTE(review): the class index only selects the result row; every row
    # stores the AUROC averaged over all labels, as in the original code.
    # Confirm whether a per-class AUC was intended.
    for c in range(len(classes)):
        for i in range(bootstraps):
            picked = sample(range(num_rows), draw)
            mean_auc, _ = Metric(outputs[picked], labels[picked]).auROC()
            statistics[c][i] = mean_auc
    return statistics
|