/*!
 * Figue v1.0.1
 *
 * Copyright 2010, Jean-Yves Delort
 * Licensed under the MIT license.
 *
 */

/**
 * figue: small clustering toolkit (agglomerative hierarchical clustering,
 * k-means, fuzzy c-means) plus an ASCII dendrogram renderer.
 *
 * The module is exposed as the global `figue`. The numeric settings on the
 * returned object (PRINT_VECTOR_VALUE_PRECISION, KMEANS_MAX_ITERATIONS,
 * FCMEANS_MAX_ITERATIONS) are read at call time, so callers may change them.
 */
var figue = function () {
  'use strict';

  /* ------------------------------------------------------------------ */
  /* Distances and vector arithmetic                                     */
  /* ------------------------------------------------------------------ */

  /** Euclidean (L2) distance between two vectors of equal length. */
  function euclidianDistance(vec1, vec2) {
    var N = vec1.length;
    var d = 0;
    for (var i = 0; i < N; i++) {
      d += Math.pow(vec1[i] - vec2[i], 2);
    }
    return Math.sqrt(d);
  }

  /** Manhattan (L1) distance between two vectors of equal length. */
  function manhattanDistance(vec1, vec2) {
    var N = vec1.length;
    var d = 0;
    for (var i = 0; i < N; i++) {
      d += Math.abs(vec1[i] - vec2[i]);
    }
    return d;
  }

  /** Maximum (Chebyshev) distance between two vectors of equal length. */
  function maxDistance(vec1, vec2) {
    var N = vec1.length;
    var d = 0;
    for (var i = 0; i < N; i++) {
      d = Math.max(d, Math.abs(vec1[i] - vec2[i]));
    }
    return d;
  }

  /** Returns a new vector holding the element-wise sum vec1 + vec2. */
  function addVectors(vec1, vec2) {
    var N = vec1.length;
    var vec = new Array(N);
    for (var i = 0; i < N; i++) {
      vec[i] = vec1[i] + vec2[i];
    }
    return vec;
  }

  /** Returns a new vector holding value * vec (the input is not modified). */
  function multiplyVectorByValue(value, vec) {
    var N = vec.length;
    var v = new Array(N);
    for (var i = 0; i < N; i++) {
      v[i] = value * vec[i];
    }
    return v;
  }

  /** Dot product of two vectors of equal length. */
  function vectorDotProduct(vec1, vec2) {
    var N = vec1.length;
    var s = 0;
    for (var i = 0; i < N; i++) {
      s += vec1[i] * vec2[i];
    }
    return s;
  }

  /**
   * Element-wise strict equality of two flat vectors.
   * Used internally so the algorithms do not depend on the patched
   * Array.prototype.compare.
   */
  function vectorsEqual(vec1, vec2) {
    if (vec1.length !== vec2.length) {
      return false;
    }
    for (var i = 0; i < vec1.length; i++) {
      if (vec1[i] !== vec2[i]) {
        return false;
      }
    }
    return true;
  }

  /** Index of the first smallest value of `row` (0 when nothing is smaller). */
  function indexOfMin(row) {
    var best = 0;
    for (var j = 1; j < row.length; j++) {
      if (row[j] < row[best]) {
        best = j;
      }
    }
    return best;
  }

  /**
   * Size-weighted mean of two centroids.
   * @param {number} c1Size      number of points behind c1Centroid
   * @param {number[]} c1Centroid
   * @param {number} c2Size      number of points behind c2Centroid
   * @param {number[]} c2Centroid
   * @returns {number[]} centroid of the merged cluster
   */
  function calculateCentroid(c1Size, c1Centroid, c2Size, c2Centroid) {
    var newCentroid = new Array(c1Centroid.length);
    var newSize = c1Size + c2Size;
    for (var i = 0; i < c1Centroid.length; i++) {
      newCentroid[i] = (c1Size * c1Centroid[i] + c2Size * c2Centroid[i]) / newSize;
    }
    return newCentroid;
  }

  /* ------------------------------------------------------------------ */
  /* String helpers used by the dendrogram renderer                      */
  /* ------------------------------------------------------------------ */

  /** Returns the string `c` repeated `n` times ("" when n <= 0). */
  function repeatChar(c, n) {
    var str = "";
    for (var i = 0; i < n; i++) {
      str += c;
    }
    return str;
  }

  /**
   * Centers `str` in a field of `width` characters (extra space goes right).
   * Returns undefined when `str` is longer than `width`.
   */
  function centerString(str, width) {
    var diff = width - str.length;
    if (diff < 0) {
      return;
    }
    var halfdiff = Math.floor(diff / 2);
    return repeatChar(" ", halfdiff) + str + repeatChar(" ", diff - halfdiff);
  }

  /**
   * Places `str` at column `index` in a field of exactly `width` characters.
   * The column is clamped to [0, width - str.length] so the result never
   * exceeds `width`; a longer line would shift everything merged to its
   * right. Returns undefined when `str` is longer than `width`.
   */
  function putString(str, width, index) {
    var diff = width - str.length;
    if (diff < 0) {
      return;
    }
    var offset = Math.min(Math.max(index, 0), diff);
    return repeatChar(" ", offset) + str + repeatChar(" ", diff - offset);
  }

  /** Comma-joined vector, each value rounded to PRINT_VECTOR_VALUE_PRECISION decimals. */
  function prettyVector(vector) {
    var vals = new Array(vector.length);
    var precision = Math.pow(10, figue.PRINT_VECTOR_VALUE_PRECISION);
    for (var i = 0; i < vector.length; i++) {
      vals[i] = Math.round(vector[i] * precision) / precision;
    }
    return vals.join(",");
  }

  /** A single value rounded to PRINT_VECTOR_VALUE_PRECISION decimals, as a string. */
  function prettyValue(value) {
    var precision = Math.pow(10, figue.PRINT_VECTOR_VALUE_PRECISION);
    return String(Math.round(value * precision) / precision);
  }

  /* ------------------------------------------------------------------ */
  /* Dendrogram rendering                                                */
  /* ------------------------------------------------------------------ */

  /**
   * Renders the tree rooted at `tree` as an array of equally long text lines.
   *
   * @param {Node} tree            root of the (sub)tree to draw
   * @param {number} sep           number of blank columns between two subtrees
   * @param {boolean} balanced     when true, all leaves are drawn on the same level
   * @param {boolean} withLabel    print leaf labels
   * @param {boolean} withCentroid print the centroid under every vertical bar
   * @param {boolean} withDistance print the merge distance under inner nodes
   * @returns {string[]} lines from top to bottom; line 0 holds the node's "|"
   */
  function generateDendogram(tree, sep, balanced, withLabel, withCentroid, withDistance) {
    var lines = [];
    var centroidstr = prettyVector(tree.centroid);
    var len;
    var i;

    if (tree.isLeaf()) {
      var labelstr = String(tree.label);
      len = 1;
      if (withCentroid) {
        len = Math.max(centroidstr.length, len);
      }
      if (withLabel) {
        len = Math.max(labelstr.length, len);
      }

      lines.push(centerString("|", len));
      if (withCentroid) {
        lines.push(centerString(centroidstr, len));
      }
      if (withLabel) {
        lines.push(centerString(labelstr, len));
      }
      return lines;
    }

    var distancestr = prettyValue(tree.dist);
    var left_dendo = generateDendogram(tree.left, sep, balanced, withLabel, withCentroid, withDistance);
    var right_dendo = generateDendogram(tree.right, sep, balanced, withLabel, withCentroid, withDistance);
    var left_bar_ix = left_dendo[0].indexOf("|");
    var right_bar_ix = right_dendo[0].indexOf("|");

    // calculate the number of characters of each line
    len = sep + right_dendo[0].length + left_dendo[0].length;
    if (withCentroid) {
      len = Math.max(centroidstr.length, len);
    }
    if (withDistance) {
      len = Math.max(distancestr.length, len);
    }

    // calculate the position of the new vertical bar: halfway between the
    // vertical bars of the two subtrees
    var bar_ix = left_bar_ix +
      Math.floor((left_dendo[0].length - left_bar_ix + sep + (1 + right_bar_ix)) / 2);

    // add the line with the new vertical bar
    lines.push(putString("|", len, bar_ix));
    if (withCentroid) {
      lines.push(putString(centroidstr, len, bar_ix - Math.floor(centroidstr.length / 2)));
    }
    if (withDistance) {
      lines.push(putString(distancestr, len, bar_ix - Math.floor(distancestr.length / 2)));
    }

    // add the horizontal line connecting the vertical bars of the lower level
    var hlineLen = sep + (left_dendo[0].length - left_bar_ix) + right_bar_ix + 1;
    var hline = repeatChar("_", hlineLen);
    lines.push(putString(hline, len, left_bar_ix));

    // IF the user wants a balanced tree (all leaves on the same level)
    // THEN pad the shallower subtree by repeating its top vertical-bar line
    if (balanced && left_dendo.length !== right_dendo.length) {
      var shortest;
      var longest;
      if (left_dendo.length > right_dendo.length) {
        longest = left_dendo;
        shortest = right_dendo;
      } else {
        longest = right_dendo;
        shortest = left_dendo;
      }
      var header = shortest[0];
      var toadd = longest.length - shortest.length;
      for (i = 0; i < toadd; i++) {
        shortest.unshift(header);
      }
    }

    // merge the left and right subtrees line by line; a subtree that has run
    // out of lines contributes blanks of its own width
    var rowCount = Math.max(left_dendo.length, right_dendo.length);
    var gap = repeatChar(" ", sep);
    for (i = 0; i < rowCount; i++) {
      var left = (i < left_dendo.length)
        ? left_dendo[i]
        : repeatChar(" ", left_dendo[0].length);
      var right = (i < right_dendo.length)
        ? right_dendo[i]
        : repeatChar(" ", right_dendo[0].length);
      lines.push(left + gap + right);
    }

    return lines;
  }

  /* ------------------------------------------------------------------ */
  /* Agglomerative hierarchical clustering                               */
  /* ------------------------------------------------------------------ */

  /**
   * Maps a figue.*_DISTANCE constant to its implementation. A function is
   * passed through unchanged so callers may supply their own metric.
   * Loose equality is kept on purpose so that values such as "1" (for
   * example read from a form control) keep working.
   */
  function resolveDistance(distance) {
    if (typeof distance === 'function') {
      return distance;
    }
    if (distance == figue.EUCLIDIAN_DISTANCE) {
      return euclidianDistance;
    }
    if (distance == figue.MANHATTAN_DISTANCE) {
      return manhattanDistance;
    }
    if (distance == figue.MAX_DISTANCE) {
      return maxDistance;
    }
    throw new TypeError('figue.agglomerate: unknown distance "' + distance + '"');
  }

  /**
   * Builds a binary cluster tree by repeatedly merging the two closest clusters.
   *
   * @param {Array} labels        one label per input vector
   * @param {number[][]} vectors  input vectors
   * @param {number|Function} distance figue.*_DISTANCE constant or a function (a, b) -> number
   * @param {number} linkage      figue.SINGLE_LINKAGE, COMPLETE_LINKAGE or AVERAGE_LINKAGE;
   *                              any other value leaves the merged row unchanged
   * @returns {Node|undefined} root of the tree; the single leaf when there is
   *                           exactly one vector; undefined when `vectors` is empty
   * @throws {TypeError} when `distance` is neither a known constant nor a function
   */
  function agglomerate(labels, vectors, distance, linkage) {
    var N = vectors.length;
    var dMin = new Array(N);      // dMin[i]: index of the cluster closest to cluster i
    var cSize = new Array(N);     // cSize[i]: number of points in cluster i
    var distMatrix = new Matrix(N, N).mtx;
    var clusters = new Array(N);  // clusters[i]: current tree node of cluster i
    var c1, c2, c1Cluster, c2Cluster, c1Size, c2Size;
    var i, j, p, root, newCentroid, newCluster, avg;

    distance = resolveDistance(distance);

    // initialize the distance matrix and the vector of closest clusters
    for (i = 0; i < N; i++) {
      for (j = 0; j < N; j++) {
        distMatrix[i][j] = (i === j) ? Infinity : distance(vectors[i], vectors[j]);
      }
      dMin[i] = indexOfMin(distMatrix[i]);
    }

    // create the leaves of the tree
    for (i = 0; i < N; i++) {
      clusters[i] = new Node(labels[i], null, null, 0, vectors[i]);
      cSize[i] = 1;
    }

    // with a single vector there is nothing to merge: the leaf is the tree
    if (N === 1) {
      root = clusters[0];
    }

    // main loop: N-1 merges
    for (p = 0; p < N - 1; p++) {
      // find the closest pair of clusters
      c1 = 0;
      for (i = 1; i < N; i++) {
        if (distMatrix[i][dMin[i]] < distMatrix[c1][dMin[c1]]) {
          c1 = i;
        }
      }
      c2 = dMin[c1];

      // create the node storing the merged cluster; it takes over slot c1
      c1Cluster = clusters[c1];
      c2Cluster = clusters[c2];
      newCentroid = calculateCentroid(
        c1Cluster.size, c1Cluster.centroid, c2Cluster.size, c2Cluster.centroid);
      newCluster = new Node(-1, c1Cluster, c2Cluster, distMatrix[c1][c2], newCentroid);
      clusters[c1] = newCluster;

      // remember the sizes BEFORE the merge: average linkage weights by them
      c1Size = cSize[c1];
      c2Size = cSize[c2];
      cSize[c1] = c1Size + c2Size;

      // overwrite row/column c1 with respect to the linkage type
      for (j = 0; j < N; j++) {
        if (linkage == figue.SINGLE_LINKAGE) {
          if (distMatrix[c1][j] > distMatrix[c2][j]) {
            distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j];
          }
        } else if (linkage == figue.COMPLETE_LINKAGE) {
          if (distMatrix[c1][j] < distMatrix[c2][j]) {
            distMatrix[j][c1] = distMatrix[c1][j] = distMatrix[c2][j];
          }
        } else if (linkage == figue.AVERAGE_LINKAGE) {
          // size-weighted mean of the distances from the two merged clusters to j
          avg = (c1Size * distMatrix[c1][j] + c2Size * distMatrix[c2][j]) / (c1Size + c2Size);
          distMatrix[j][c1] = distMatrix[c1][j] = avg;
        }
      }
      distMatrix[c1][c1] = Infinity;

      // infinity-out old row c2 and column c2: that slot is no longer a cluster
      for (i = 0; i < N; i++) {
        distMatrix[i][c2] = distMatrix[c2][i] = Infinity;
      }

      // Refresh the nearest-neighbour index of every row it may be stale for:
      // the merged row itself, and rows that pointed at c1 or c2 (their
      // distance to the merged cluster may have grown under complete/average
      // linkage, so another cluster may now be closer).
      for (j = 0; j < N; j++) {
        if (j === c1 || dMin[j] === c1 || dMin[j] === c2) {
          dMin[j] = indexOfMin(distMatrix[j]);
        }
      }

      // keep track of the last added cluster
      root = newCluster;
    }

    return root;
  }

  /* ------------------------------------------------------------------ */
  /* k-means and fuzzy c-means                                           */
  /* ------------------------------------------------------------------ */

  /**
   * Returns k distinct vectors randomly selected from `vectors`.
   *
   * @param {number} k
   * @param {number[][]} vectors
   * @returns {{vectors: number[][], indices: number[]}|null} null if k > n or
   *          if there are fewer than k distinct vectors in the input
   */
  function getRandomVectors(k, vectors) {
    var n = vectors.length;
    if (k > n) {
      return null;
    }

    var selected_vectors = new Array(k);
    var selected_indices = new Array(k);
    var tested_indices = {};
    var tested = 0;
    var selected = 0;
    var i, vector, select, random_index;

    while (selected < k) {
      // every index has been tried: not enough distinct vectors
      if (tested === n) {
        return null;
      }

      random_index = Math.floor(Math.random() * n);
      if (random_index in tested_indices) {
        continue;
      }
      tested_indices[random_index] = 1;
      tested++;

      vector = vectors[random_index];
      select = true;
      for (i = 0; i < selected; i++) {
        if (vectorsEqual(vector, selected_vectors[i])) {
          select = false;
          break;
        }
      }
      if (select) {
        selected_vectors[selected] = vector;
        selected_indices[selected] = random_index;
        selected++;
      }
    }

    return {'vectors': selected_vectors, 'indices': selected_indices};
  }

  /**
   * k-means clustering (Euclidean distance, random distinct initial centroids).
   * Stops when the centroids no longer change or after more than
   * figue.KMEANS_MAX_ITERATIONS iterations.
   *
   * @param {number} k            number of clusters
   * @param {number[][]} vectors  input vectors
   * @returns {{centroids: number[][], assignments: number[]}|null} null when
   *          k < 1 or when k distinct initial centroids cannot be drawn
   */
  function kmeans(k, vectors) {
    if (!(k >= 1)) {
      return null;
    }

    var n = vectors.length;
    var assignments = new Array(n);
    var clusterSizes = new Array(k);
    var repeat = true;
    var nb_iters = 0;
    var i, j, dist, mindist, best, cluster, newCentroids;

    var t = getRandomVectors(k, vectors);
    if (t === null) {
      return null;
    }
    var centroids = t.vectors;

    while (repeat) {
      // assignment step: attach every vector to its closest centroid
      for (j = 0; j < k; j++) {
        clusterSizes[j] = 0;
      }
      for (i = 0; i < n; i++) {
        mindist = Infinity;
        best = 0;
        for (j = 0; j < k; j++) {
          dist = euclidianDistance(centroids[j], vectors[i]);
          if (dist < mindist) {
            mindist = dist;
            best = j;
          }
        }
        clusterSizes[best]++;
        assignments[i] = best;
      }

      // update step: new centroid = mean of the vectors assigned to it
      newCentroids = new Array(k);
      for (j = 0; j < k; j++) {
        newCentroids[j] = null;
      }
      for (i = 0; i < n; i++) {
        cluster = assignments[i];
        if (newCentroids[cluster] === null) {
          newCentroids[cluster] = vectors[i];
        } else {
          newCentroids[cluster] = addVectors(newCentroids[cluster], vectors[i]);
        }
      }
      for (j = 0; j < k; j++) {
        if (clusterSizes[j] === 0) {
          // a cluster that lost all its vectors keeps its previous centroid
          newCentroids[j] = centroids[j];
        } else {
          newCentroids[j] = multiplyVectorByValue(1 / clusterSizes[j], newCentroids[j]);
        }
      }

      // check convergence
      repeat = false;
      for (j = 0; j < k; j++) {
        if (!vectorsEqual(newCentroids[j], centroids[j])) {
          repeat = true;
          break;
        }
      }
      centroids = newCentroids;
      nb_iters++;

      // check the number of iterations
      if (nb_iters > figue.KMEANS_MAX_ITERATIONS) {
        repeat = false;
      }
    }

    return {'centroids': centroids, 'assignments': assignments};
  }

  /**
   * Fuzzy c-means clustering (Euclidean distance, random distinct initial
   * centroids). Stops when no membership changes by `epsilon` or more, or
   * after more than figue.FCMEANS_MAX_ITERATIONS iterations.
   *
   * @param {number} k            number of clusters
   * @param {number[][]} vectors  input vectors
   * @param {number} epsilon      convergence threshold on membership change
   * @param {number} fuzziness    fuzzifier m (the exponent used is 2 / (m - 1))
   * @returns {{centroids: number[][], membershipMatrix: Matrix}|null} null when
   *          k distinct initial centroids cannot be drawn
   */
  function fcmeans(k, vectors, epsilon, fuzziness) {
    var membershipMatrix = new Matrix(vectors.length, k);
    var previousMembershipMatrix;
    var repeat = true;
    var nb_iters = 0;
    var centroids = null;
    var exponent = 2 / (fuzziness - 1);
    var i, j, l, tmp, norm, max, diff, weight, dists, zeroCount, sum;

    while (repeat) {
      // initialize or update centroids
      if (centroids === null) {
        tmp = getRandomVectors(k, vectors);
        if (tmp === null) {
          return null;
        }
        centroids = tmp.vectors;
      } else {
        for (j = 0; j < k; j++) {
          centroids[j] = [];
          norm = 0;
          for (i = 0; i < membershipMatrix.rows; i++) {
            weight = Math.pow(membershipMatrix.mtx[i][j], fuzziness);
            norm += weight;
            tmp = multiplyVectorByValue(weight, vectors[i]);
            centroids[j] = (i === 0) ? tmp : addVectors(centroids[j], tmp);
          }
          if (norm > 0) {
            centroids[j] = multiplyVectorByValue(1 / norm, centroids[j]);
          }
        }
      }

      // update the degree of membership of each vector
      previousMembershipMatrix = membershipMatrix.copy();
      for (i = 0; i < membershipMatrix.rows; i++) {
        // distances from vector i to every centroid, computed once
        dists = new Array(k);
        zeroCount = 0;
        for (j = 0; j < k; j++) {
          dists[j] = euclidianDistance(vectors[i], centroids[j]);
          if (dists[j] === 0) {
            zeroCount++;
          }
        }

        for (j = 0; j < k; j++) {
          if (zeroCount > 0) {
            // The vector sits exactly on one or more centroids: it belongs
            // entirely to them (shared equally) and not at all to the others.
            membershipMatrix.mtx[i][j] = (dists[j] === 0) ? 1 / zeroCount : 0;
          } else {
            sum = 0;
            for (l = 0; l < k; l++) {
              sum += Math.pow(dists[j] / dists[l], exponent);
            }
            if (sum > 0) {
              sum = 1 / sum;
            }
            membershipMatrix.mtx[i][j] = sum;
          }
        }
      }

      // check convergence: largest absolute change of any membership
      max = -1;
      for (i = 0; i < membershipMatrix.rows; i++) {
        for (j = 0; j < membershipMatrix.cols; j++) {
          diff = Math.abs(membershipMatrix.mtx[i][j] - previousMembershipMatrix.mtx[i][j]);
          if (diff > max) {
            max = diff;
          }
        }
      }
      if (max < epsilon) {
        repeat = false;
      }

      nb_iters++;

      // check the number of iterations
      if (nb_iters > figue.FCMEANS_MAX_ITERATIONS) {
        repeat = false;
      }
    }

    return {'centroids': centroids, 'membershipMatrix': membershipMatrix};
  }

  /* ------------------------------------------------------------------ */
  /* Data types                                                          */
  /* ------------------------------------------------------------------ */

  /**
   * rows x cols matrix filled with zeros; the data lives in `mtx[row][col]`.
   * @constructor
   */
  function Matrix(rows, cols) {
    this.rows = rows;
    this.cols = cols;
    this.mtx = new Array(rows);

    for (var i = 0; i < rows; i++) {
      var row = new Array(cols);
      for (var j = 0; j < cols; j++) {
        row[j] = 0;
      }
      this.mtx[i] = row;
    }
  }

  /**
   * Node of the cluster tree. A leaf has both children null (size 1, depth 0);
   * an inner node derives size and depth from its two children.
   * @constructor
   */
  function Node(label, left, right, dist, centroid) {
    this.label = label;
    this.left = left;
    this.right = right;
    this.dist = dist;
    this.centroid = centroid;
    if (left == null && right == null) {
      this.size = 1;
      this.depth = 0;
    } else {
      this.size = left.size + right.size;
      this.depth = 1 + Math.max(left.depth, right.depth);
    }
  }

  return {
    SINGLE_LINKAGE: 0,
    COMPLETE_LINKAGE: 1,
    AVERAGE_LINKAGE: 2,
    EUCLIDIAN_DISTANCE: 0,
    MANHATTAN_DISTANCE: 1,
    MAX_DISTANCE: 2,
    PRINT_VECTOR_VALUE_PRECISION: 2,
    KMEANS_MAX_ITERATIONS: 10,
    FCMEANS_MAX_ITERATIONS: 3,

    Matrix: Matrix,
    Node: Node,
    generateDendogram: generateDendogram,
    agglomerate: agglomerate,
    kmeans: kmeans,
    fcmeans: fcmeans
  };
}();

/** Tab-separated columns, one matrix row per line. */
figue.Matrix.prototype.toString = function () {
  var lines = [];
  for (var i = 0; i < this.rows; i++) {
    lines.push(this.mtx[i].join("\t"));
  }
  return lines.join("\n");
};

/** Returns a new Matrix with the same dimensions and a copy of every row. */
figue.Matrix.prototype.copy = function () {
  var duplicate = new figue.Matrix(this.rows, this.cols);
  for (var i = 0; i < this.rows; i++) {
    duplicate.mtx[i] = this.mtx[i].slice(0);
  }
  return duplicate;
};

/** True when the node has no children. */
figue.Node.prototype.isLeaf = function () {
  return (this.left == null) && (this.right == null);
};

/**
 * Renders the tree rooted at this node as a multi-line string.
 * See figue.generateDendogram for the meaning of the parameters.
 */
figue.Node.prototype.buildDendogram = function (sep, balanced, withLabel, withCentroid, withDistance) {
  var lines = figue.generateDendogram(this, sep, balanced, withLabel, withCentroid, withDistance);
  return lines.join("\n");
};

/**
 * Array.prototype.compare(testArr): element-wise comparison of two arrays.
 * Elements that themselves expose a `compare` method (for example nested
 * arrays) are compared through it; all other elements with `!==`.
 *
 * Kept for backward compatibility with code that calls `array.compare(...)`.
 * It is defined as a non-enumerable property so that `for...in` loops over
 * arrays elsewhere on the page do not pick up an extra "compare" key.
 */
Object.defineProperty(Array.prototype, 'compare', {
  value: function (testArr) {
    if (testArr == null || this.length !== testArr.length) {
      return false;
    }
    for (var i = 0; i < testArr.length; i++) {
      var item = this[i];
      if (item != null && typeof item.compare === 'function') {
        if (!item.compare(testArr[i])) {
          return false;
        }
      } else if (item !== testArr[i]) {
        return false;
      }
    }
    return true;
  },
  writable: true,
  configurable: true,
  enumerable: false
});