|
|
|
|
|
|
|
|
|
|
|
import * as util from '../../util'; |
|
import clusteringDistance from './clustering-distances'; |
|
|
|
// Default option values for hierarchical clustering; `defaults( options )`
// produces the effective options object used throughout this module.
const defaults = util.defaults({
  // name of the distance metric, forwarded to `clusteringDistance`
  distance: 'euclidean',

  // how the distance to a freshly merged cluster is derived:
  // 'min', 'max' or 'mean' (the aliases 'single' and 'complete' are also accepted)
  linkage: 'min',

  // 'threshold': keep merging until the closest pair is at least `threshold` apart
  // 'dendrogram': keep merging until a single binary tree of clusters remains
  mode: 'threshold',

  // stopping distance for mode 'threshold'
  threshold: Infinity,

  // mode 'dendrogram' only: also add the merge tree to the graph as nodes and edges
  addDendrogram: false,

  // mode 'dendrogram' only: tree depth at which branches become the returned clusters
  dendrogramDepth: 0,

  // functions that each take a node and return one coordinate used by the distance metric
  attributes: []
});
|
|
|
// Alternative linkage names mapped onto the canonical ones used internally.
const linkageAliases = {
  single: 'min',
  complete: 'max'
};
|
|
|
/**
 * Resolve user options against the module defaults and normalise the linkage name.
 *
 * @param {Object} [options] - user-supplied options
 * @returns {Object} the options produced by `defaults`, with a `linkage` alias
 *   ('single', 'complete') replaced by its canonical name ('min', 'max')
 */
let setOptions = ( options ) => {
  const opts = defaults( options );

  // Only own keys of the alias table count: a plain `linkageAliases[ name ]` lookup
  // would also find inherited members such as 'toString' or 'constructor' and
  // overwrite the linkage with a function.
  if( Object.prototype.hasOwnProperty.call( linkageAliases, opts.linkage ) ){
    opts.linkage = linkageAliases[ opts.linkage ];
  }

  return opts;
};
|
|
|
/**
 * Perform one agglomeration step: find the two closest clusters, merge them and
 * update the bookkeeping structures in place.
 *
 * @param {Array<Object>} clusters - active clusters; each has a `key` (row in `dists`)
 *   and an `index` (its position in `clusters`)
 * @param {Array<Object>} index - cluster lookup by key
 * @param {Array<Array<number>>} dists - symmetric distance matrix addressed by cluster key
 * @param {Array<number>} mins - for each key, the key of the cluster currently recorded
 *   as its nearest neighbour
 * @param {Object} opts - resolved options (`mode`, `threshold`, `linkage`, `distance`,
 *   `attributes`)
 * @returns {boolean} true when a merge happened, false when clustering is finished
 */
let mergeClosest = function( clusters, index, dists, mins, opts ) {

  // With fewer than two clusters there is no pair left to merge. This also keeps
  // an empty input from dereferencing a missing cluster in dendrogram mode.
  if ( clusters.length < 2 ) {
    return false;
  }

  // Find the cluster whose recorded nearest neighbour is the closest overall
  let minKey = 0;
  let min = Infinity;

  for ( let i = 0; i < clusters.length; i++ ) {
    const key = clusters[i].key;
    const candidate = dists[key][mins[key]];

    if ( candidate < min ) {
      minKey = key;
      min = candidate;
    }
  }

  // Threshold mode stops as soon as the closest pair is far enough apart
  if ( opts.mode === 'threshold' && min >= opts.threshold ) {
    return false;
  }

  const c1 = index[minKey];
  const c2 = index[mins[minKey]];

  // Number of original nodes in each cluster; initial clusters carry no `size`
  // and count as one node. Needed for the weighted 'mean' linkage below.
  const size1 = c1.size != null ? c1.size : 1;
  const size2 = c2.size != null ? c2.size : 1;

  // The merged cluster takes over the key (and matrix row) of c1
  let merged;

  if ( opts.mode === 'dendrogram' ) {
    merged = {
      left: c1,
      right: c2,
      key: c1.key,
      size: size1 + size2
    };
  }
  else {
    merged = {
      value: c1.value.concat( c2.value ),
      key: c1.key,
      size: size1 + size2
    };
  }

  clusters[c1.index] = merged;
  clusters.splice( c2.index, 1 );

  index[c1.key] = merged;

  // Fallback distance for linkage names other than 'min', 'max' and 'mean'
  const attrs = opts.attributes;
  const getDist = ( n1, n2 ) => clusteringDistance( opts.distance, attrs.length, i => attrs[i](n1), i => attrs[i](n2), n1, n2 );

  // Recompute the distance from the merged cluster to every remaining cluster
  for ( let i = 0; i < clusters.length; i++ ) {
    const cur = clusters[i];
    const d1 = dists[c1.key][cur.key];
    const d2 = dists[c2.key][cur.key];
    let dist;

    if ( c1.key === cur.key ) {
      dist = Infinity; // a cluster is never its own neighbour
    }
    else if ( opts.linkage === 'min' ) {
      dist = d1 > d2 ? d2 : d1;
    }
    else if ( opts.linkage === 'max' ) {
      dist = d1 < d2 ? d2 : d1;
    }
    else if ( opts.linkage === 'mean' ) {
      // average of the two old distances, weighted by cluster size
      dist = ( d1 * size1 + d2 * size2 ) / ( size1 + size2 );
    }
    else {
      if ( opts.mode === 'dendrogram' ) {
        dist = getDist( cur.value, c1.value );
      }
      else {
        dist = getDist( cur.value[0], c1.value[0] );
      }
    }

    dists[c1.key][cur.key] = dists[cur.key][c1.key] = dist;
  }

  // Refresh the nearest neighbour of every cluster that pointed at c1 or c2,
  // and renumber the positions shifted by the splice above
  for ( let i = 0; i < clusters.length; i++ ) {
    const key1 = clusters[i].key;

    if ( mins[key1] === c1.key || mins[key1] === c2.key ) {
      let nearest = key1;

      for ( let j = 0; j < clusters.length; j++ ) {
        const key2 = clusters[j].key;

        if ( dists[key1][key2] < dists[key1][nearest] ) {
          nearest = key2;
        }
      }

      mins[key1] = nearest;
    }

    clusters[i].index = i;
  }

  // The two source clusters no longer take part in the bookkeeping
  c1.key = c2.key = c1.index = c2.index = null;

  return true;
};
|
|
|
/**
 * Append the values of all leaves below `root` to `arr`, left subtree first.
 * A node with a truthy `value` is treated as a leaf and its children are not visited.
 *
 * @param {Object} root - tree node with either `value` or `left` / `right`
 * @param {Array} arr - output array, appended to in place
 * @param {Object} cy - not used by this function
 */
let getAllChildren = function( root, arr, cy ) {
  // Explicit stack instead of recursion; the right child is pushed first so that
  // the left child is popped, and therefore visited, first.
  const pending = [ root ];

  while ( pending.length > 0 ) {
    const node = pending.pop();

    if ( !node ) {
      continue;
    }

    if ( node.value ) {
      arr.push( node.value );
      continue;
    }

    if ( node.right ) {
      pending.push( node.right );
    }

    if ( node.left ) {
      pending.push( node.left );
    }
  }
};
|
|
|
/**
 * Add the merge tree rooted at `root` to the graph: one new node per internal tree
 * node, plus an edge from each of its two children to it.
 *
 * @param {Object} root - tree node with either `value` (leaf) or `left` and `right`
 * @param {Object} cy - graph instance used to add elements
 * @returns {string|undefined} id of the graph node standing for `root`: the leaf's own
 *   id, or the child ids joined by ',' for an internal node; '' when `root` is missing
 */
let buildDendrogram = function ( root, cy ) {
  if ( !root ) {
    return '';
  }

  const isBranch = root.left && root.right;

  if ( !isBranch ) {
    // leaf: reuse the id of the existing element
    return root.value ? root.value.id() : undefined;
  }

  // children first, so their graph nodes exist before the edges are added
  const leftId = buildDendrogram( root.left, cy );
  const rightId = buildDendrogram( root.right, cy );

  const parent = cy.add( { group: 'nodes', data: { id: `${ leftId },${ rightId }` } } );
  const parentId = parent.id();

  for ( const childId of [ leftId, rightId ] ) {
    cy.add( { group: 'edges', data: { source: childId, target: parentId } } );
  }

  return parentId;
};
|
|
|
/**
 * Cut the merge tree `k` levels below `root` and return one collection per branch.
 *
 * - `k === 0`: every leaf below (or at) `root` goes into a single cluster
 * - `k === 1`: the leaves of the left and of the right subtree form one cluster each
 * - otherwise: recurse into both subtrees with `k - 1`; a leaf reached early is a
 *   cluster of its own
 *
 * @param {Object} root - tree node with either `value` (leaf) or `left` / `right`
 * @param {number} k - remaining depth before branches are collapsed into clusters
 * @param {Object} cy - graph instance used to build the collections
 * @returns {Array} list of collections created with `cy.collection`
 */
let buildClustersFromTree = function( root, k, cy ) {

  if ( !root ) {
    return [];
  }

  if ( k === 0 ) {
    // Collect from `root` itself rather than from its children only, so that a
    // root that is a leaf (single-node input) yields that node instead of an
    // empty cluster.
    const leaves = [];

    getAllChildren( root, leaves, cy );

    return [ cy.collection( leaves ) ];
  }

  if ( root.value ) {
    return [ cy.collection( root.value ) ];
  }

  let left = [];
  let right = [];

  if ( k === 1 ) {
    if ( root.left ) {
      getAllChildren( root.left, left, cy );
    }

    if ( root.right ) {
      getAllChildren( root.right, right, cy );
    }

    return [ cy.collection( left ), cy.collection( right ) ];
  }

  if ( root.left ) {
    left = buildClustersFromTree( root.left, k - 1, cy );
  }

  if ( root.right ) {
    right = buildClustersFromTree( root.right, k - 1, cy );
  }

  return left.concat( right );
};
|
|
|
// Debugging aid, only defined outside production builds: logs a square matrix
// row by row with every entry rounded to two decimals, followed by an empty line.
if( process.env.NODE_ENV !== 'production' ){
  const printMatrix = ( M ) => {
    const size = M.length;

    for( let r = 0; r < size; r++ ){
      let line = '';

      for( let c = 0; c < size; c++ ){
        line += `${ Math.round( M[r][c] * 100 ) / 100 } `;
      }

      console.log( line );
    }

    console.log( '' );
  };
}
|
|
|
/**
 * Agglomerative hierarchical clustering of the nodes in the calling collection.
 *
 * Every node starts as its own cluster; `mergeClosest` is then applied repeatedly
 * until it reports that nothing is left to merge.
 *
 * @this {Object} object providing `cy()` and `nodes()`
 * @param {Object} [options] - see `defaults` for the recognised keys
 * @returns {Array} clusters, each built with `cy.collection`; in 'dendrogram' mode
 *   they come from cutting the merge tree at `dendrogramDepth`
 */
let hierarchicalClustering = function( options ){
  const cy = this.cy();
  const nodes = this.nodes();

  const opts = setOptions( options );
  const attrs = opts.attributes;
  const isDendrogram = opts.mode === 'dendrogram';

  const getDist = ( n1, n2 ) => clusteringDistance( opts.distance, attrs.length, i => attrs[i](n1), i => attrs[i](n2), n1, n2 );

  // An initial cluster stores its node directly in dendrogram mode,
  // and inside a one-element array otherwise
  const nodeOf = ( cluster ) => ( isDendrogram ? cluster.value : cluster.value[0] );

  const clusters = []; // active clusters
  const dists = [];    // distance matrix addressed by cluster key
  const mins = [];     // per key: key of the closest cluster found so far
  const index = [];    // cluster lookup by key

  // One cluster per node
  for ( let n = 0; n < nodes.length; n++ ) {
    const cluster = {
      value: isDendrogram ? nodes[n] : [ nodes[n] ],
      key: n,
      index: n
    };

    clusters[n] = cluster;
    index[n] = cluster;
    dists[n] = [];
    mins[n] = 0;
  }

  // Fill the symmetric distance matrix; the diagonal is Infinity so that a
  // cluster is never picked as its own neighbour
  for ( let i = 0; i < clusters.length; i++ ) {
    for ( let j = 0; j <= i; j++ ) {
      const dist = ( i === j ) ? Infinity : getDist( nodeOf( clusters[i] ), nodeOf( clusters[j] ) );

      dists[i][j] = dist;
      dists[j][i] = dist;

      if ( dist < dists[i][mins[i]] ) {
        mins[i] = j;
      }
    }
  }

  // Merge until mergeClosest reports that it is done
  while ( mergeClosest( clusters, index, dists, mins, opts ) ) {
    // all the work happens inside mergeClosest
  }

  if ( !isDendrogram ) {
    // Strip the bookkeeping fields and wrap each cluster's nodes in a collection
    return clusters.map( ( cluster ) => {
      cluster.key = cluster.index = null;

      return cy.collection( cluster.value );
    } );
  }

  // A single tree remains; cut it at the requested depth
  const retClusters = buildClustersFromTree( clusters[0], opts.dendrogramDepth, cy );

  if ( opts.addDendrogram ) {
    buildDendrogram( clusters[0], cy );
  }

  return retClusters;
};
|
|
|
// The algorithm is exposed under its full name and under the short alias `hca`.
const hierarchicalClusteringApi = {
  hierarchicalClustering,
  hca: hierarchicalClustering
};

export default hierarchicalClusteringApi;
|
|