Skip to content

Instantly share code, notes, and snippets.

@lsauer
Created May 3, 2012 09:22
Show Gist options
  • Select an option

  • Save lsauer/2584646 to your computer and use it in GitHub Desktop.

Select an option

Save lsauer/2584646 to your computer and use it in GitHub Desktop.
JavaScript implementation of Tanimoto Score / Index / Similarity from binary feature descriptors
//lsauer.com lo sauer 2012 - CC-BY-SA v3
//see also: http://www.daylight.com/dayhtml/doc/theory/theory.finger.html
//description: JavaScript/CoffeeScript implementation for computing TANIMOTO Coefficients
//implementation #1: parallelization amenable, map-reduce like
//relies heavily on JavaScript's dynamic type coercion model
# Tanimoto coefficient of two aligned binary fingerprint strings (map-reduce style).
#
# key1, key2 - strings made of "0"/"1" characters, one character per feature bit.
#
# Returns |A and B| / (|A| + |B| - |A and B|) as a number, or NaN when neither
# key has any ON bit (the denominator would be zero).
#
# Self-contained: digits are summed with the standard Array::reduce, so no
# Array::sum helper has to be installed on the prototype beforehand.
Tanimoto = (key1, key2) ->
  # Sum the digits of a key; a non-numeric character makes the sum NaN,
  # which `or 0` then collapses to 0.
  countOn = (key) ->
    key.split("").reduce(((total, ch) -> total + parseFloat(ch)), 0) or 0

  nfA = countOn key1   # number of ON bits in A
  nfB = countOn key2   # number of ON bits in B
  # `&` coerces both characters to integers; a position missing from key2
  # (undefined) coerces to 0, so a shorter key2 is treated as zero-padded.
  nfAB = key1.split("").reduce(((total, ch, i) -> total + (ch & key2[i])), 0) or 0
  tot = nfA + nfB - nfAB
  if tot then nfAB / tot else NaN
//implementation #2: 'detailed'
# Tanimoto coefficient of two aligned binary fingerprint strings ('detailed' loop).
#
# key1, key2 - strings made of "0"/"1" characters, one character per feature bit.
#              If the lengths differ, the shorter key is treated as zero-padded.
#
# Returns |A and B| / (|A| + |B| - |A and B|) as a number, or NaN when neither
# key has any ON bit (the denominator would be zero).
Tanimoto = (key1, key2) ->
  # CoffeeScript's `is` compiles to `===`, so `0 is "0"` is always false and
  # would count every position as ON. Convert the character to a number first.
  # A missing position (undefined) is OFF.
  isOn = (ch) -> ch? and +ch isnt 0

  nfA = 0    # number of ON bits in A
  nfB = 0    # number of ON bits in B
  nfAB = 0   # number of bits ON in both A and B
  len = Math.max key1.length, key2.length
  i = 0
  while i < len
    onA = isOn key1[i]
    onB = isOn key2[i]
    nfA++ if onA
    nfB++ if onB
    nfAB++ if onA and onB
    i++
  tot = nfA + nfB - nfAB
  if tot is 0 then NaN else nfAB / tot

# Example
Tanimoto "001101001", "000101011"
# > 0.6
//lsauer.com lo sauer 2012 - CC-BY-SA v3
//see also: http://www.daylight.com/dayhtml/doc/theory/theory.finger.html
//description: JavaScript implementation for computing TANIMOTO Coefficients
// What: given are two aligned binary strings of feature descriptors
// Why: the objective is to count the similarities and return a normalized value
// How: requires Babel or binary fingerprint e.g. MDL Information Systems MACCS keys
// see also: http://pubchem.ncbi.nlm.nih.gov/help.html#fingerprints
// Polyfill: Array.prototype.sum() adds up the elements of the array, passing
// each one through parseFloat first (so ['1','0','1'].sum() === 2). An empty
// array sums to 0; any element parseFloat cannot read makes the result NaN.
// Installed only if no `sum` exists yet, and defined as non-enumerable so it
// does not show up in `for...in` loops over arrays.
if(!Array.prototype.sum){
    Object.defineProperty(Array.prototype, 'sum', {
        value: function(){
            var total = 0;
            for (var i = 0; i < this.length; i++){
                total += parseFloat(this[i]);
            }
            return total;
        },
        writable: true,
        configurable: true,
        enumerable: false
    });
}
//implementation #1: parallelization amenable, map-reduce like
//relies heavily on JavaScript's dynamic type coercion model
/**
 * Tanimoto coefficient of two aligned binary fingerprint strings
 * (map-reduce style).
 *
 * Self-contained: digits are summed with the standard Array.prototype.reduce,
 * so the function no longer requires a custom Array.prototype.sum to be
 * installed before it is called.
 *
 * @param {string} key1 - fingerprint of molecule A, one '0'/'1' character per feature
 * @param {string} key2 - fingerprint of molecule B, aligned with key1
 * @returns {number} |A and B| / (|A| + |B| - |A and B|), or NaN when neither
 *                   key has any ON bit (the denominator would be zero)
 */
var Tanimoto = function(key1, key2)
{
    var bitsA = key1.split('');
    var bitsB = key2.split('');
    var addDigit = function(total, ch){ return total + parseFloat(ch); };

    // A non-numeric character turns the sum into NaN; `|| 0` collapses that to 0.
    var nfA = bitsA.reduce(addDigit, 0) || 0; // number of ON bits in molecule A
    var nfB = bitsB.reduce(addDigit, 0) || 0; // number of ON bits in molecule B

    // `&` coerces both characters to integers; a position missing from key2
    // (undefined) coerces to 0, so a shorter key2 is treated as zero-padded.
    var nfAB = bitsA.reduce(function(total, ch, i){
        return total + (ch & key2[i]);
    }, 0) || 0;                               // number of bits ON in both A and B

    var tot = nfA + nfB - nfAB;
    return tot ? nfAB / tot : NaN;
};
//implementation #2: 'detailed'
/**
 * Tanimoto coefficient of two aligned binary fingerprint strings
 * ('detailed' single-pass loop).
 *
 * A position is ON when its character converts to a non-zero number or to
 * NaN (i.e. anything that is not numerically 0). If the keys differ in
 * length, the shorter one is treated as zero-padded, so positions that do not
 * exist are never counted as ON and extra bits of the longer key still count.
 *
 * @param {string} key1 - fingerprint of molecule A, one '0'/'1' character per feature
 * @param {string} key2 - fingerprint of molecule B, aligned with key1
 * @returns {number} |A and B| / (|A| + |B| - |A and B|), or NaN when neither
 *                   key has any ON bit (the denominator would be zero)
 */
var Tanimoto = function(key1, key2)
{
    // `0 != undefined` is true, so a bare loose comparison would count a
    // missing position as ON; reject undefined explicitly.
    var isOn = function(ch){ return ch !== undefined && Number(ch) !== 0; };

    var nfA = 0;  // number of ON bits in molecule A
    var nfB = 0;  // number of ON bits in molecule B
    var nfAB = 0; // number of bits ON in both A and B
    var len = Math.max(key1.length, key2.length);

    for (var i = 0; i < len; i++){
        var onA = isOn(key1[i]);
        var onB = isOn(key2[i]);
        if (onA){ nfA++; }
        if (onB){ nfB++; }
        if (onA && onB){ nfAB++; }
    }

    var tot = nfA + nfB - nfAB;
    return tot === 0 ? NaN : nfAB / tot; // normalize result
};
//Example; returns 0.6
Tanimoto("001101001", "000101011");
//implementation3: fragment keys into 128bit length blocks; for-loop-shift with '&'-mask to make initial counts; return score
//see: http://xhr2.blogspot.com/2011/09/javascript-binary-to-int-hex-decimal.html
// Conversion getters on strings:
//   'NNN'.d2b - parses the string with parseFloat and returns its base-2 text ('5'.d2b === '101')
//   'BBB'.b2d - parses the string as a base-2 integer and returns the number ('101'.b2d === 5)
// Defined with the standard Object.defineProperty instead of the deprecated
// __defineGetter__, and kept non-enumerable so they do not appear when
// enumerating String.prototype.
Object.defineProperty(String.prototype, 'd2b', {
    get: function(){ return parseFloat(this).toString(2); },
    configurable: true,
    enumerable: false
});
Object.defineProperty(String.prototype, 'b2d', {
    get: function(){ return parseInt(this, 2); },
    configurable: true,
    enumerable: false
});
//...
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment