Created
May 3, 2012 09:22
-
-
Save lsauer/2584646 to your computer and use it in GitHub Desktop.
JavaScript implementation of Tanimoto Score / Index / Similarity from binary feature descriptors
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| //lsauer.com lo sauer 2012 - CC-BY-SA v3 | |
| //see also: http://www.daylight.com/dayhtml/doc/theory/theory.finger.html | |
| //description: JavaScript/CoffeeScript implementation for computing TANIMOTO Coefficients | |
| //implementation #1: parallelization amenable, map-reduce like | |
| //relies heavily on JavaScript's dynamic type coercion model | |
# Tanimoto coefficient of two aligned binary feature strings (map-reduce style).
#
# key1, key2: strings of "0"/"1" characters, one character per feature.
# Returns |A and B| / (|A| + |B| - |A and B|), or NaN when neither key has
# an ON bit (the denominator would be zero).
Tanimoto = (key1, key2) ->
  # Sum the entries as numbers; a NaN total (non-numeric characters) or an
  # empty list falls back to 0, as the original `.sum() or 0` did.
  countOn = (bits) ->
    bits.reduce(((acc, bit) -> acc + parseFloat(bit)), 0) or 0

  bitsA = key1.split("")
  nfA = countOn(bitsA)             # number of ON bits in A
  nfB = countOn(key2.split(""))    # number of ON bits in B
  # Bitwise AND coerces both characters to integers; a position missing
  # from key2 coerces to 0 and therefore counts as OFF.
  nfAB = countOn(bitsA.map((e, i) -> e & key2[i]))
  tot = nfA + nfB - nfAB
  if tot then nfAB / tot else NaN
| //implementation #2: 'detailed' | |
# Tanimoto coefficient of two aligned binary feature strings ('detailed' loop).
#
# key1, key2: strings of "0"/"1" characters, one character per feature.
# Returns |A and B| / (|A| + |B| - |A and B|), or NaN when neither key has
# an ON bit.
Tanimoto = (key1, key2) ->
  # CoffeeScript's `is`/`isnt` compile to strict (in)equality, so the
  # character "0" never equals the number 0. Coerce explicitly instead.
  # A missing position (keys of unequal length) counts as OFF.
  isOn = (bit) -> bit? and Number(bit) isnt 0

  nfA = 0    # number of ON bits in A
  nfB = 0    # number of ON bits in B
  nfAB = 0   # number of bits ON in both A and B
  # Walk the longer key so trailing bits of either key are counted.
  for i in [0...Math.max(key1.length, key2.length)]
    onA = isOn(key1[i])
    onB = isOn(key2[i])
    nfA++ if onA
    nfB++ if onB
    nfAB++ if onA and onB
  tot = nfA + nfB - nfAB
  if tot isnt 0 then nfAB / tot else NaN

# Example
Tanimoto "001101001", "000101011"
# > 0.6
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| //lsauer.com lo sauer 2012 - CC-BY-SA v3 | |
| //see also: http://www.daylight.com/dayhtml/doc/theory/theory.finger.html | |
| //description: JavaScript implementation for computing TANIMOTO Coefficients | |
| // What: given are two aligned binary strings of feature descriptors | |
| // Why: the objective is to count the similarities and return a normalized value | |
| // How: requires Babel or binary fingerprint e.g. MDL Information Systems MACCS keys | |
| // see also: http://pubchem.ncbi.nlm.nih.gov/help.html#fingerprints | |
// Install Array.prototype.sum only if nothing else has defined it.
// Defined via Object.defineProperty so the method is NOT enumerable and
// does not show up in `for...in` loops over arrays.
if (!Array.prototype.sum) {
  Object.defineProperty(Array.prototype, 'sum', {
    /**
     * Sum of all elements, each parsed with parseFloat.
     * @returns {number} the total; 0 for an empty array, NaN if any
     *   element does not parse as a number.
     */
    value: function () {
      var total = 0;
      for (var i = 0; i < this.length; i++) {
        total += parseFloat(this[i]);
      }
      return total;
    },
    writable: true,
    configurable: true,
    enumerable: false
  });
}
| //implementation #1: parallelization amenable, map-reduce like | |
| //relies heavily on JavaScript's dynamic type coercion model | |
/**
 * Tanimoto coefficient of two aligned binary feature strings
 * (map-reduce style implementation).
 *
 * Self-contained: it does not rely on a patched Array.prototype.sum, so a
 * different `sum` installed by other code cannot change the result.
 *
 * @param {string} key1 - "0"/"1" characters, one per feature, molecule A
 * @param {string} key2 - "0"/"1" characters, one per feature, molecule B
 * @returns {number} |A∩B| / (|A| + |B| - |A∩B|), or NaN when neither key
 *   has an ON bit (zero denominator).
 */
var Tanimoto = function(key1, key2)
{
  // Sum the entries as numbers; a NaN total (non-numeric characters)
  // falls back to 0.
  var countOn = function(bits){
    return bits.reduce(function(acc, bit){ return acc + parseFloat(bit); }, 0) || 0;
  };

  var bitsA = key1.split('');
  var nfA = countOn(bitsA);             // number of ON bits in molecule A
  var nfB = countOn(key2.split(''));    // number of ON bits in molecule B
  // Bitwise AND coerces both characters to integers; a position missing
  // from key2 (undefined) coerces to 0 and so counts as OFF.
  var nfAB = countOn(bitsA.map(function(e, i){ return e & key2[i]; }));
  var tot = nfA + nfB - nfAB;

  return tot ? nfAB / tot : NaN;
};
| //implementation #2: 'detailed' | |
/**
 * Tanimoto coefficient of two aligned binary feature strings
 * ('detailed' single-pass implementation).
 *
 * Keys of unequal length are handled by treating positions missing from
 * the shorter key as OFF.
 *
 * @param {string} key1 - "0"/"1" characters, one per feature, molecule A
 * @param {string} key2 - "0"/"1" characters, one per feature, molecule B
 * @returns {number} |A∩B| / (|A| + |B| - |A∩B|), or NaN when neither key
 *   has an ON bit (zero denominator).
 */
var Tanimoto = function(key1, key2)
{
  // A position is ON unless it is missing or coerces to numeric zero.
  // The explicit missing-check matters: `0 != undefined` is true, which
  // would count absent positions as ON.
  var isOn = function(bit){ return bit != null && Number(bit) !== 0; };

  var nfA = 0;    // number of ON bits in molecule A
  var nfB = 0;    // number of ON bits in molecule B
  var nfAB = 0;   // number of bits ON in both A and B
  // Walk the longer key so trailing ON bits of either key are counted.
  var len = Math.max(key1.length, key2.length);

  for (var i = 0; i < len; i++){
    var onA = isOn(key1[i]);
    var onB = isOn(key2[i]);
    if (onA){ nfA++; }
    if (onB){ nfB++; }
    if (onA && onB){ nfAB++; }
  }

  var tot = nfA + nfB - nfAB;
  return tot !== 0 ? nfAB / tot : NaN;   // normalize result
};
//Example; returns 0.6
Tanimoto("001101001", "000101011");
| //implementation3: fragment keys into 128bit length blocks; for-loop-shift with '&'-mask to make initial counts; return score | |
| //see: http://xhr2.blogspot.com/2011/09/javascript-binary-to-int-hex-decimal.html | |
// Conversion getters on String.prototype, defined with Object.defineProperty
// instead of the deprecated __defineGetter__. They are non-enumerable so they
// do not appear in `for...in` over String objects, and configurable so they
// can still be redefined or removed.
Object.defineProperty(String.prototype, 'd2b', {
  // "5".d2b -> "101": parse the string as a number, render it in base 2.
  get: function(){ return parseFloat(this).toString(2); },
  configurable: true,
  enumerable: false
});
Object.defineProperty(String.prototype, 'b2d', {
  // "101".b2d -> 5: parse the string as a base-2 integer.
  get: function(){ return parseInt(this, 2); },
  configurable: true,
  enumerable: false
});
| //... |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment