Skip to content

Instantly share code, notes, and snippets.

@lsauer
Created May 3, 2012 09:22
Show Gist options
  • Select an option

  • Save lsauer/2584646 to your computer and use it in GitHub Desktop.

Select an option

Save lsauer/2584646 to your computer and use it in GitHub Desktop.

Revisions

  1. lsauer revised this gist May 22, 2012. 2 changed files with 4 additions and 4 deletions.
    4 changes: 2 additions & 2 deletions tanimoto.cs
    Original file line number Diff line number Diff line change
    @@ -1,6 +1,6 @@
    //lsauer.com lo sauer 2012 - CC-BY-SA v3
    //
    //JavaScript/CoffeeScript implementation for computing TANIMOTO Coefficients
    //see also: http://www.daylight.com/dayhtml/doc/theory/theory.finger.html
    //description: JavaScript/CoffeeScript implementation for computing TANIMOTO Coefficients
    //implementation #1: parallelization amenable, map-reduce like
    //heavily depending on JavaScript's dynamic type casting model
    Tanimoto = (key1, key2) ->
    4 changes: 2 additions & 2 deletions tanimoto.js
    Original file line number Diff line number Diff line change
    @@ -1,6 +1,6 @@
    //lsauer.com lo sauer 2012 - CC-BY-SA v3
    //
    //JavaScript implementation for computing TANIMOTO Coefficients
    //see also: http://www.daylight.com/dayhtml/doc/theory/theory.finger.html
    //description: JavaScript implementation for computing TANIMOTO Coefficients
    // What: given are two aligned binary strings of feature descriptors
    // Why: the objective is to count the similarities and return a normalized value
    // How: requires Babel or binary fingerprint e.g. MDL Information Systems MACCS keys
  2. lsauer revised this gist May 9, 2012. 1 changed file with 3 additions and 3 deletions.
    6 changes: 3 additions & 3 deletions tanimoto.js
    Original file line number Diff line number Diff line change
    @@ -3,8 +3,8 @@
    //JavaScript implementation for computing TANIMOTO Coefficients
    // What: given are two aligned binary strings of feature descriptors
    // Why: the objective is to count the similarities and return a normalized value
    // How: requires Babel or MDL Information Systems MACCS keys

    // How: requires Babel or binary fingerprint e.g. MDL Information Systems MACCS keys
    // see also: http://pubchem.ncbi.nlm.nih.gov/help.html#fingerprints
    // Adds Array.prototype.sum when no implementation is present yet.
    // sum() runs parseFloat on every element and returns the total; an empty
    // array yields 0.
    if (!Array.prototype.sum) {
      // Defined as non-enumerable so the helper does not show up in for...in
      // loops over arrays (a plain assignment would make it enumerable).
      Object.defineProperty(Array.prototype, 'sum', {
        value: function () {
          var total = 0;
          for (var i = 0; i < this.length; i++) {
            total += parseFloat(this[i]);
          }
          return total;
        },
        writable: true,
        configurable: true,
        enumerable: false
      });
    }
    @@ -56,4 +56,4 @@ Tanimoto("001101001", "000101011")
    //see: http://xhr2.blogspot.com/2011/09/javascript-binary-to-int-hex-decimal.html
    // Conversion getters on String.prototype, defined through the standard
    // Object.defineProperty instead of the deprecated __defineGetter__.
    // They are non-enumerable so they stay out of for...in loops over String objects.
    // "5".d2b -> "101": parse the string as a number and render it in base 2
    Object.defineProperty(String.prototype, 'd2b', {
      get: function () { return parseFloat(this).toString(2); },
      configurable: true,
      enumerable: false
    });
    // "101".b2d -> 5: parse the string as a base-2 integer
    Object.defineProperty(String.prototype, 'b2d', {
      get: function () { return parseInt(this, 2); },
      configurable: true,
      enumerable: false
    });
    //...
    //...
  3. lsauer revised this gist May 3, 2012. 1 changed file with 59 additions and 0 deletions.
    59 changes: 59 additions & 0 deletions tanimoto.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,59 @@
    //lsauer.com lo sauer 2012 - CC-BY-SA v3
    //
    //JavaScript implementation for computing TANIMOTO Coefficients
    // What: given are two aligned binary strings of feature descriptors
    // Why: the objective is to count the similarities and return a normalized value
    // How: requires Babel or MDL Information Systems MACCS keys

    // Adds Array.prototype.sum when no implementation is present yet.
    // sum() runs parseFloat on every element and returns the total; an empty
    // array yields 0.
    if (!Array.prototype.sum) {
      // Defined as non-enumerable so the helper does not show up in for...in
      // loops over arrays (a plain assignment would make it enumerable).
      Object.defineProperty(Array.prototype, 'sum', {
        value: function () {
          var total = 0;
          for (var i = 0; i < this.length; i++) {
            total += parseFloat(this[i]);
          }
          return total;
        },
        writable: true,
        configurable: true,
        enumerable: false
      });
    }

    //implementation #1: parallelization amenable, map-reduce like
    //self-contained: uses only Array.prototype.map/reduce, no prototype extension
    // key1, key2: aligned binary strings of feature descriptors, e.g. "001101001"
    // returns:    nfAB / (nfA + nfB - nfAB), or NaN when neither key has an ON bit
    // `var` is kept on purpose: this file assigns Tanimoto more than once.
    var Tanimoto = function(key1, key2)
    {
      // a position is ON when it exists and is not numerically zero
      var toBit = function(ch){ return (ch !== undefined && Number(ch) !== 0) ? 1 : 0; };
      var add = function(acc, bit){ return acc + bit; };

      // map step: turn each key into an array of 0/1 flags (split only once per key)
      var bitsA = key1.split('').map(function(ch){ return toBit(ch); });
      var bitsB = key2.split('').map(function(ch){ return toBit(ch); });

      // reduce step: count the ON bits
      var nfA = bitsA.reduce(add, 0);   // #nr of features/ON-bits in molecule A
      var nfB = bitsB.reduce(add, 0);   // ....... ON in molecule B
      // ......... ON in molecule A and B; positions missing from key2 count as OFF
      var nfAB = bitsA.map(function(bit, i){ return bit & (bitsB[i] || 0); }).reduce(add, 0);

      var tot = nfA + nfB - nfAB;
      return tot ? nfAB / tot : NaN;
    };

    //implementation #2: 'detailed' single-pass loop
    // key1, key2: aligned binary strings of feature descriptors, e.g. "001101001"
    // returns:    nfAB / (nfA + nfB - nfAB), or NaN when neither key has an ON bit
    // `var` is kept on purpose: this file assigns Tanimoto more than once.
    var Tanimoto = function(key1, key2)
    {
      var nfA = 0;    // #nr of features/ON-bits in molecule A
      var nfB = 0;    // ....... ON in molecule B
      var nfAB = 0;   // ......... ON in molecule A and B

      // Walk the longer key so trailing bits of either key are counted, and
      // bounds-check each access: an out-of-range index yields undefined, which
      // must count as OFF rather than ON.
      var len = Math.max(key1.length, key2.length);
      for (var i = 0; i < len; i++) {
        // Number(x) !== 0 matches the loose `0 != x` test for characters and numbers
        var onA = i < key1.length && Number(key1[i]) !== 0;
        var onB = i < key2.length && Number(key2[i]) !== 0;
        if (onA) { nfA++; }
        if (onB) { nfB++; }
        if (onA && onB) { nfAB++; }
      }

      var tot = nfA + nfB - nfAB;
      // normalize result; an all-OFF pair has no defined similarity
      return tot !== 0 ? nfAB / tot : NaN;
    };

    //Example; returns 0.6
    //each key has 4 ON bits and they share 3 of them: 3 / (4 + 4 - 3) = 0.6
    Tanimoto("001101001", "000101011")


    //implementation3: fragment keys into 128bit length blocks; for-loop-shift with '&'-mask to make initial counts; return score
    //see: http://xhr2.blogspot.com/2011/09/javascript-binary-to-int-hex-decimal.html
    // Conversion getters on String.prototype, defined through the standard
    // Object.defineProperty instead of the deprecated __defineGetter__.
    // They are non-enumerable so they stay out of for...in loops over String objects.
    // "5".d2b -> "101": parse the string as a number and render it in base 2
    Object.defineProperty(String.prototype, 'd2b', {
      get: function () { return parseFloat(this).toString(2); },
      configurable: true,
      enumerable: false
    });
    // "101".b2d -> 5: parse the string as a base-2 integer
    Object.defineProperty(String.prototype, 'b2d', {
      get: function () { return parseInt(this, 2); },
      configurable: true,
      enumerable: false
    });
    //...
  4. lsauer created this gist May 3, 2012.
    37 changes: 37 additions & 0 deletions tanimoto.cs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,37 @@
    # lsauer.com lo sauer 2012 - CC-BY-SA v3
    #
    # JavaScript/CoffeeScript implementation for computing TANIMOTO Coefficients
    # implementation #1: parallelization amenable, map-reduce like
    # Self-contained: uses only Array::map / Array::reduce, no prototype extension.
    # key1, key2: aligned binary strings of feature descriptors, e.g. "001101001"
    # returns:    nfAB / (nfA + nfB - nfAB), or NaN when neither key has an ON bit
    Tanimoto = (key1, key2) ->
      # a position is ON when it exists and is not numerically zero
      toBit = (ch) -> if ch? and Number(ch) isnt 0 then 1 else 0
      add = (acc, bit) -> acc + bit

      # map step: turn each key into an array of 0/1 flags
      bitsA = key1.split("").map (ch) -> toBit ch
      bitsB = key2.split("").map (ch) -> toBit ch

      # reduce step: count the ON bits
      nfA = bitsA.reduce add, 0
      nfB = bitsB.reduce add, 0
      # ON in both A and B; positions missing from key2 count as OFF
      nfAB = bitsA.map((bit, i) -> bit & (bitsB[i] or 0)).reduce add, 0

      tot = nfA + nfB - nfAB
      if tot then nfAB / tot else NaN

    # implementation #2: 'detailed' single-pass loop
    # key1, key2: aligned binary strings of feature descriptors, e.g. "001101001"
    # returns:    nfAB / (nfA + nfB - nfAB), or NaN when neither key has an ON bit
    Tanimoto = (key1, key2) ->
      nfA = 0     # number of features/ON bits in molecule A
      nfB = 0     # ... ON in molecule B
      nfAB = 0    # ... ON in both A and B

      # CoffeeScript's `is` compiles to `===`, so a character such as "0" never
      # equals the number 0; convert to a number before comparing. The bounds
      # check keeps positions past the end of a key OFF.
      isOn = (key, idx) -> idx < key.length and Number(key[idx]) isnt 0

      # walk the longer key so trailing bits of either key are counted
      len = Math.max key1.length, key2.length
      i = 0
      while i < len
        onA = isOn key1, i
        onB = isOn key2, i
        nfA++ if onA
        nfB++ if onB
        nfAB++ if onA and onB
        i++

      tot = nfA + nfB - nfAB
      # normalize result; an all-OFF pair has no defined similarity
      if tot isnt 0 then nfAB / tot else NaN

    # Example
    Tanimoto "001101001", "000101011"
    # > 0.6