/*********************************************************
 *
 * @TITLE: Boostnote Duplicate Records
 * @DESCRIPTION: Uses string similarity to compare note
 * titles and contents to determine similar records.
 * Records that are near-certain duplicates are deleted
 * automatically; for records that are merely close it
 * prompts you per entry whether it is safe to delete.
 *
 * @AUTHOR: Eric Soukenka
 * @DATE: April 20th 2017
 *
 * @NOTE: Created due to learning too late I had to restart
 * Boostnote after manually editing .cson files if I was
 * going to start using the application to modify entries.
 *
 * @NOTE: NOT YET POLISHED; it was written in anger after
 * learning I had hundreds of duplicates and regex'ing to
 * fix them was too complicated.
 *
 *********************************************************
 */
'use strict';

// NOTE: parseJson, crypto and _ are not referenced by this script; the
// requires are kept in case other tooling relies on them being loaded.
const parseJson = require('parse-json');
const fs = require('fs');
const path = require('path');
const CSON = require('cson');
const crypto = require('crypto');
const stringSimilarity = require('string-similarity');
const _ = require('underscore');
const query = require('cli-interact').getYesNo;

const readDir = '/Users/esouke/Boostnote/notes';
// NOTE: writeDir and separator are not referenced by this script.
const writeDir = '/Users/esouke/Documents/Notes';
const separator = '\n=========================================================\n';

// Two titles must be more similar than this before contents are compared.
const TITLE_THRESHOLD = 0.3;
// Content similarity above this: the later file is deleted without asking.
const AUTO_DELETE_THRESHOLD = 0.9;
// Content similarity above this (but not above AUTO_DELETE_THRESHOLD):
// the user is asked whether the later file may be deleted.
const ASK_THRESHOLD = 0.5;

/**
 * Reads every `.cson` file in `dir` once and returns the comparable notes.
 *
 * Files that cannot be read or parsed are reported and skipped. Notes that
 * lack a non-empty string `title` or a string `content` are skipped too,
 * because they cannot be passed to the string comparison.
 *
 * @param {string} dir - Directory holding the note files.
 * @returns {Array<{file: string, name: string, title: string, content: string}>}
 */
function loadNotes(dir) {
  const notes = [];
  fs.readdirSync(dir).forEach((name) => {
    if (path.extname(name) !== '.cson') {
      return;
    }
    const file = `${dir}/${name}`;
    try {
      const parsed = CSON.parse(fs.readFileSync(file).toString());
      // Depending on its version the cson package may hand back an Error
      // instead of throwing; treat both the same way.
      if (parsed instanceof Error) {
        throw parsed;
      }
      const comparable =
        parsed &&
        typeof parsed.title === 'string' &&
        parsed.title !== '' &&
        typeof parsed.content === 'string';
      if (comparable) {
        notes.push({ file, name, title: parsed.title, content: parsed.content });
      }
    } catch (e) {
      console.log(`ERROR: ${name} (${e.message})`);
    }
  });
  return notes;
}

/**
 * Logs one candidate pair: which file is kept and which is the duplicate.
 *
 * @param {string} header - Prefix line identifying the decision.
 * @param {{file: string, title: string}} keep - Note that is kept.
 * @param {{file: string, title: string}} dup - Note considered a duplicate.
 */
function logPair(header, keep, dup) {
  console.log(header);
  console.log(`OLD: ${keep.title}`);
  console.log(`NEW: ${dup.title}`);
  console.log(`KEE: ${keep.file}`);
  console.log(`DEL: ${dup.file}`);
}

/**
 * Compares every unordered pair of notes exactly once and classifies the
 * later note of a similar pair as "delete" or "ask".
 *
 * Each pair is visited once (earlier note = kept, later note = duplicate)
 * so that two similar notes can never schedule each other for deletion,
 * and a note already scheduled for deletion is never used as the copy to keep.
 *
 * @param {Array<{file: string, title: string, content: string}>} notes
 * @returns {{cleanupFiles: string[], maybeCleanupFiles: Array<{file: string, title: string}>}}
 */
function findDuplicates(notes) {
  const cleanupFiles = [];
  const maybe = [];
  const doomed = new Set(); // files scheduled for automatic deletion
  const asked = new Set(); // files already queued for a prompt

  notes.forEach((keep, i) => {
    if (doomed.has(keep.file)) {
      return;
    }
    for (let j = i + 1; j < notes.length; j += 1) {
      const dup = notes[j];
      if (doomed.has(dup.file)) {
        continue;
      }
      const x = stringSimilarity.compareTwoStrings(keep.title, dup.title);
      // NOTE(review): identical titles (x === 1) are excluded, as in the
      // original script, where this check also skipped self-comparison.
      // Confirm whether exact-title duplicates should be handled as well.
      if (!(x > TITLE_THRESHOLD && x !== 1)) {
        continue;
      }
      const y = stringSimilarity.compareTwoStrings(keep.content, dup.content);
      if (y > AUTO_DELETE_THRESHOLD) {
        logPair(`GOING: --------------- ${x} - ${y}`, keep, dup);
        cleanupFiles.push(dup.file);
        doomed.add(dup.file);
      } else if (y > ASK_THRESHOLD) {
        logPair(`ASKING: ----------- ${x} - ${y}`, keep, dup);
        if (!asked.has(dup.file)) {
          asked.add(dup.file);
          maybe.push({ file: dup.file, title: dup.title });
        }
      } else {
        console.log(`SKIPPING: ${y} ${keep.title} - ${dup.title} - ${x}`);
      }
    }
  });

  // A file queued for a prompt may later have been scheduled for automatic
  // deletion; do not ask about a file that is going away anyway.
  const maybeCleanupFiles = maybe.filter((note) => !doomed.has(note.file));
  return { cleanupFiles, maybeCleanupFiles };
}

/**
 * Deletes one file, reporting (not throwing) on failure.
 *
 * @param {string} file - Path of the file to delete.
 */
function deleteFile(file) {
  console.log(`Deleting: ${file}`);
  try {
    fs.unlinkSync(file);
  } catch (e) {
    console.log(`ERROR deleting: ${file} (${e.message})`);
  }
}

/**
 * Entry point: load notes, delete the certain duplicates, then prompt for
 * each uncertain one.
 */
function main() {
  const notes = loadNotes(readDir);
  const result = findDuplicates(notes);

  result.cleanupFiles.forEach((file) => deleteFile(file));

  result.maybeCleanupFiles.forEach((note) => {
    console.log("--------------------------------------");
    try {
      console.log(`Info: ${note.title}`);
      if (query('delete')) {
        deleteFile(note.file);
      } else {
        console.log(`Skipping: ${note.file}`);
      }
    } catch (e) {
      console.log(`error (${e.message})`);
    }
  });
}

main();

//export.tsv
//Querybuilder search for multiple properties