/**
 * Sentence boundary detection: splits a text into sentences while trying not
 * to break on abbreviations, numbers, URLs, phone numbers and similar tokens
 * that contain a period.
 *
 * Public interface: default export `{ sentences }` and named export
 * `sentences`.
 */

/* ------------------------------------------------------------------------ */
/* String helpers                                                           */
/* ------------------------------------------------------------------------ */

const stringHelper = {};

/**
 * Tests the last character of `word`.
 *
 * @param {string} word - Word to inspect.
 * @param {string} c - A single character to compare against, or (when longer
 *   than one character) a set of candidate characters.
 * @returns {boolean} True when the last character equals `c` or is contained
 *   in the candidate set.
 */
stringHelper.endsWithChar = function ends_with_char(word, c) {
  const lastChar = word.slice(-1);
  if (c.length > 1) {
    return c.indexOf(lastChar) > -1;
  }
  return lastChar === c;
};

/**
 * Tests whether `word` ends with the string `end`.
 *
 * @param {string} word - Word to inspect.
 * @param {string} end - Expected suffix.
 * @returns {boolean} True when the tail of `word` equals `end`.
 */
stringHelper.endsWith = function ends_with(word, end) {
  return word.slice(word.length - end.length) === end;
};

/* ------------------------------------------------------------------------ */
/* Token classification                                                     */
/* ------------------------------------------------------------------------ */

const Match = {};

// Abbreviation list currently in effect; (re)assigned by setAbbreviations on
// every call to sentences().
let abbreviations;

// Built-in abbreviations (written without the trailing period).
const englishAbbreviations = [
  'al', 'adj', 'assn', 'Ave', 'BSc', 'MSc', 'Cell', 'Ch', 'Co', 'cc', 'Corp',
  'Dem', 'Dept', 'ed', 'eg', 'Eq', 'Eqs', 'est', 'est', 'etc', 'Ex', 'ext',
  'Fig', 'fig', 'Figs', 'figs', 'i.e', 'ie', 'Inc', 'inc', 'Jan', 'Feb',
  'Mar', 'Apr', 'Jun', 'Jul', 'Aug', 'Sep', 'Sept', 'Oct', 'Nov', 'Dec',
  'jr', 'mi', 'Miss', 'Mrs', 'Mr', 'Ms', 'Mol', 'mt', 'mts', 'no', 'Nos',
  'PhD', 'MD', 'BA', 'MA', 'MM', 'pl', 'pop', 'pp', 'Prof', 'Dr', 'pt',
  'Ref', 'Refs', 'Rep', 'repr', 'rev', 'Sec', 'Secs', 'Sgt', 'Col', 'Gen',
  'Rep', 'Sen', 'Gov', 'Lt', 'Maj', 'Capt', 'St', 'Sr', 'sr', 'Jr', 'jr',
  'Rev', 'Sun', 'Mon', 'Tu', 'Tue', 'Tues', 'Wed', 'Th', 'Thu', 'Thur',
  'Thurs', 'Fri', 'Sat', 'trans', 'Univ', 'Viz', 'Vol', 'vs', 'v',
];

/**
 * Selects the abbreviation list used by isCommonAbbreviation.
 *
 * @param {?Array<string>} abbr - Custom list; any falsy value selects the
 *   built-in list.
 */
Match.setAbbreviations = function (abbr) {
  abbreviations = abbr || englishAbbreviations;
};

/**
 * Numeric check based on the coercing global isNaN (so '' and whitespace count
 * as numbers, exactly as the callers rely on).
 *
 * @param {string} str - Token to test.
 * @param {number} [dotPos] - When truthy, only the three characters around
 *   this index (dotPos-1 .. dotPos+1) are tested.
 * @returns {boolean} True when the tested text coerces to a number.
 */
Match.isNumber = function (str, dotPos) {
  let candidate = str;
  if (dotPos) {
    candidate = str.slice(dotPos - 1, dotPos + 2);
  }
  // Intentionally the global isNaN: Number.isNaN would not coerce strings.
  return !isNaN(candidate);
};

/**
 * @param {string} str - Token to test.
 * @returns {boolean} True when the token starts with an uppercase letter
 *   followed by a lowercase letter, or passes isNumber.
 */
Match.isCapitalized = function (str) {
  return /^[A-Z][a-z].*/.test(str) || Match.isNumber(str);
};

/**
 * @param {string} str - Token following a candidate sentence end.
 * @returns {boolean} True when the token is capitalized/numeric or has a quote
 *   character within its first two characters.
 */
Match.isSentenceStarter = function (str) {
  return Match.isCapitalized(str) || /``|"|'/.test(str.substring(0, 2));
};

/**
 * @param {string} str - Token to test (punctuation is stripped first).
 * @returns {boolean} True when the stripped token is in the active
 *   abbreviation list.
 */
Match.isCommonAbbreviation = function (str) {
  const noSymbols = str.replace(/[-'`~!@#$%^&*()_|+=?;:'",.<>\{\}\[\]\\\/]/gi, '');
  return abbreviations.indexOf(noSymbols) !== -1;
};

/**
 * @param {string} word - Current token.
 * @param {string} next - Following token.
 * @returns {boolean} True when `word` is "a.m."/"p.m." and the last three word
 *   characters of `next` are "day" (case-insensitive).
 */
Match.isTimeAbbreviation = function (word, next) {
  if (word === 'a.m.' || word === 'p.m.') {
    const tail = next.replace(/\W+/g, '').slice(-3).toLowerCase();
    if (tail === 'day') {
      return true;
    }
  }
  return false;
};

/**
 * @param {string} word - Token to test (brackets are ignored).
 * @returns {boolean} True when the token starts with one or more
 *   "<char>." pairs.
 */
Match.isDottedAbbreviation = function (word) {
  const matches = word.replace(/[\(\)\[\]\{\}]/g, '').match(/(.\.)*/);
  return matches && matches[0].length > 0;
};

/**
 * @param {string} str - Token to test.
 * @returns {boolean} True when the token has at most three characters or is
 *   capitalized/numeric.
 */
Match.isCustomAbbreviation = function (str) {
  return str.length <= 3 || Match.isCapitalized(str);
};

/**
 * @param {number} wordCount - Words seen since the last sentence start/comma.
 * @param {Array<string>} words - Window of tokens starting at the current one.
 * @returns {boolean} True when the window looks like part of a name: either an
 *   early (wordCount < 5), short (< 6 chars), capitalized first token, or at
 *   least three tokens beginning with an uppercase A-Z letter.
 */
Match.isNameAbbreviation = function (wordCount, words) {
  if (words.length > 0) {
    if (wordCount < 5 && words[0].length < 6 && Match.isCapitalized(words[0])) {
      return true;
    }
    const capitalized = words.filter((word) => /[A-Z]/.test(word.charAt(0)));
    return capitalized.length >= 3;
  }
  return false;
};

/**
 * @param {string} str - Token to test.
 * @returns {?Array} Match result of the phone-number pattern, or null.
 */
Match.isPhoneNr = function (str) {
  return str.match(
    /^(?:(?:\+?1\s*(?:[.-]\s*)?)?(?:\(\s*([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9])\s*\)|([2-9]1[02-9]|[2-9][02-8]1|[2-9][02-8][02-9]))\s*(?:[.-]\s*)?)?([2-9]1[02-9]|[2-9][02-9]1|[2-9][02-9]{2})\s*(?:[.-]\s*)?([0-9]{4})(?:\s*(?:#|x\.?|ext\.?|extension)\s*(\d+))?$/
  );
};

/**
 * @param {string} str - Token to test.
 * @returns {?Array} Match result of the URL pattern, or null.
 */
Match.isURL = function (str) {
  return str.match(
    /[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)/
  );
};

/**
 * Detects two sentences glued together without whitespace ("end.Start").
 * The first '.' is preferred; '!' and '?' are only considered when the token
 * contains no earlier-priority character.
 *
 * @param {string} word - Token to test.
 * @returns {(Array<string>|false)} `[before, after]` (the boundary character
 *   itself is dropped) when the character after the boundary is a letter,
 *   otherwise false.
 */
Match.isConcatenated = function (word) {
  let pos = word.indexOf('.');
  if (pos === -1) {
    pos = word.indexOf('!');
  }
  if (pos === -1) {
    pos = word.indexOf('?');
  }
  if (pos > -1) {
    const following = word.charAt(pos + 1);
    if (following.match(/[a-zA-Z].*/)) {
      return [word.slice(0, pos), word.slice(pos + 1)];
    }
  }
  return false;
};

/**
 * @param {string} word - Token to test.
 * @returns {boolean} True when the token is exactly '.', '!' or '?'.
 */
Match.isBoundaryChar = function (word) {
  return word === '.' || word === '!' || word === '?';
};

/* ------------------------------------------------------------------------ */
/* Tokenizer                                                                */
/* ------------------------------------------------------------------------ */

const tokenizer = {};

// Marker inserted into the text wherever a hard boundary is forced.
const BOUNDARY_PLACEHOLDER = ' @~@ ';
const BOUNDARY_TOKEN = BOUNDARY_PLACEHOLDER.trim();

const hasNonWhitespace = /\S/;
const newlineBoundaryPattern = /\n+|[-#=_+*]{4,}/g;
const wordPattern = /\S+|\n/g;

/**
 * Splits `text` into sentences.
 *
 * @param {string} text - Input text. Non-strings, empty strings and
 *   whitespace-only strings yield [].
 * @param {(Object|boolean)} [user_options] - Option overrides. A boolean is a
 *   shorthand for `{ newline_boundaries: <boolean> }`.
 * @param {boolean} [user_options.newline_boundaries=false] - Treat newlines
 *   and runs of four or more of `-#=_+*` as sentence boundaries.
 * @param {boolean} [user_options.html_boundaries=false] - Treat `<br>` and the
 *   closing tags listed in `html_boundaries_tags` as sentence boundaries.
 * @param {Array<string>} [user_options.html_boundaries_tags] - Tag names used
 *   by `html_boundaries` (default p, div, ul, ol).
 * @param {boolean} [user_options.sanitize=false] - Run the text through the
 *   sanitizer before tokenizing.
 * @param {(Array<string>|false)} [user_options.allowed_tags=false] - Tags the
 *   sanitizer keeps; setting it also enables sanitizing.
 * @param {boolean} [user_options.preserve_whitespace=false] - Return sentences
 *   with their original whitespace (ignored when newline or html boundaries
 *   are enabled).
 * @param {?Array<string>} [user_options.abbreviations=null] - Replacement for
 *   the built-in abbreviation list.
 * @returns {Array<string>} The detected sentences.
 */
tokenizer.sentences = function (text, user_options) {
  if (!text || typeof text !== 'string' || !text.length) {
    return [];
  }
  if (!hasNonWhitespace.test(text)) {
    return [];
  }

  const options = {
    newline_boundaries: false,
    html_boundaries: false,
    html_boundaries_tags: ['p', 'div', 'ul', 'ol'],
    sanitize: false,
    allowed_tags: false,
    preserve_whitespace: false,
    abbreviations: null,
  };

  if (typeof user_options === 'boolean') {
    // Boolean shorthand: honour the value that was passed (previously `false`
    // also switched newline boundaries on).
    options.newline_boundaries = user_options;
  } else {
    for (const key in user_options) {
      options[key] = user_options[key];
    }
  }

  Match.setAbbreviations(options.abbreviations);

  let input = text;

  if (options.newline_boundaries) {
    input = input.replace(newlineBoundaryPattern, BOUNDARY_PLACEHOLDER);
  }

  if (options.html_boundaries) {
    // The first alternative must be the <br> tag; an empty alternative would
    // match at every position and scatter the placeholder through the text.
    const boundaryTags = new RegExp(
      '(<br\\s*\\/?>|<\\/(' + options.html_boundaries_tags.join('|') + ')>)',
      'g'
    );
    input = input.replace(boundaryTags, '$1' + BOUNDARY_PLACEHOLDER);
  }

  if (options.sanitize || options.allowed_tags) {
    if (!options.allowed_tags) {
      options.allowed_tags = [''];
    }
    // NOTE(review): `l` is not defined anywhere in this file as visible here;
    // presumably it is the HTML sanitizer binding the build was expected to
    // provide - confirm. While it is unbound this branch throws a
    // ReferenceError.
    input = l(input, { allowedTags: options.allowed_tags });
  }

  let words;
  let tokens;

  if (options.preserve_whitespace) {
    // Capturing split keeps separators: even indices hold whitespace, odd
    // indices hold the words.
    tokens = input.split(/(<br\s*\/?>|\S+|\n+)/);
    words = tokens.filter((token, index) => index % 2);
  } else {
    words = input.trim().match(wordPattern);
  }

  if (!words || !words.length) {
    return [];
  }

  let wordCount = 0;
  let collected = [];
  let current = [];

  for (let i = 0, total = words.length; i < total; i++) {
    wordCount++;

    // The untouched token goes into the sentence; the checks below may strip
    // a trailing quote from words[i] for classification only.
    current.push(words[i]);

    if (words[i].indexOf(',') !== -1) {
      wordCount = 0;
    }

    if (
      Match.isBoundaryChar(words[i]) ||
      stringHelper.endsWithChar(words[i], '?!') ||
      words[i] === BOUNDARY_TOKEN
    ) {
      if (
        (options.newline_boundaries || options.html_boundaries) &&
        words[i] === BOUNDARY_TOKEN
      ) {
        current.pop(); // drop the placeholder itself
      }
      collected.push(current);
      wordCount = 0;
      current = [];
      continue;
    }

    if (
      stringHelper.endsWithChar(words[i], '"') ||
      stringHelper.endsWithChar(words[i], '”')
    ) {
      words[i] = words[i].slice(0, -1);
    }

    if (stringHelper.endsWithChar(words[i], '.')) {
      if (i + 1 < total) {
        // Single non-digit character followed by a period (e.g. an initial).
        if (words[i].length === 2 && isNaN(words[i].charAt(0))) {
          continue;
        }
        if (Match.isCommonAbbreviation(words[i])) {
          continue;
        }

        // NOTE(review): the second argument of slice() is an absolute end
        // index, so these windows are empty once i >= 6 (resp. 5). Kept as-is
        // on purpose: widening the window changes how ordinary text is split.
        if (Match.isSentenceStarter(words[i + 1])) {
          if (Match.isTimeAbbreviation(words[i], words[i + 1])) {
            continue;
          }
          if (Match.isNameAbbreviation(wordCount, words.slice(i, 6))) {
            continue;
          }
          if (Match.isNumber(words[i + 1]) && Match.isCustomAbbreviation(words[i])) {
            continue;
          }
        } else {
          if (stringHelper.endsWith(words[i], '..')) {
            continue;
          }
          if (Match.isDottedAbbreviation(words[i])) {
            continue;
          }
          if (Match.isNameAbbreviation(wordCount, words.slice(i, 5))) {
            continue;
          }
        }
      }

      collected.push(current);
      current = [];
      wordCount = 0;
      continue;
    }

    const dotIndex = words[i].indexOf('.');
    if (dotIndex > -1) {
      if (Match.isNumber(words[i], dotIndex)) {
        continue;
      }
      if (Match.isDottedAbbreviation(words[i])) {
        continue;
      }
      if (Match.isURL(words[i]) || Match.isPhoneNr(words[i])) {
        continue;
      }
    }

    const parts = Match.isConcatenated(words[i]);
    if (parts) {
      current.pop();
      current.push(parts[0]);
      collected.push(current);
      current = [];
      wordCount = 0;
      current.push(parts[1]);
    }
  }

  if (current.length) {
    collected.push(current);
  }

  collected = collected.filter((sentence) => sentence.length > 0);

  // Input made up solely of boundary markers leaves nothing to return; bail
  // out instead of seeding the merge below with `undefined`.
  if (!collected.length) {
    return [];
  }

  // Re-attach a lone 1-2 character "x." fragment to the sentence that follows
  // it, unless that sentence's first word contains a period.
  const merged = collected.slice(1).reduce(
    (out, sentence) => {
      const previous = out[out.length - 1];
      if (
        previous.length === 1 &&
        /^.{1,2}[.]$/.test(previous[0]) &&
        !/[.]/.test(sentence[0])
      ) {
        out.pop();
        out.push(previous.concat(sentence));
        return out;
      }
      out.push(sentence);
      return out;
    },
    [collected[0]]
  );

  return merged.map((sentence, index) => {
    if (
      options.preserve_whitespace &&
      !options.newline_boundaries &&
      !options.html_boundaries
    ) {
      // Each word owns one following whitespace token; the very first
      // sentence also owns the leading token of the split.
      let tokenCount = sentence.length * 2;
      if (index === 0) {
        tokenCount += 1;
      }
      return tokens.splice(0, tokenCount).join('');
    }
    return sentence.join(' ');
  });
};

const sentences = tokenizer.sentences;

export default tokenizer;
export { sentences };
//# sourceMappingURL=tokenizer.js.map