diff --git a/js/nlp/compromise.js b/js/nlp/compromise.js new file mode 100644 index 0000000..ab96729 --- /dev/null +++ b/js/nlp/compromise.js @@ -0,0 +1,14069 @@ +/* compromise 13.5.0X004 MIT/blab */ +/* https://github.com/spencermountain/compromise */ + +/* polyfills */ +Object.addProperty(Array,'findIndex', function(callback) { + if (this === null) { + throw new TypeError('Array.prototype.findIndex called on null or undefined'); + } else if (typeof callback !== 'function') { + throw new TypeError('callback must be a function'); + } + var list = Object(this); + // Makes sures is always has an positive integer as length. + var length = list.length >>> 0; + var thisArg = arguments[1]; + for (var i = 0; i < length; i++) { + if ( callback.call(thisArg, list[i], i, list) ) { + return i; + } + } + return -1; +}); + +/* NLP module */ +(function (global, factory) { + typeof exports === 'object' && typeof module !== 'undefined' ? module.exports = factory() : + typeof define === 'function' && define.amd ? define(factory) : + (global = typeof globalThis !== 'undefined' ? globalThis : global || self, global.nlp = factory()); +}(this, (function () { 'use strict'; + function similar_text (first, second, percent) { + // http://kevin.vanzonneveld.net + // + original by: Rafał Kukawski (http://blog.kukawski.pl) + // + bugfixed by: Chris McMacken + // + added percent parameter by: Markus Padourek (taken from http://www.kevinhq.com/2012/06/php-similartext-function-in-javascript_16.html) + // * example 1: similar_text('Hello World!', 'Hello phpjs!'); + // * returns 1: 7 + // * example 2: similar_text('Hello World!', null); + // * returns 2: 0 + // * example 3: similar_text('Hello World!', null, 1); + // * returns 3: 58.33 + if (first === null || second === null || typeof first === 'undefined' || typeof second === 'undefined') { + return 0; + } + + first += ''; + second += ''; + + var pos1 = 0, + pos2 = 0, + max = 0, + firstLength = first.length, + secondLength = second.length, + p, q, l, sum; + + max = 0; + + for (p = 0; p < firstLength; p++) { + for (q = 0; q < secondLength; q++) { + for (l = 0; + (p + l < firstLength) && (q + l < secondLength) && (first.charAt(p + l) === second.charAt(q + l)); l++); + if (l > max) { + max = l; + pos1 = p; + pos2 = q; + } + } + } + + sum = max; + + if (sum) { + if (pos1 && pos2) { + sum += similar_text(first.substr(0, pos2), second.substr(0, pos2)); + } + + if ((pos1 + max < firstLength) && (pos2 + max < secondLength)) { + sum += similar_text(first.substr(pos1 + max, firstLength - pos1 - max), second.substr(pos2 + max, secondLength - pos2 - max)); + } + } + + if (!percent) { + return sum; + } else { + return (sum * 200) / (firstLength + secondLength); + } + } + + function _typeof(obj) { + "@babel/helpers - typeof"; + + if (typeof Symbol === "function" && typeof Symbol.iterator === "symbol") { + _typeof = function (obj) { + return typeof obj; + }; + } else { + _typeof = function (obj) { + return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; + }; + } + + return _typeof(obj); + } + + function _classCallCheck(instance, Constructor) { + if (!(instance instanceof Constructor)) { + throw new TypeError("Cannot call a class as a function"); + } + } + + function _defineProperties(target, props) { + for (var i = 0; i < props.length; i++) { + var descriptor = props[i]; + descriptor.enumerable = descriptor.enumerable || false; + descriptor.configurable = true; + if ("value" in descriptor) descriptor.writable = true; + Object.defineProperty(target, descriptor.key, descriptor); + } + } + + function _createClass(Constructor, protoProps, staticProps) { + if (protoProps) _defineProperties(Constructor.prototype, protoProps); + if (staticProps) _defineProperties(Constructor, staticProps); + return Constructor; + } + + function _inherits(subClass, superClass) { + if (typeof superClass !== "function" && superClass !== null) { + throw new TypeError("Super expression must either be null or a function"); + } + + subClass.prototype = Object.create(superClass && superClass.prototype, { + constructor: { + value: subClass, + writable: true, + configurable: true + } + }); + if (superClass) _setPrototypeOf(subClass, superClass); + } + + function _getPrototypeOf(o) { + _getPrototypeOf = Object.setPrototypeOf ? Object.getPrototypeOf : function _getPrototypeOf(o) { + return o.__proto__ || Object.getPrototypeOf(o); + }; + return _getPrototypeOf(o); + } + + function _setPrototypeOf(o, p) { + _setPrototypeOf = Object.setPrototypeOf || function _setPrototypeOf(o, p) { + o.__proto__ = p; + return o; + }; + + return _setPrototypeOf(o, p); + } + + function _isNativeReflectConstruct() { + if (typeof Reflect === "undefined" || !Reflect.construct) return false; + if (Reflect.construct.sham) return false; + if (typeof Proxy === "function") return true; + + try { + Date.prototype.toString.call(Reflect.construct(Date, [], function () {})); + return true; + } catch (e) { + return false; + } + } + + function _assertThisInitialized(self) { + if (self === void 0) { + throw new ReferenceError("this hasn't been initialised - super() hasn't been called"); + } + + return self; + } + + function _possibleConstructorReturn(self, call) { + if (call && (typeof call === "object" || typeof call === "function")) { + return call; + } + + return _assertThisInitialized(self); + } + + function _createSuper(Derived) { + var hasNativeReflectConstruct = _isNativeReflectConstruct(); + + return function _createSuperInternal() { + var Super = _getPrototypeOf(Derived), + result; + + if (hasNativeReflectConstruct) { + var NewTarget = _getPrototypeOf(this).constructor; + + result = Reflect.construct(Super, arguments, NewTarget); + } else { + result = Super.apply(this, arguments); + } + + return _possibleConstructorReturn(this, result); + }; + } + + function _slicedToArray(arr, i) { + return _arrayWithHoles(arr) || _iterableToArrayLimit(arr, i) || _unsupportedIterableToArray(arr, i) || _nonIterableRest(); + } + + function _arrayWithHoles(arr) { + if (Array.isArray(arr)) return arr; + } + + function _iterableToArrayLimit(arr, i) { + if (typeof Symbol === "undefined" || !(Symbol.iterator in Object(arr))) return; + var _arr = []; + var _n = true; + var _d = false; + var _e = undefined; + + try { + for (var _i = arr[Symbol.iterator](), _s; !(_n = (_s = _i.next()).done); _n = true) { + _arr.push(_s.value); + + if (i && _arr.length === i) break; + } + } catch (err) { + _d = true; + _e = err; + } finally { + try { + if (!_n && _i["return"] != null) _i["return"](); + } finally { + if (_d) throw _e; + } + } + + return _arr; + } + + function _unsupportedIterableToArray(o, minLen) { + if (!o) return; + if (typeof o === "string") return _arrayLikeToArray(o, minLen); + var n = Object.prototype.toString.call(o).slice(8, -1); + if (n === "Object" && o.constructor) n = o.constructor.name; + if (n === "Map" || n === "Set") return Array.from(o); + if (n === "Arguments" || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(n)) return _arrayLikeToArray(o, minLen); + } + + function _arrayLikeToArray(arr, len) { + if (len == null || len > arr.length) len = arr.length; + + for (var i = 0, arr2 = new Array(len); i < len; i++) arr2[i] = arr[i]; + + return arr2; + } + + function _nonIterableRest() { + throw new TypeError("Invalid attempt to destructure non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method."); + } + + //this is a not-well-thought-out way to reduce our dependence on `object===object` stuff + var chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'.split(''); //generates a unique id for this term + + function makeId(str) { + str = str || '_'; + var text = str + '-'; + + for (var i = 0; i < 7; i++) { + text += chars[Math.floor(Math.random() * chars.length)]; + } + + return text; + } + + var _id = makeId; + + //a hugely-ignorant, and widely subjective transliteration of latin, cryllic, greek unicode characters to english ascii. + //approximate visual (not semantic or phonetic) relationship between unicode and ascii characters + //http://en.wikipedia.org/wiki/List_of_Unicode_characters + //https://docs.google.com/spreadsheet/ccc?key=0Ah46z755j7cVdFRDM1A2YVpwa1ZYWlpJM2pQZ003M0E + var compact = { + '!': '¡', + '?': '¿Ɂ', + '"': '“”"❝❞', + "'": '‘‛❛❜', + '-': '—–', + a: 'ªÀÁÂÃÄÅàáâãäåĀāĂ㥹ǍǎǞǟǠǡǺǻȀȁȂȃȦȧȺΆΑΔΛάαλАадѦѧӐӑӒӓƛɅæ', + b: 'ßþƀƁƂƃƄƅɃΒβϐϦБВЪЬвъьѢѣҌҍ', + c: '¢©ÇçĆćĈĉĊċČčƆƇƈȻȼͻͼͽϲϹϽϾСсєҀҁҪҫ', + d: 'ÐĎďĐđƉƊȡƋƌǷ', + e: 'ÈÉÊËèéêëĒēĔĕĖėĘęĚěƎƏƐǝȄȅȆȇȨȩɆɇΈΕΞΣέεξϱϵ϶ЀЁЕЭеѐёҼҽҾҿӖӗӘәӚӛӬӭ', + f: 'ƑƒϜϝӺӻҒғſ', + g: 'ĜĝĞğĠġĢģƓǤǥǦǧǴǵ', + h: 'ĤĥĦħƕǶȞȟΉΗЂЊЋНнђћҢңҤҥҺһӉӊ', + I: 'ÌÍÎÏ', + i: 'ìíîïĨĩĪīĬĭĮįİıƖƗȈȉȊȋΊΐΪίιϊІЇії', + j: 'ĴĵǰȷɈɉϳЈј', + k: 'ĶķĸƘƙǨǩΚκЌЖКжкќҚқҜҝҞҟҠҡ', + l: 'ĹĺĻļĽľĿŀŁłƚƪǀǏǐȴȽΙӀӏ', + m: 'ΜϺϻМмӍӎ', + n: 'ÑñŃńŅņŇňʼnŊŋƝƞǸǹȠȵΝΠήηϞЍИЙЛПийлпѝҊҋӅӆӢӣӤӥπ', + o: 'ÒÓÔÕÖØðòóôõöøŌōŎŏŐőƟƠơǑǒǪǫǬǭǾǿȌȍȎȏȪȫȬȭȮȯȰȱΌΘΟθοσόϕϘϙϬϭϴОФоѲѳӦӧӨөӪӫ', + p: 'ƤƿΡρϷϸϼРрҎҏÞ', + q: 'Ɋɋ', + r: 'ŔŕŖŗŘřƦȐȑȒȓɌɍЃГЯгяѓҐґ', + s: 'ŚśŜŝŞşŠšƧƨȘșȿЅѕ', + t: 'ŢţŤťŦŧƫƬƭƮȚțȶȾΓΤτϮТт', + u: 'µÙÚÛÜùúûüŨũŪūŬŭŮůŰűŲųƯưƱƲǓǔǕǖǗǘǙǚǛǜȔȕȖȗɄΰμυϋύ', + v: 'νѴѵѶѷ', + w: 'ŴŵƜωώϖϢϣШЩшщѡѿ', + x: '×ΧχϗϰХхҲҳӼӽӾӿ', + y: 'ÝýÿŶŷŸƳƴȲȳɎɏΎΥΫγψϒϓϔЎУучўѰѱҮүҰұӮӯӰӱӲӳ', + z: 'ŹźŻżŽžƩƵƶȤȥɀΖζ' + }; //decompress data into two hashes + + var unicode = {}; + Object.keys(compact).forEach(function (k) { + compact[k].split('').forEach(function (s) { + unicode[s] = k; + }); + }); + + var killUnicode = function killUnicode(str) { + var chars = str.split(''); + chars.forEach(function (s, i) { + if (unicode[s]) { + chars[i] = unicode[s]; + } + }); + return chars.join(''); + }; + + var unicode_1 = killUnicode; // console.log(killUnicode('bjŏȒk—Ɏó')); + + var periodAcronym = /([A-Z]\.)+[A-Z]?,?$/; + var oneLetterAcronym = /^[A-Z]\.,?$/; + var noPeriodAcronym = /[A-Z]{2,}('s|,)?$/; + var lowerCaseAcronym = /([a-z]\.){2,}[a-z]\.?$/; + + var isAcronym = function isAcronym(str) { + //like N.D.A + if (periodAcronym.test(str) === true) { + return true; + } //like c.e.o + + + if (lowerCaseAcronym.test(str) === true) { + return true; + } //like 'F.' + + + if (oneLetterAcronym.test(str) === true) { + return true; + } //like NDA + + + if (noPeriodAcronym.test(str) === true) { + return true; + } + + return false; + }; + + var isAcronym_1 = isAcronym; + + var hasSlash = /[a-z\u00C0-\u00FF] ?\/ ?[a-z\u00C0-\u00FF]/; + /** some basic operations on a string to reduce noise */ + + var clean = function clean(str) { + str = str || ''; + str = str.toLowerCase(); + str = str.trim(); + var original = str; //(very) rough ASCII transliteration - bjŏrk -> bjork + + str = unicode_1(str); //rough handling of slashes - 'see/saw' + + if (hasSlash.test(str) === true) { + str = str.replace(/\/.*/, ''); + } //#tags, @mentions + + + str = str.replace(/^[#@]/, ''); //punctuation + + str = str.replace(/[,;.!?]+$/, ''); // coerce single curly quotes + + str = str.replace(/[\u0027\u0060\u00B4\u2018\u2019\u201A\u201B\u2032\u2035\u2039\u203A]+/g, "'"); // coerce double curly quotes + + str = str.replace(/[\u0022\u00AB\u00BB\u201C\u201D\u201E\u201F\u2033\u2034\u2036\u2037\u2E42\u301D\u301E\u301F\uFF02]+/g, '"'); //coerce Unicode ellipses + + str = str.replace(/\u2026/g, '...'); //en-dash + + str = str.replace(/\u2013/g, '-'); //lookin'->looking (make it easier for conjugation) + + str = str.replace(/([aeiou][ktrp])in$/, '$1ing'); //turn re-enactment to reenactment + + if (/^(re|un)-?[^aeiou]./.test(str) === true) { + str = str.replace('-', ''); + } //strip leading & trailing grammatical punctuation + + + if (/^[:;]/.test(str) === false) { + str = str.replace(/\.{3,}$/g, ''); + str = str.replace(/[",\.!:;\?\)]+$/g, ''); + str = str.replace(/^['"\(]+/g, ''); + } //do this again.. + + + str = str.trim(); //oh shucks, + + if (str === '') { + str = original; + } //compact acronyms + + + if (isAcronym_1(str)) { + str = str.replace(/\./g, ''); + } //nice-numbers + + + str = str.replace(/([0-9]),([0-9])/g, '$1$2'); + return str; + }; + + var clean_1 = clean; // console.log(normalize('Dr. V Cooper')); + + /** reduced is one step further than clean */ + var reduced = function reduced(str) { + // remove apostrophes + str = str.replace(/['’]s$/, ''); + str = str.replace(/s['’]$/, 's'); + return str; + }; + + var reduce = reduced; + + //all punctuation marks, from https://en.wikipedia.org/wiki/Punctuation + //we have slightly different rules for start/end - like #hashtags. + + var startings = /^[ \n\t\.’'\[\](){}⟨⟩:,،、‒–—―…!.‹›«»‐\-?‘’;\/⁄·&*•^†‡°¡¿※№÷׺ª%‰+−=‱¶′″‴§~|‖¦©℗®℠™¤₳฿\u0022|\uFF02|\u0027|\u201C|\u2018|\u201F|\u201B|\u201E|\u2E42|\u201A|\u00AB|\u2039|\u2035|\u2036|\u2037|\u301D|\u0060|\u301F]+/; + var endings = /[ \n\t\.’'\[\](){}⟨⟩:,،、‒–—―…!.‹›«»‐\-?‘’;\/⁄·&*@•^†‡°¡¿※#№÷׺ª‰+−=‱¶′″‴§~|‖¦©℗®℠™¤₳฿\u0022|\uFF02|\u0027|\u201D|\u2019|\u201D|\u2019|\u201D|\u201D|\u2019|\u00BB|\u203A|\u2032|\u2033|\u2034|\u301E|\u00B4|\u301E]+$/; //money = ₵¢₡₢$₫₯֏₠€ƒ₣₲₴₭₺₾ℳ₥₦₧₱₰£៛₽₹₨₪৳₸₮₩¥ + + var hasSlash$1 = /\//; + var hasApostrophe = /['’]/; + var hasAcronym = /^[a-z]\.([a-z]\.)+/i; + var minusNumber = /^[-+\.][0-9]/; + /** turn given text into a parsed-up object + * seperate the 'meat' of the word from the whitespace+punctuation + */ + + var parseTerm = function parseTerm(str) { + var original = str; + var pre = ''; + var post = ''; + str = str.replace(startings, function (found) { + pre = found; // support '-40' + + if ((pre === '-' || pre === '+' || pre === '.') && minusNumber.test(str)) { + pre = ''; + return found; + } + + return ''; + }); + str = str.replace(endings, function (found) { + post = found; // keep s-apostrophe - "flanders'" or "chillin'" + + if (hasApostrophe.test(found) && /[sn]['’]$/.test(original) && hasApostrophe.test(pre) === false) { + post = post.replace(hasApostrophe, ''); + return "'"; + } //keep end-period in acronym + + + if (hasAcronym.test(str) === true) { + post = post.replace(/\./, ''); + return '.'; + } + + return ''; + }); //we went too far.. + + if (str === '') { + // do a very mild parse, and hope for the best. + original = original.replace(/ *$/, function (after) { + post = after || ''; + return ''; + }); + str = original; + pre = ''; + post = post; + } // create the various forms of our text, + + + var clean = clean_1(str); + var parsed = { + text: str, + clean: clean, + reduced: reduce(clean), + pre: pre, + post: post + }; // support aliases for slashes + + if (hasSlash$1.test(str)) { + str.split(hasSlash$1).forEach(function (word) { + parsed.alias = parsed.alias || {}; + parsed.alias[word.trim()] = true; + }); + } + + return parsed; + }; + + var parse = parseTerm; + + function createCommonjsModule(fn, basedir, module) { + return module = { + path: basedir, + exports: {}, + require: function (path, base) { + return commonjsRequire(path, (base === undefined || base === null) ? module.path : base); + } + }, fn(module, module.exports), module.exports; + } + + function commonjsRequire () { + throw new Error('Dynamic requires are not currently supported by @rollup/plugin-commonjs'); + } + + var _01Case = createCommonjsModule(function (module, exports) { + var titleCase = /^[A-Z][a-z'\u00C0-\u00FF]/; + var upperCase = /^[A-Z]+s?$/; + /** convert all text to uppercase */ + + exports.toUpperCase = function () { + this.text = this.text.toUpperCase(); + return this; + }; + /** convert all text to lowercase */ + + + exports.toLowerCase = function () { + this.text = this.text.toLowerCase(); + return this; + }; + /** only set the first letter to uppercase + * leave any existing uppercase alone + */ + + + exports.toTitleCase = function () { + this.text = this.text.replace(/^ *[a-z\u00C0-\u00FF]/, function (x) { + return x.toUpperCase(); + }); //support unicode? + + return this; + }; + /** if all letters are uppercase */ + + + exports.isUpperCase = function () { + return upperCase.test(this.text); + }; + /** if the first letter is uppercase, and the rest are lowercase */ + + + exports.isTitleCase = function () { + return titleCase.test(this.text); + }; + + exports.titleCase = exports.isTitleCase; + }); + + var _02Punctuation = createCommonjsModule(function (module, exports) { + // these methods are called with '@hasComma' in the match syntax + // various unicode quotation-mark formats + var startQuote = /(\u0022|\uFF02|\u0027|\u201C|\u2018|\u201F|\u201B|\u201E|\u2E42|\u201A|\u00AB|\u2039|\u2035|\u2036|\u2037|\u301D|\u0060|\u301F)/; + var endQuote = /(\u0022|\uFF02|\u0027|\u201D|\u2019|\u201D|\u2019|\u201D|\u201D|\u2019|\u00BB|\u203A|\u2032|\u2033|\u2034|\u301E|\u00B4|\u301E)/; + /** search the term's 'post' punctuation */ + + exports.hasPost = function (punct) { + return this.post.indexOf(punct) !== -1; + }; + /** search the term's 'pre' punctuation */ + + + exports.hasPre = function (punct) { + return this.pre.indexOf(punct) !== -1; + }; + /** does it have a quotation symbol? */ + + + exports.hasQuote = function () { + return startQuote.test(this.pre) || endQuote.test(this.post); + }; + + exports.hasQuotation = exports.hasQuote; + /** does it have a comma? */ + + exports.hasComma = function () { + return this.hasPost(','); + }; + /** does it end in a period? */ + + + exports.hasPeriod = function () { + return this.hasPost('.') === true && this.hasPost('...') === false; + }; + /** does it end in an exclamation */ + + + exports.hasExclamation = function () { + return this.hasPost('!'); + }; + /** does it end with a question mark? */ + + + exports.hasQuestionMark = function () { + return this.hasPost('?') || this.hasPost('¿'); + }; + /** is there a ... at the end? */ + + + exports.hasEllipses = function () { + return this.hasPost('..') || this.hasPost('…') || this.hasPre('..') || this.hasPre('…'); + }; + /** is there a semicolon after this word? */ + + + exports.hasSemicolon = function () { + return this.hasPost(';'); + }; + /** is there a slash '/' in this word? */ + + + exports.hasSlash = function () { + var slash = /\//; + return slash.test(this.text); + }; + /** a hyphen connects two words like-this */ + + + exports.hasHyphen = function () { + var hyphen = /(-|–|—)/; + return hyphen.test(this.post) || hyphen.test(this.pre); + }; + /** a dash separates words - like that */ + + + exports.hasDash = function () { + var hyphen = / (-|–|—) /; + return hyphen.test(this.post) || hyphen.test(this.pre); + }; + /** is it multiple words combinded */ + + + exports.hasContraction = function () { + return Boolean(this.implicit); + }; + /** try to sensibly put this punctuation mark into the term */ + + + exports.addPunctuation = function (punct) { + // dont add doubles + if (punct === ',' || punct === ';') { + this.post = this.post.replace(punct, ''); + } + + this.post = punct + this.post; + return this; + }; + }); + + //declare it up here + var wrapMatch = function wrapMatch() {}; + /** ignore optional/greedy logic, straight-up term match*/ + + + var doesMatch = function doesMatch(t, reg, index, length) { + // support id matches + if (reg.id === t.id) { + return true; + } // support '.' + + + if (reg.anything === true) { + return true; + } // support '^' (in parentheses) + + + if (reg.start === true && index !== 0) { + return false; + } // support '$' (in parentheses) + + + if (reg.end === true && index !== length - 1) { + return false; + } //support a text match + + + if (reg.word !== undefined) { + //match contractions + if (t.implicit !== null && t.implicit === reg.word) { + return true; + } // term aliases for slashes and things + + + if (t.alias !== undefined && t.alias.hasOwnProperty(reg.word)) { + return true; + } // support ~ match + + + if (reg.soft === true && reg.word === t.root) { + return true; + } //match either .clean or .text + + + return reg.word === t.clean || reg.word === t.text || reg.word === t.reduced; + } //support #Tag + + + if (reg.tag !== undefined) { + return t.tags[reg.tag] === true; + } //support @method + + + if (reg.method !== undefined) { + if (typeof t[reg.method] === 'function' && t[reg.method]() === true) { + return true; + } + + return false; + } //support /reg/ + + + if (reg.regex !== undefined) { + return reg.regex.test(t.clean); + } // support optimized (one|two) + + + if (reg.oneOf !== undefined) { + return reg.oneOf.hasOwnProperty(t.reduced) || reg.oneOf.hasOwnProperty(t.text); + } //support (one|two) + + + if (reg.choices !== undefined) { + // try to support && operator + if (reg.operator === 'and') { + // must match them all + return reg.choices.every(function (r) { + return wrapMatch(t, r, index, length); + }); + } // or must match one + + + return reg.choices.some(function (r) { + return wrapMatch(t, r, index, length); + }); + } + + return false; + }; // wrap result for !negative match logic + + + wrapMatch = function wrapMatch(t, reg, index, length) { + var result = doesMatch(t, reg, index, length); + + if (reg.negative === true) { + return !result; + } + + return result; + }; + + var _doesMatch = wrapMatch; + + var boring = {}; + /** check a match object against this term */ + + var doesMatch_1 = function doesMatch_1(reg, index, length) { + return _doesMatch(this, reg, index, length); + }; + /** does this term look like an acronym? */ + + + var isAcronym_1$1 = function isAcronym_1$1() { + return isAcronym_1(this.text); + }; + /** is this term implied by a contraction? */ + + + var isImplicit = function isImplicit() { + return this.text === '' && Boolean(this.implicit); + }; + /** does the term have at least one good tag? */ + + + var isKnown = function isKnown() { + return Object.keys(this.tags).some(function (t) { + return boring[t] !== true; + }); + }; + /** cache the root property of the term */ + + + var setRoot = function setRoot(world) { + var transform = world.transforms; + var str = this.implicit || this.clean; + + if (this.tags.Plural) { + str = transform.toSingular(str, world); + } + + if (this.tags.Verb && !this.tags.Negative && !this.tags.Infinitive) { + var tense = null; + + if (this.tags.PastTense) { + tense = 'PastTense'; + } else if (this.tags.Gerund) { + tense = 'Gerund'; + } else if (this.tags.PresentTense) { + tense = 'PresentTense'; + } else if (this.tags.Participle) { + tense = 'Participle'; + } else if (this.tags.Actor) { + tense = 'Actor'; + } + + str = transform.toInfinitive(str, world, tense); + } + + this.root = str; + }; + + var _03Misc = { + doesMatch: doesMatch_1, + isAcronym: isAcronym_1$1, + isImplicit: isImplicit, + isKnown: isKnown, + setRoot: setRoot + }; + + var hasSpace = /[\s-]/; + var isUpperCase = /^[A-Z-]+$/; // const titleCase = str => { + // return str.charAt(0).toUpperCase() + str.substr(1) + // } + + /** return various text formats of this term */ + + var textOut = function textOut(options, showPre, showPost) { + options = options || {}; + var word = this.text; + var before = this.pre; + var after = this.post; // -word- + + if (options.reduced === true) { + word = this.reduced || ''; + } + + if (options.root === true) { + word = this.root || ''; + } + + if (options.implicit === true && this.implicit) { + word = this.implicit || ''; + } + + if (options.normal === true) { + word = this.clean || this.text || ''; + } + + if (options.root === true) { + word = this.root || this.reduced || ''; + } + + if (options.unicode === true) { + word = unicode_1(word); + } // cleanup case + + + if (options.titlecase === true) { + if (this.tags.ProperNoun && !this.titleCase()) ; else if (this.tags.Acronym) { + word = word.toUpperCase(); //uppercase acronyms + } else if (isUpperCase.test(word) && !this.tags.Acronym) { + // lowercase everything else + word = word.toLowerCase(); + } + } + + if (options.lowercase === true) { + word = word.toLowerCase(); + } // remove the '.'s from 'F.B.I.' (safely) + + + if (options.acronyms === true && this.tags.Acronym) { + word = word.replace(/\./g, ''); + } // -before/after- + + + if (options.whitespace === true || options.root === true) { + before = ''; + after = ' '; + + if ((hasSpace.test(this.post) === false || options.last) && !this.implicit) { + after = ''; + } + } + + if (options.punctuation === true && !options.root) { + //normalized end punctuation + if (this.hasPost('.') === true) { + after = '.' + after; + } else if (this.hasPost('?') === true) { + after = '?' + after; + } else if (this.hasPost('!') === true) { + after = '!' + after; + } else if (this.hasPost(',') === true) { + after = ',' + after; + } else if (this.hasEllipses() === true) { + after = '...' + after; + } + } + + if (showPre !== true) { + before = ''; + } + + if (showPost !== true) { + // let keep = after.match(/\)/) || '' + after = ''; //keep //after.replace(/[ .?!,]+/, '') + } // remove the '.' from 'Mrs.' (safely) + + + if (options.abbreviations === true && this.tags.Abbreviation) { + after = after.replace(/^\./, ''); + } + + return before + word + after; + }; + + var _04Text = { + textOut: textOut + }; + + var boringTags = { + Auxiliary: 1, + Possessive: 1 + }; + /** a subjective ranking of tags kinda tfidf-based */ + + var rankTags = function rankTags(term, world) { + var tags = Object.keys(term.tags); + var tagSet = world.tags; + tags = tags.sort(function (a, b) { + //bury the tags we dont want + if (boringTags[b] || !tagSet[b]) { + return -1; + } // unknown tags are interesting + + + if (!tagSet[b]) { + return 1; + } + + if (!tagSet[a]) { + return 0; + } // then sort by #of parent tags (most-specific tags first) + + + if (tagSet[a].lineage.length > tagSet[b].lineage.length) { + return 1; + } + + if (tagSet[a].isA.length > tagSet[b].isA.length) { + return -1; + } + + return 0; + }); + return tags; + }; + + var _bestTag = rankTags; + + var jsonDefault = { + text: true, + tags: true, + implicit: true, + whitespace: true, + clean: false, + id: false, + index: false, + offset: false, + bestTag: false + }; + /** return various metadata for this term */ + + var json = function json(options, world) { + options = options || {}; + options = Object.assign({}, jsonDefault, options); + var result = {}; // default on + + if (options.text) { + result.text = this.text; + } + + if (options.normal) { + result.normal = this.normal; + } + + if (options.tags) { + result.tags = Object.keys(this.tags); + } // default off + + + if (options.clean) { + result.clean = this.clean; + } + + if (options.id || options.offset) { + result.id = this.id; + } + + if (options.implicit && this.implicit !== null) { + result.implicit = this.implicit; + } + + if (options.whitespace) { + result.pre = this.pre; + result.post = this.post; + } + + if (options.bestTag) { + result.bestTag = _bestTag(this, world)[0]; + } + + return result; + }; + + var _05Json = { + json: json + }; + + var methods = Object.assign({}, _01Case, _02Punctuation, _03Misc, _04Text, _05Json); + + function isClientSide() { + return typeof window !== 'undefined' && window.document; + } + /** add spaces at the end */ + + + var padEnd = function padEnd(str, width) { + str = str.toString(); + + while (str.length < width) { + str += ' '; + } + + return str; + }; + /** output for verbose-mode */ + + + var logTag = function logTag(t, tag, reason) { + if (isClientSide()) { + console.log('%c' + padEnd(t.clean, 3) + ' + ' + tag + ' ', 'color: #6accb2;'); + return; + } //server-side + + + var log = '\x1b[33m' + padEnd(t.clean, 15) + '\x1b[0m + \x1b[32m' + tag + '\x1b[0m '; + + if (reason) { + log = padEnd(log, 35) + ' ' + reason + ''; + } + + console.log(log); + }; + /** output for verbose mode */ + + + var logUntag = function logUntag(t, tag, reason) { + if (isClientSide()) { + console.log('%c' + padEnd(t.clean, 3) + ' - ' + tag + ' ', 'color: #AB5850;'); + return; + } //server-side + + + var log = '\x1b[33m' + padEnd(t.clean, 3) + ' \x1b[31m - #' + tag + '\x1b[0m '; + + if (reason) { + log = padEnd(log, 35) + ' ' + reason; + } + + console.log(log); + }; + + var isArray = function isArray(arr) { + return Object.prototype.toString.call(arr) === '[object Array]'; + }; + + var titleCase = function titleCase(str) { + return str.charAt(0).toUpperCase() + str.substr(1); + }; + + var fns = { + logTag: logTag, + logUntag: logUntag, + isArray: isArray, + titleCase: titleCase + }; + + /** add a tag, and its descendents, to a term */ + + var addTag = function addTag(t, tag, reason, world) { + var tagset = world.tags; //support '.' or '-' notation for skipping the tag + + if (tag === '' || tag === '.' || tag === '-') { + return; + } + + if (tag[0] === '#') { + tag = tag.replace(/^#/, ''); + } + + tag = fns.titleCase(tag); //if we already got this one + + if (t.tags[tag] === true) { + return; + } // log it? + + + var isVerbose = world.isVerbose(); + + if (isVerbose === true) { + fns.logTag(t, tag, reason); + } //add tag + + + t.tags[tag] = true; //whee! + //check tagset for any additional things to do... + + if (tagset.hasOwnProperty(tag) === true) { + //add parent Tags + tagset[tag].isA.forEach(function (down) { + t.tags[down] = true; + + if (isVerbose === true) { + fns.logTag(t, '→ ' + down); + } + }); //remove any contrary tags + + t.unTag(tagset[tag].notA, '←', world); + } + }; + /** support an array of tags */ + + + var addTags = function addTags(term, tags, reason, world) { + if (typeof tags !== 'string') { + for (var i = 0; i < tags.length; i++) { + addTag(term, tags[i], reason, world); + } // tags.forEach(tag => addTag(term, tag, reason, world)) + + } else { + addTag(term, tags, reason, world); + } + }; + + var add = addTags; + + var lowerCase = /^[a-z]/; + + var titleCase$1 = function titleCase(str) { + return str.charAt(0).toUpperCase() + str.substr(1); + }; + /** remove this tag, and its descentents from the term */ + + + var unTag = function unTag(t, tag, reason, world) { + var isVerbose = world.isVerbose(); //support '*' for removing all tags + + if (tag === '*') { + t.tags = {}; + return t; + } + + tag = tag.replace(/^#/, ''); + + if (lowerCase.test(tag) === true) { + tag = titleCase$1(tag); + } // remove the tag + + + if (t.tags[tag] === true) { + delete t.tags[tag]; //log in verbose-mode + + if (isVerbose === true) { + fns.logUntag(t, tag, reason); + } + } //delete downstream tags too + + + var tagset = world.tags; + + if (tagset[tag]) { + var lineage = tagset[tag].lineage; + + for (var i = 0; i < lineage.length; i++) { + if (t.tags[lineage[i]] === true) { + delete t.tags[lineage[i]]; + + if (isVerbose === true) { + fns.logUntag(t, ' - ' + lineage[i]); + } + } + } + } + + return t; + }; //handle an array of tags + + + var untagAll = function untagAll(term, tags, reason, world) { + if (typeof tags !== 'string' && tags) { + for (var i = 0; i < tags.length; i++) { + unTag(term, tags[i], reason, world); + } + + return; + } + + unTag(term, tags, reason, world); + }; + + var unTag_1 = untagAll; + + var canBe = function canBe(term, tag, world) { + var tagset = world.tags; // cleanup tag + + if (tag[0] === '#') { + tag = tag.replace(/^#/, ''); + } //fail-fast + + + if (tagset[tag] === undefined) { + return true; + } //loop through tag's contradictory tags + + + var enemies = tagset[tag].notA || []; + + for (var i = 0; i < enemies.length; i++) { + if (term.tags[enemies[i]] === true) { + return false; + } + } + + if (tagset[tag].isA !== undefined) { + return canBe(term, tagset[tag].isA, world); //recursive + } + + return true; + }; + + var canBe_1 = canBe; + + /** add a tag or tags, and their descendents to this term + * @param {string | string[]} tags - a tag or tags + * @param {string?} [reason] a clue for debugging + */ + + var tag_1 = function tag_1(tags, reason, world) { + add(this, tags, reason, world); + return this; + }; + /** only tag this term if it's consistent with it's current tags */ + + + var tagSafe = function tagSafe(tags, reason, world) { + if (canBe_1(this, tags, world)) { + add(this, tags, reason, world); + } + + return this; + }; + /** remove a tag or tags, and their descendents from this term + * @param {string | string[]} tags - a tag or tags + * @param {string?} [reason] a clue for debugging + */ + + + var unTag_1$1 = function unTag_1$1(tags, reason, world) { + unTag_1(this, tags, reason, world); + return this; + }; + /** is this tag consistent with the word's current tags? + * @param {string | string[]} tags - a tag or tags + * @returns {boolean} + */ + + + var canBe_1$1 = function canBe_1$1(tags, world) { + return canBe_1(this, tags, world); + }; + + var tag = { + tag: tag_1, + tagSafe: tagSafe, + unTag: unTag_1$1, + canBe: canBe_1$1 + }; + + var Term = /*#__PURE__*/function () { + function Term() { + var text = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : ''; + + _classCallCheck(this, Term); + + text = String(text); + var obj = parse(text); // the various forms of our text + + this.text = obj.text || ''; + this.clean = obj.clean; + this.reduced = obj.reduced; + this.root = null; + this.implicit = null; + this.pre = obj.pre || ''; + this.post = obj.post || ''; + this.tags = {}; + this.prev = null; + this.next = null; + this.id = _id(obj.clean); + this.isA = 'Term'; // easier than .constructor... + // support alternative matches + + if (obj.alias) { + this.alias = obj.alias; + } + } + /** set the text of the Term to something else*/ + + + _createClass(Term, [{ + key: "set", + value: function set(str) { + var obj = parse(str); + this.text = obj.text; + this.clean = obj.clean; + return this; + } + }]); + + return Term; + }(); + /** create a deep-copy of this term */ + + + Term.prototype.clone = function () { + var term = new Term(this.text); + term.pre = this.pre; + term.post = this.post; + term.clean = this.clean; + term.reduced = this.reduced; + term.root = this.root; + term.implicit = this.implicit; + term.tags = Object.assign({}, this.tags); //use the old id, so it can be matched with .match(doc) + // term.id = this.id + + return term; + }; + + Object.assign(Term.prototype, methods); + Object.assign(Term.prototype, tag); + var Term_1 = Term; + + /** return a flat array of Term objects */ + var terms = function terms(n) { + if (this.length === 0) { + return []; + } // use cache, if it exists + + + if (this.cache.terms) { + if (n !== undefined) { + return this.cache.terms[n]; + } + + return this.cache.terms; + } + + var terms = [this.pool.get(this.start)]; + + for (var i = 0; i < this.length - 1; i += 1) { + var id = terms[terms.length - 1].next; + + if (id === null) { + // throw new Error('linked-list broken') + console.error("Compromise error: Linked list broken in phrase '" + this.start + "'"); + break; + } + + var term = this.pool.get(id); + terms.push(term); //return this one? + + if (n !== undefined && n === i) { + return terms[n]; + } + } + + if (n === undefined) { + this.cache.terms = terms; + } + + if (n !== undefined) { + return terms[n]; + } + + return terms; + }; + /** return a shallow or deep copy of this phrase */ + + + var clone = function clone(isShallow) { + var _this = this; + + if (isShallow) { + var p = this.buildFrom(this.start, this.length); + p.cache = this.cache; + return p; + } //how do we clone part of the pool? + + + var terms = this.terms(); + var newTerms = terms.map(function (t) { + return t.clone(); + }); // console.log(newTerms) + //connect these new ids up + + newTerms.forEach(function (t, i) { + //add it to the pool.. + _this.pool.add(t); + + if (newTerms[i + 1]) { + t.next = newTerms[i + 1].id; + } + + if (newTerms[i - 1]) { + t.prev = newTerms[i - 1].id; + } + }); + return this.buildFrom(newTerms[0].id, newTerms.length); + }; + /** return last term object */ + + + var lastTerm = function lastTerm() { + var terms = this.terms(); + return terms[terms.length - 1]; + }; + /** quick lookup for a term id */ + + + var hasId = function hasId(wantId) { + if (this.length === 0 || !wantId) { + return false; + } + + if (this.start === wantId) { + return true; + } // use cache, if available + + + if (this.cache.terms) { + var _terms = this.cache.terms; + + for (var i = 0; i < _terms.length; i++) { + if (_terms[i].id === wantId) { + return true; + } + } + + return false; + } // otherwise, go through each term + + + var lastId = this.start; + + for (var _i = 0; _i < this.length - 1; _i += 1) { + var term = this.pool.get(lastId); + + if (term === undefined) { + console.error("Compromise error: Linked list broken. Missing term '".concat(lastId, "' in phrase '").concat(this.start, "'\n")); // throw new Error('linked List error') + + return false; + } + + if (term.next === wantId) { + return true; + } + + lastId = term.next; + } + + return false; + }; + /** how many seperate, non-empty words is it? */ + + + var wordCount = function wordCount() { + return this.terms().filter(function (t) { + return t.text !== ''; + }).length; + }; + /** get the full-sentence this phrase belongs to */ + + + var fullSentence = function fullSentence() { + var t = this.terms(0); //find first term in sentence + + while (t.prev) { + t = this.pool.get(t.prev); + } + + var start = t.id; + var len = 1; //go to end of sentence + + while (t.next) { + t = this.pool.get(t.next); + len += 1; + } + + return this.buildFrom(start, len); + }; + + var _01Utils = { + terms: terms, + clone: clone, + lastTerm: lastTerm, + hasId: hasId, + wordCount: wordCount, + fullSentence: fullSentence + }; + + var trimEnd = function trimEnd(str) { + return str.replace(/ +$/, ''); + }; + /** produce output in the given format */ + + + var text = function text() { + var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {}; + var isFirst = arguments.length > 1 ? arguments[1] : undefined; + var isLast = arguments.length > 2 ? arguments[2] : undefined; + + if (typeof options === 'string') { + if (options === 'normal') { + options = { + whitespace: true, + unicode: true, + lowercase: true, + punctuation: true, + acronyms: true, + abbreviations: true, + implicit: true, + normal: true + }; + } else if (options === 'clean') { + options = { + titlecase: false, + lowercase: true, + punctuation: true, + whitespace: true, + unicode: true, + implicit: true + }; + } else if (options === 'reduced') { + options = { + titlecase: false, + lowercase: true, + punctuation: false, + //FIXME: reversed? + whitespace: true, + unicode: true, + implicit: true, + reduced: true + }; + } else if (options === 'root') { + options = { + titlecase: false, + lowercase: true, + punctuation: true, + whitespace: true, + unicode: true, + implicit: true, + root: true + }; + } else { + options = {}; + } + } + + var terms = this.terms(); //this this phrase a complete sentence? + + var isFull = false; + + if (terms[0] && terms[0].prev === null && terms[terms.length - 1].next === null) { + isFull = true; + } + + var text = terms.reduce(function (str, t, i) { + options.last = isLast && i === terms.length - 1; + var showPre = true; + var showPost = true; + + if (isFull === false) { + // dont show beginning whitespace + if (i === 0 && isFirst) { + showPre = false; + } // dont show end-whitespace + + + if (i === terms.length - 1 && isLast) { + showPost = false; + } + } + + var txt = t.textOut(options, showPre, showPost); // if (options.titlecase && i === 0) { + // txt = titleCase(txt) + // } + + return str + txt; + }, ''); //full-phrases show punctuation, but not whitespace + + if (isFull === true && isLast) { + text = trimEnd(text); + } + + if (options.trim === true) { + text = text.trim(); + } + + return text; + }; + + var _02Text = { + text: text + }; + + /** remove start and end whitespace */ + var trim = function trim() { + var terms = this.terms(); + + if (terms.length > 0) { + //trim starting + terms[0].pre = terms[0].pre.replace(/^\s+/, ''); //trim ending + + var lastTerm = terms[terms.length - 1]; + lastTerm.post = lastTerm.post.replace(/\s+$/, ''); + } + + return this; + }; + + var _03Change = { + trim: trim + }; + + var endOfSentence = /[.?!]\s*$/; // replacing a 'word.' with a 'word!' + + var combinePost = function combinePost(before, after) { + //only transfer the whitespace + if (endOfSentence.test(after)) { + var whitespace = before.match(/\s*$/); + return after + whitespace; + } + + return before; + }; //add whitespace to the start of the second bit + + + var addWhitespace = function addWhitespace(beforeTerms, newTerms) { + // add any existing pre-whitespace to beginning + newTerms[0].pre = beforeTerms[0].pre; + var lastTerm = beforeTerms[beforeTerms.length - 1]; //add any existing punctuation to end of our new terms + + var newTerm = newTerms[newTerms.length - 1]; + newTerm.post = combinePost(lastTerm.post, newTerm.post); // remove existing punctuation + + lastTerm.post = ''; //before ←[space] - after + + if (lastTerm.post === '') { + lastTerm.post += ' '; + } + }; //insert this segment into the linked-list + + + var stitchIn = function stitchIn(beforeTerms, newTerms, pool) { + var lastBefore = beforeTerms[beforeTerms.length - 1]; + var lastNew = newTerms[newTerms.length - 1]; + var afterId = lastBefore.next; //connect ours in (main → newPhrase) + + lastBefore.next = newTerms[0].id; //stich the end in (newPhrase → after) + + lastNew.next = afterId; //do it backwards, too + + if (afterId) { + // newPhrase ← after + var afterTerm = pool.get(afterId); + afterTerm.prev = lastNew.id; + } // before ← newPhrase + + + var beforeId = beforeTerms[0].id; + + if (beforeId) { + var newTerm = newTerms[0]; + newTerm.prev = beforeId; + } + }; // avoid stretching a phrase twice. + + + var unique = function unique(list) { + return list.filter(function (o, i) { + return list.indexOf(o) === i; + }); + }; //append one phrase onto another. + + + var appendPhrase = function appendPhrase(before, newPhrase, doc) { + var beforeTerms = before.terms(); + var newTerms = newPhrase.terms(); //spruce-up the whitespace issues + + addWhitespace(beforeTerms, newTerms); //insert this segment into the linked-list + + stitchIn(beforeTerms, newTerms, before.pool); // stretch! + // make each effected phrase longer + + var toStretch = [before]; + var hasId = before.start; + var docs = [doc]; + docs = docs.concat(doc.parents()); // find them all! + + docs.forEach(function (parent) { + // only the phrases that should change + var shouldChange = parent.list.filter(function (p) { + return p.hasId(hasId); + }); + toStretch = toStretch.concat(shouldChange); + }); // don't double-count a phrase + + toStretch = unique(toStretch); + toStretch.forEach(function (p) { + p.length += newPhrase.length; + }); + before.cache = {}; + return before; + }; + + var append = appendPhrase; + + var hasSpace$1 = / /; //a new space needs to be added, either on the new phrase, or the old one + // '[new] [◻old]' -or- '[old] [◻new] [old]' + + var addWhitespace$1 = function addWhitespace(newTerms) { + //add a space before our new text? + // add a space after our text + var lastTerm = newTerms[newTerms.length - 1]; + + if (hasSpace$1.test(lastTerm.post) === false) { + lastTerm.post += ' '; + } + + return; + }; //insert this segment into the linked-list + + + var stitchIn$1 = function stitchIn(main, newPhrase, newTerms) { + // [newPhrase] → [main] + var lastTerm = newTerms[newTerms.length - 1]; + lastTerm.next = main.start; // [before] → [main] + + var pool = main.pool; + var start = pool.get(main.start); + + if (start.prev) { + var before = pool.get(start.prev); + before.next = newPhrase.start; + } //do it backwards, too + // before ← newPhrase + + + newTerms[0].prev = main.terms(0).prev; // newPhrase ← main + + main.terms(0).prev = lastTerm.id; + }; + + var unique$1 = function unique(list) { + return list.filter(function (o, i) { + return list.indexOf(o) === i; + }); + }; //append one phrase onto another + + + var joinPhrase = function joinPhrase(original, newPhrase, doc) { + var starterId = original.start; + var newTerms = newPhrase.terms(); //spruce-up the whitespace issues + + addWhitespace$1(newTerms); //insert this segment into the linked-list + + stitchIn$1(original, newPhrase, newTerms); //increase the length of our phrases + + var toStretch = [original]; + var docs = [doc]; + docs = docs.concat(doc.parents()); + docs.forEach(function (d) { + // only the phrases that should change + var shouldChange = d.list.filter(function (p) { + return p.hasId(starterId) || p.hasId(newPhrase.start); + }); + toStretch = toStretch.concat(shouldChange); + }); // don't double-count + + toStretch = unique$1(toStretch); // stretch these phrases + + toStretch.forEach(function (p) { + p.length += newPhrase.length; // change the start too, if necessary + + if (p.start === starterId) { + p.start = newPhrase.start; + } + + p.cache = {}; + }); + return original; + }; + + var prepend = joinPhrase; + + //recursively decrease the length of all the parent phrases + var shrinkAll = function shrinkAll(doc, id, deleteLength, after) { + var arr = doc.parents(); + arr.push(doc); + arr.forEach(function (d) { + //find our phrase to shrink + var phrase = d.list.find(function (p) { + return p.hasId(id); + }); + + if (!phrase) { + return; + } + + phrase.length -= deleteLength; // does it start with this soon-removed word? + + if (phrase.start === id) { + phrase.start = after.id; + } + + phrase.cache = {}; + }); // cleanup empty phrase objects + + doc.list = doc.list.filter(function (p) { + if (!p.start || !p.length) { + return false; + } + + return true; + }); + }; + /** wrap the linked-list around these terms + * so they don't appear any more + */ + + + var deletePhrase = function deletePhrase(phrase, doc) { + var pool = doc.pool(); + var terms = phrase.terms(); //grab both sides of the chain, + + var prev = pool.get(terms[0].prev) || {}; + var after = pool.get(terms[terms.length - 1].next) || {}; + + if (terms[0].implicit && prev.implicit) { + prev.set(prev.implicit); + prev.post += ' '; + } // //first, change phrase lengths + + + shrinkAll(doc, phrase.start, phrase.length, after); // connect [prev]->[after] + + if (prev) { + prev.next = after.id; + } // connect [prev]<-[after] + + + if (after) { + after.prev = prev.id; + } // lastly, actually delete the terms from the pool? + // for (let i = 0; i < terms.length; i++) { + // pool.remove(terms[i].id) + // } + + }; + + var _delete = deletePhrase; + + /** put this text at the end */ + + var append_1 = function append_1(newPhrase, doc) { + append(this, newPhrase, doc); + return this; + }; + /** add this text to the beginning */ + + + var prepend_1 = function prepend_1(newPhrase, doc) { + prepend(this, newPhrase, doc); + return this; + }; + + var _delete$1 = function _delete$1(doc) { + _delete(this, doc); + return this; + }; // stich-in newPhrase, stretch 'doc' + parents + + + var replace = function replace(newPhrase, doc) { + //add it do the end + var firstLength = this.length; + append(this, newPhrase, doc); //delete original terms + + var tmp = this.buildFrom(this.start, this.length); + tmp.length = firstLength; + _delete(tmp, doc); + }; + /** + * Turn this phrase object into 3 phrase objects + */ + + + var splitOn = function splitOn(p) { + var terms = this.terms(); + var result = { + before: null, + match: null, + after: null + }; + var index = terms.findIndex(function (t) { + return t.id === p.start; + }); + + if (index === -1) { + return result; + } //make all three sections into phrase-objects + + + var start = terms.slice(0, index); + + if (start.length > 0) { + result.before = this.buildFrom(start[0].id, start.length); + } + + var match = terms.slice(index, index + p.length); + + if (match.length > 0) { + result.match = this.buildFrom(match[0].id, match.length); + } + + var end = terms.slice(index + p.length, terms.length); + + if (end.length > 0) { + result.after = this.buildFrom(end[0].id, end.length, this.pool); + } + + return result; + }; + + var _04Insert = { + append: append_1, + prepend: prepend_1, + "delete": _delete$1, + replace: replace, + splitOn: splitOn + }; + + /** return json metadata for this phrase */ + var json$1 = function json() { + var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {}; + var world = arguments.length > 1 ? arguments[1] : undefined; + var res = {}; // text data + + if (options.text) { + res.text = this.text(); + } + + if (options.normal) { + res.normal = this.text('normal'); + } + + if (options.clean) { + res.clean = this.text('clean'); + } + + if (options.reduced) { + res.reduced = this.text('reduced'); + } + + if (options.root) { + res.root = this.text('root'); + } + + if (options.trim) { + if (res.text) { + res.text = res.text.trim(); + } + + if (res.normal) { + res.normal = res.normal.trim(); + } + + if (res.reduced) { + res.reduced = res.reduced.trim(); + } + } // terms data + + + if (options.terms) { + if (options.terms === true) { + options.terms = {}; + } + + res.terms = this.terms().map(function (t) { + return t.json(options.terms, world); + }); + } + + return res; + }; + + var _05Json$1 = { + json: json$1 + }; + + /** match any terms after this phrase */ + var lookAhead = function lookAhead(regs) { + // if empty match string, return everything after + if (!regs) { + regs = '.*'; + } + + var pool = this.pool; // get a list of all terms preceding our start + + var terms = []; + + var getAfter = function getAfter(id) { + var term = pool.get(id); + + if (!term) { + return; + } + + terms.push(term); + + if (term.prev) { + getAfter(term.next); //recursion + } + }; + + var all = this.terms(); + var lastTerm = all[all.length - 1]; + getAfter(lastTerm.next); + + if (terms.length === 0) { + return []; + } // got the terms, make a phrase from them + + + var p = this.buildFrom(terms[0].id, terms.length); + return p.match(regs); + }; + /** match any terms before this phrase */ + + + var lookBehind = function lookBehind(regs) { + // if empty match string, return everything before + if (!regs) { + regs = '.*'; + } + + var pool = this.pool; // get a list of all terms preceding our start + + var terms = []; + + var getBefore = function getBefore(id) { + var term = pool.get(id); + + if (!term) { + return; + } + + terms.push(term); + + if (term.prev) { + getBefore(term.prev); //recursion + } + }; + + var term = pool.get(this.start); + getBefore(term.prev); + + if (terms.length === 0) { + return []; + } // got the terms, make a phrase from them + + + var p = this.buildFrom(terms[terms.length - 1].id, terms.length); + return p.match(regs); + }; + + var _06Lookahead = { + lookAhead: lookAhead, + lookBehind: lookBehind + }; + + var methods$1 = Object.assign({}, _01Utils, _02Text, _03Change, _04Insert, _05Json$1, _06Lookahead); + + // try to avoid doing the match + var failFast = function failFast(p, regs) { + if (regs.length === 0) { + return true; + } + + for (var i = 0; i < regs.length; i += 1) { + var reg = regs[i]; //logical quick-ones + + if (reg.optional !== true && reg.negative !== true) { + //start/end impossibilites + if (reg.start === true && i > 0) { + return true; + } + } //this is not possible + + + if (reg.anything === true && reg.negative === true) { + return true; + } + } + + return false; + }; + + var _02FailFast = failFast; + + //found a match? it's greedy? keep going! + + var getGreedy = function getGreedy(terms, t, reg, until, index, length) { + var start = t; + + for (; t < terms.length; t += 1) { + //stop for next-reg match + if (until && terms[t].doesMatch(until, index + t, length)) { + return t; + } + + var count = t - start + 1; // is it max-length now? + + if (reg.max !== undefined && count === reg.max) { + return t; + } //stop here + + + if (terms[t].doesMatch(reg, index + t, length) === false) { + // is it too short? + if (reg.min !== undefined && count < reg.min) { + return null; + } + + return t; + } + } + + return t; + }; //'unspecific greedy' is a weird situation. + + + var greedyTo = function greedyTo(terms, t, nextReg, index, length) { + //if there's no next one, just go off the end! + if (!nextReg) { + return terms.length; + } //otherwise, we're looking for the next one + + + for (; t < terms.length; t += 1) { + if (terms[t].doesMatch(nextReg, index + t, length) === true) { + return t; + } + } //guess it doesn't exist, then. + + + return null; + }; // get or create named group + + + var getOrCreateGroup = function getOrCreateGroup(namedGroups, namedGroupId, terms, startIndex, group) { + var g = namedGroups[namedGroupId]; + + if (g) { + return g; + } + + var id = terms[startIndex].id; + namedGroups[namedGroupId] = { + group: String(group), + start: id, + length: 0 + }; + return namedGroups[namedGroupId]; + }; + /** tries to match a sequence of terms, starting from here */ + + + var tryHere = function tryHere(terms, regs, index, length) { + var namedGroups = {}; + var previousGroupId = null; + var t = 0; // we must satisfy each rule in 'regs' + + for (var r = 0; r < regs.length; r += 1) { + var reg = regs[r]; // Check if this reg has a named capture group + + var isNamedGroup = typeof reg.named === 'string' || typeof reg.named === 'number'; + var namedGroupId = null; // Reuse previous capture group if same + + if (isNamedGroup) { + var prev = regs[r - 1]; + + if (prev && prev.named === reg.named && previousGroupId) { + namedGroupId = previousGroupId; + } else { + namedGroupId = _id(reg.named); + previousGroupId = namedGroupId; + } + } //should we fail here? + + + if (!terms[t]) { + //are all remaining regs optional? + var hasNeeds = regs.slice(r).some(function (remain) { + return !remain.optional; + }); + + if (hasNeeds === false) { + break; + } // have unmet needs + + + return [false, null]; + } //support 'unspecific greedy' .* properly + + + if (reg.anything === true && reg.greedy === true) { + var skipto = greedyTo(terms, t, regs[r + 1], reg, index); // ensure it's long enough + + if (reg.min !== undefined && skipto - t < reg.min) { + return [false, null]; + } // reduce it back, if it's too long + + + if (reg.max !== undefined && skipto - t > reg.max) { + t = t + reg.max; + continue; + } + + if (skipto === null) { + return [false, null]; //couldn't find it + } // is it really this easy?.... + + + if (isNamedGroup) { + var g = getOrCreateGroup(namedGroups, namedGroupId, terms, t, reg.named); // Update group + + g.length = skipto - t; + } + + t = skipto; + continue; + } //if it looks like a match, continue + //we have a special case where an end-anchored greedy match may need to + //start matching before the actual end; we do this by (temporarily!) + //removing the "end" property from the matching token... since this is + //very situation-specific, we *only* do this when we really need to. + + + if (reg.anything === true || reg.end === true && reg.greedy === true && index + t < length - 1 && terms[t].doesMatch(Object.assign({}, reg, { + end: false + }), index + t, length) === true || terms[t].doesMatch(reg, index + t, length) === true) { + var startAt = t; // okay, it was a match, but if it optional too, + // we should check the next reg too, to skip it? + + if (reg.optional && regs[r + 1]) { + // does the next reg match it too? + if (terms[t].doesMatch(regs[r + 1], index + t, length) === true) { + // but does the next reg match the next term?? + // only skip if it doesn't + if (!terms[t + 1] || terms[t + 1].doesMatch(regs[r + 1], index + t, length) === false) { + r += 1; + } + } + } //advance to the next term! + + + t += 1; //check any ending '$' flags + + if (reg.end === true) { + //if this isn't the last term, refuse the match + if (t !== terms.length && reg.greedy !== true) { + return [false, null]; + } + } //try keep it going! + + + if (reg.greedy === true) { + // for greedy checking, we no longer care about the reg.start + // value, and leaving it can cause failures for anchored greedy + // matches. ditto for end-greedy matches: we need an earlier non- + // ending match to succceed until we get to the actual end. + t = getGreedy(terms, t, Object.assign({}, reg, { + start: false, + end: false + }), regs[r + 1], index, length); + + if (t === null) { + return [false, null]; //greedy was too short + } + + if (reg.min && reg.min > t) { + return [false, null]; //greedy was too short + } // if this was also an end-anchor match, check to see we really + // reached the end + + + if (reg.end === true && index + t !== length) { + return [false, null]; //greedy didn't reach the end + } + } + + if (isNamedGroup) { + // Get or create capture group + var _g = getOrCreateGroup(namedGroups, namedGroupId, terms, startAt, reg.named); // Update group - add greedy or increment length + + + if (t > 1 && reg.greedy) { + _g.length += t - startAt; + } else { + _g.length++; + } + } + + continue; + } //bah, who cares, keep going + + + if (reg.optional === true) { + continue; + } // should we skip-over an implicit word? + + + if (terms[t].isImplicit() && regs[r - 1] && terms[t + 1]) { + // does the next one match? + if (terms[t + 1].doesMatch(reg, index + t, length)) { + t += 2; + continue; + } + } // console.log(' ❌\n\n') + + + return [false, null]; + } //return our result + + + return [terms.slice(0, t), namedGroups]; + }; + + var _03TryMatch = tryHere; + + var postProcess = function postProcess(terms, regs, matches) { + if (!matches || matches.length === 0) { + return matches; + } // ensure end reg has the end term + + + var atEnd = regs.some(function (r) { + return r.end; + }); + + if (atEnd) { + var lastTerm = terms[terms.length - 1]; + matches = matches.filter(function (_ref) { + var arr = _ref.match; + return arr.indexOf(lastTerm) !== -1; + }); + } + + return matches; + }; + + var _04PostProcess = postProcess; + + /* break-down a match expression into this: + { + word:'', + tag:'', + regex:'', + + start:false, + end:false, + negative:false, + anything:false, + greedy:false, + optional:false, + + named:'', + choices:[], + } + */ + var hasMinMax = /\{([0-9]+,?[0-9]*)\}/; + var andSign = /&&/; + var captureName = new RegExp(/^<(\S+)>/); + + var titleCase$2 = function titleCase(str) { + return str.charAt(0).toUpperCase() + str.substr(1); + }; + + var end = function end(str) { + return str[str.length - 1]; + }; + + var start = function start(str) { + return str[0]; + }; + + var stripStart = function stripStart(str) { + return str.substr(1); + }; + + var stripEnd = function stripEnd(str) { + return str.substr(0, str.length - 1); + }; + + var stripBoth = function stripBoth(str) { + str = stripStart(str); + str = stripEnd(str); + return str; + }; // + + + var parseToken = function parseToken(w) { + var obj = {}; //collect any flags (do it twice) + + for (var i = 0; i < 2; i += 1) { + //end-flag + if (end(w) === '$') { + obj.end = true; + w = stripEnd(w); + } //front-flag + + + if (start(w) === '^') { + obj.start = true; + w = stripStart(w); + } //capture group (this one can span multiple-terms) + + + if (start(w) === '[' || end(w) === ']') { + obj.named = true; + + if (start(w) === '[') { + obj.groupType = end(w) === ']' ? 'single' : 'start'; + } else { + obj.groupType = 'end'; + } + + w = w.replace(/^\[/, ''); + w = w.replace(/\]$/, ''); // Use capture group name + + if (start(w) === '<') { + var res = captureName.exec(w); + + if (res.length >= 2) { + obj.named = res[1]; + w = w.replace(res[0], ''); + } + } + } //back-flags + + + if (end(w) === '+') { + obj.greedy = true; + w = stripEnd(w); + } + + if (w !== '*' && end(w) === '*' && w !== '\\*') { + obj.greedy = true; + w = stripEnd(w); + } + + if (end(w) === '?') { + obj.optional = true; + w = stripEnd(w); + } + + if (start(w) === '!') { + obj.negative = true; + w = stripStart(w); + } //wrapped-flags + + + if (start(w) === '(' && end(w) === ')') { + // support (one && two) + if (andSign.test(w)) { + obj.choices = w.split(andSign); + obj.operator = 'and'; + } else { + obj.choices = w.split('|'); + obj.operator = 'or'; + } //remove '(' and ')' + + + obj.choices[0] = stripStart(obj.choices[0]); + var last = obj.choices.length - 1; + obj.choices[last] = stripEnd(obj.choices[last]); // clean up the results + + obj.choices = obj.choices.map(function (s) { + return s.trim(); + }); + obj.choices = obj.choices.filter(function (s) { + return s; + }); //recursion alert! + + obj.choices = obj.choices.map(parseToken); + w = ''; + } //regex + + + if (start(w) === '/' && end(w) === '/') { + w = stripBoth(w); + obj.regex = new RegExp(w); //potential vuln - security/detect-non-literal-regexp + + return obj; + } //soft-match + + + if (start(w) === '~' && end(w) === '~') { + w = stripBoth(w); + obj.soft = true; + obj.word = w; + return obj; + } + } // support #Tag{0,9} + + + if (hasMinMax.test(w) === true) { + w = w.replace(hasMinMax, function (a, b) { + var arr = b.split(/,/g); + + if (arr.length === 1) { + // '{3}' Exactly three times + obj.min = Number(arr[0]); + obj.max = Number(arr[0]); + } else { + // '{2,4}' Two to four times + // '{3,}' Three or more times + obj.min = Number(arr[0]); + obj.max = Number(arr[1] || 999); + } + + obj.greedy = true; + return ''; + }); + } //do the actual token content + + + if (start(w) === '#') { + obj.tag = stripStart(w); + obj.tag = titleCase$2(obj.tag); + return obj; + } //dynamic function on a term object + + + if (start(w) === '@') { + obj.method = stripStart(w); + return obj; + } + + if (w === '.') { + obj.anything = true; + return obj; + } //support alone-astrix + + + if (w === '*') { + obj.anything = true; + obj.greedy = true; + obj.optional = true; + return obj; + } + + if (w) { + //somehow handle encoded-chars? + w = w.replace('\\*', '*'); + w = w.replace('\\.', '.'); + obj.word = w.toLowerCase(); + } + + return obj; + }; + + var parseToken_1 = parseToken; + + var isNamed = function isNamed(capture) { + return typeof capture === 'string' || typeof capture === 'number'; + }; + + var fillGroups = function fillGroups(tokens) { + var convert = false; + var index = -1; + var current; //'fill in' capture groups between start-end + + for (var i = 0; i < tokens.length; i++) { + var n = tokens[i]; // Give name to un-named single tokens + + if (n.groupType === 'single' && n.named === true) { + index += 1; + n.named = index; + continue; + } // Start converting tokens + + + if (n.groupType === 'start') { + convert = true; + + if (isNamed(n.named)) { + current = n.named; + } else { + index += 1; + current = index; + } + } // Ensure this token has the right name + + + if (convert) { + n.named = current; + } // Stop converting tokens + + + if (n.groupType === 'end') { + convert = false; + } + } + + return tokens; + }; + + var useOneOf = function useOneOf(tokens) { + return tokens.map(function (token) { + if (token.choices !== undefined) { + // are they all straight non-optional words? + var shouldPack = token.choices.every(function (c) { + return c.optional !== true && c.negative !== true && c.word !== undefined; + }); + + if (shouldPack === true) { + var oneOf = {}; + token.choices.forEach(function (c) { + return oneOf[c.word] = true; + }); + token.oneOf = oneOf; + delete token.choices; + } + } + + return token; + }); + }; + + var postProcess$1 = function postProcess(tokens) { + // ensure all capture groups are filled between start and end + // give all capture groups names + var count = tokens.filter(function (t) { + return t.groupType; + }).length; + + if (count > 0) { + tokens = fillGroups(tokens); + } // convert 'choices' format to 'oneOf' format + + + tokens = useOneOf(tokens); // console.log(tokens) + + return tokens; + }; + + var postProcess_1 = postProcess$1; + + var isArray$1 = function isArray(arr) { + return Object.prototype.toString.call(arr) === '[object Array]'; + }; //split-up by (these things) + + + var byParentheses = function byParentheses(str) { + var arr = str.split(/([\^\[\!]*(?:<\S+>)?\(.*?\)[?+*]*\]?\$?)/); + arr = arr.map(function (s) { + return s.trim(); + }); + return arr; + }; + + var byWords = function byWords(arr) { + var words = []; + arr.forEach(function (a) { + //keep brackets lumped together + if (/^[[^_/]?\(/.test(a[0])) { + words.push(a); + return; + } + + var list = a.split(' '); + list = list.filter(function (w) { + return w; + }); + words = words.concat(list); + }); + return words; + }; //turn an array into a 'choices' list + + + var byArray = function byArray(arr) { + return [{ + choices: arr.map(function (s) { + return { + word: s + }; + }) + }]; + }; + + var fromDoc = function fromDoc(doc) { + if (!doc || !doc.list || !doc.list[0]) { + return []; + } + + var ids = []; + doc.list.forEach(function (p) { + p.terms().forEach(function (t) { + ids.push({ + id: t.id + }); + }); + }); + return [{ + choices: ids, + greedy: true + }]; + }; + /** parse a match-syntax string into json */ + + + var syntax = function syntax(input) { + // fail-fast + if (input === null || input === undefined || input === '') { + return []; + } //try to support a ton of different formats: + + + if (_typeof(input) === 'object') { + if (isArray$1(input)) { + if (input.length === 0 || !input[0]) { + return []; + } //is it a pre-parsed reg-list? + + + if (_typeof(input[0]) === 'object') { + return input; + } //support a flat array of normalized words + + + if (typeof input[0] === 'string') { + return byArray(input); + } + } //support passing-in a compromise object as a match + + + if (input && input.isA === 'Doc') { + return fromDoc(input); + } + + return []; + } + + if (typeof input === 'number') { + input = String(input); //go for it? + } + + var tokens = byParentheses(input); + tokens = byWords(tokens); + tokens = tokens.map(parseToken_1); //clean up anything weird + + tokens = postProcess_1(tokens); // console.log(JSON.stringify(tokens, null, 2)) + + return tokens; + }; + + var syntax_1 = syntax; + + /** returns a simple array of arrays */ + + var matchAll = function matchAll(p, regs) { + var matchOne = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false; + + //if we forgot to parse it.. + if (typeof regs === 'string') { + regs = syntax_1(regs); + } //try to dismiss it, at-once + + + if (_02FailFast(p, regs) === true) { + return []; + } //any match needs to be this long, at least + + + var minLength = regs.filter(function (r) { + return r.optional !== true; + }).length; + var terms = p.terms(); + var matches = []; //optimisation for '^' start logic + + if (regs[0].start === true) { + var _tryMatch = _03TryMatch(terms, regs, 0, terms.length), + _tryMatch2 = _slicedToArray(_tryMatch, 2), + match = _tryMatch2[0], + groups = _tryMatch2[1]; + + if (match !== false && match.length > 0) { + match = match.filter(function (m) { + return m; + }); + matches.push({ + match: match, + groups: groups + }); + } + + return _04PostProcess(terms, regs, matches); + } //try starting, from every term + + + for (var i = 0; i < terms.length; i += 1) { + // slice may be too short + if (i + minLength > terms.length) { + break; + } //try it! + + + var _tryMatch3 = _03TryMatch(terms.slice(i), regs, i, terms.length), + _tryMatch4 = _slicedToArray(_tryMatch3, 2), + _match = _tryMatch4[0], + _groups = _tryMatch4[1]; + + if (_match !== false && _match.length > 0) { + //zoom forward! + i += _match.length - 1; //[capture-groups] return some null responses + + _match = _match.filter(function (m) { + return m; + }); + matches.push({ + match: _match, + groups: _groups + }); //ok, maybe that's enough? + + if (matchOne === true) { + return _04PostProcess(terms, regs, matches); + } + } + } + + return _04PostProcess(terms, regs, matches); + }; + + var _01MatchAll = matchAll; + + /** return anything that doesn't match. + * returns a simple array of arrays + */ + + var notMatch = function notMatch(p, regs) { + var found = {}; + var arr = _01MatchAll(p, regs); + arr.forEach(function (_ref) { + var ts = _ref.match; + ts.forEach(function (t) { + found[t.id] = true; + }); + }); //return anything not found + + var terms = p.terms(); + var result = []; + var current = []; + terms.forEach(function (t) { + if (found[t.id] === true) { + if (current.length > 0) { + result.push(current); + current = []; + } + + return; + } + + current.push(t); + }); + + if (current.length > 0) { + result.push(current); + } + + return result; + }; + + var not = notMatch; + + /** return an array of matching phrases */ + + var match_1 = function match_1(regs) { + var _this = this; + + var justOne = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false; + var matches = _01MatchAll(this, regs, justOne); //make them phrase objects + + matches = matches.map(function (_ref) { + var match = _ref.match, + groups = _ref.groups; + + var p = _this.buildFrom(match[0].id, match.length, groups); + + p.cache.terms = match; + return p; + }); + return matches; + }; + /** return boolean if one match is found */ + + + var has = function has(regs) { + var matches = _01MatchAll(this, regs, true); + return matches.length > 0; + }; + /** remove all matches from the result */ + + + var not$1 = function not$1(regs) { + var _this2 = this; + + var matches = not(this, regs); //make them phrase objects + + matches = matches.map(function (list) { + return _this2.buildFrom(list[0].id, list.length); + }); + return matches; + }; + /** return a list of phrases that can have this tag */ + + + var canBe$1 = function canBe(tag, world) { + var _this3 = this; + + var results = []; + var terms = this.terms(); + var previous = false; + + for (var i = 0; i < terms.length; i += 1) { + var can = terms[i].canBe(tag, world); + + if (can === true) { + if (previous === true) { + //add it to the end + results[results.length - 1].push(terms[i]); + } else { + results.push([terms[i]]); //make a new one + } + + previous = can; + } + } //turn them into Phrase objects + + + results = results.filter(function (a) { + return a.length > 0; + }).map(function (arr) { + return _this3.buildFrom(arr[0].id, arr.length); + }); + return results; + }; + + var match = { + match: match_1, + has: has, + not: not$1, + canBe: canBe$1 + }; + + var Phrase = function Phrase(id, length, pool) { + _classCallCheck(this, Phrase); + + this.start = id; + this.length = length; + this.isA = 'Phrase'; // easier than .constructor... + + Object.defineProperty(this, 'pool', { + enumerable: false, + writable: true, + value: pool + }); + Object.defineProperty(this, 'cache', { + enumerable: false, + writable: true, + value: {} + }); + Object.defineProperty(this, 'groups', { + enumerable: false, + writable: true, + value: {} + }); + }; + /** create a new Phrase object from an id and length */ + + + Phrase.prototype.buildFrom = function (id, length, groups) { + var p = new Phrase(id, length, this.pool); //copy-over or replace capture-groups too + + if (groups && Object.keys(groups).length > 0) { + p.groups = groups; + } else { + p.groups = this.groups; + } + + return p; + }; //apply methods + + + Object.assign(Phrase.prototype, match); + Object.assign(Phrase.prototype, methods$1); //apply aliases + + var aliases = { + term: 'terms' + }; + Object.keys(aliases).forEach(function (k) { + return Phrase.prototype[k] = Phrase.prototype[aliases[k]]; + }); + var Phrase_1 = Phrase; + + /** a key-value store of all terms in our Document */ + var Pool = /*#__PURE__*/function () { + function Pool() { + var words = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {}; + + _classCallCheck(this, Pool); + + //quiet this property in console.logs + Object.defineProperty(this, 'words', { + enumerable: false, + value: words + }); + } + /** throw a new term object in */ + + + _createClass(Pool, [{ + key: "add", + value: function add(term) { + this.words[term.id] = term; + return this; + } + /** find a term by it's id */ + + }, { + key: "get", + value: function get(id) { + return this.words[id]; + } + /** find a term by it's id */ + + }, { + key: "remove", + value: function remove(id) { + delete this.words[id]; + } + }, { + key: "merge", + value: function merge(pool) { + Object.assign(this.words, pool.words); + return this; + } + /** helper method */ + + }, { + key: "stats", + value: function stats() { + return { + words: Object.keys(this.words).length + }; + } + }]); + + return Pool; + }(); + /** make a deep-copy of all terms */ + + + Pool.prototype.clone = function () { + var _this = this; + + var keys = Object.keys(this.words); + var words = keys.reduce(function (h, k) { + var t = _this.words[k].clone(); + + h[t.id] = t; + return h; + }, {}); + return new Pool(words); + }; + + var Pool_1 = Pool; + + //add forward/backward 'linked-list' prev/next ids + var linkTerms = function linkTerms(terms) { + terms.forEach(function (term, i) { + if (i > 0) { + term.prev = terms[i - 1].id; + } + + if (terms[i + 1]) { + term.next = terms[i + 1].id; + } + }); + }; + + var _linkTerms = linkTerms; + + //(Rule-based sentence boundary segmentation) - chop given text into its proper sentences. + // Ignore periods/questions/exclamations used in acronyms/abbreviations/numbers, etc. + // @spencermountain 2017 MIT + //proper nouns with exclamation marks + // const blacklist = { + // yahoo: true, + // joomla: true, + // jeopardy: true, + // } + //regs- + var initSplit = /(\S.+?[.!?\u203D\u2E18\u203C\u2047-\u2049])(?=\s+|$)/g; + var hasSomething = /\S/; + var isAcronym$1 = /[ .][A-Z]\.? *$/i; + var hasEllipse = /(?:\u2026|\.{2,}) *$/; + var newLine = /((?:\r?\n|\r)+)/; // Match different new-line formats + + var hasLetter = /[a-z0-9\u00C0-\u00FF\u00a9|\u00ae|[\u2000-\u3300]|\ud83c[\ud000-\udfff]|\ud83d[\ud000-\udfff]|\ud83e[\ud000-\udfff]/i; + var startWhitespace = /^\s+/; // Start with a regex: + + var naiive_split = function naiive_split(text) { + var all = []; //first, split by newline + + var lines = text.split(newLine); + + for (var i = 0; i < lines.length; i++) { + //split by period, question-mark, and exclamation-mark + var arr = lines[i].split(initSplit); + + for (var o = 0; o < arr.length; o++) { + all.push(arr[o]); + } + } + + return all; + }; + /** does this look like a sentence? */ + + + var isSentence = function isSentence(str, abbrevs) { + // check for 'F.B.I.' + if (isAcronym$1.test(str) === true) { + return false; + } //check for '...' + + + if (hasEllipse.test(str) === true) { + return false; + } // must have a letter + + + if (hasLetter.test(str) === false) { + return false; + } + + var txt = str.replace(/[.!?\u203D\u2E18\u203C\u2047-\u2049] *$/, ''); + var words = txt.split(' '); + var lastWord = words[words.length - 1].toLowerCase(); // check for 'Mr.' + + if (abbrevs.hasOwnProperty(lastWord)) { + return false; + } // //check for jeopardy! + // if (blacklist.hasOwnProperty(lastWord)) { + // return false + // } + + + return true; + }; + + var splitSentences = function splitSentences(text, world) { + var abbrevs = world.cache.abbreviations; + text = text || ''; + text = String(text); + var sentences = []; // First do a greedy-split.. + + var chunks = []; // Ensure it 'smells like' a sentence + + if (!text || typeof text !== 'string' || hasSomething.test(text) === false) { + return sentences; + } // cleanup unicode-spaces + + + text = text.replace('\xa0', ' '); // Start somewhere: + + var splits = naiive_split(text); // Filter-out the crap ones + + for (var i = 0; i < splits.length; i++) { + var s = splits[i]; + + if (s === undefined || s === '') { + continue; + } //this is meaningful whitespace + + + if (hasSomething.test(s) === false) { + //add it to the last one + if (chunks[chunks.length - 1]) { + chunks[chunks.length - 1] += s; + continue; + } else if (splits[i + 1]) { + //add it to the next one + splits[i + 1] = s + splits[i + 1]; + continue; + } + } //else, only whitespace, no terms, no sentence + + + chunks.push(s); + } //detection of non-sentence chunks: + //loop through these chunks, and join the non-sentence chunks back together.. + + + for (var _i = 0; _i < chunks.length; _i++) { + var c = chunks[_i]; //should this chunk be combined with the next one? + + if (chunks[_i + 1] && isSentence(c, abbrevs) === false) { + chunks[_i + 1] = c + (chunks[_i + 1] || ''); + } else if (c && c.length > 0) { + //&& hasLetter.test(c) + //this chunk is a proper sentence.. + sentences.push(c); + chunks[_i] = ''; + } + } //if we never got a sentence, return the given text + + + if (sentences.length === 0) { + return [text]; + } //move whitespace to the ends of sentences, when possible + //['hello',' world'] -> ['hello ','world'] + + + for (var _i2 = 1; _i2 < sentences.length; _i2 += 1) { + var ws = sentences[_i2].match(startWhitespace); + + if (ws !== null) { + sentences[_i2 - 1] += ws[0]; + sentences[_i2] = sentences[_i2].replace(startWhitespace, ''); + } + } + + return sentences; + }; + + var _01Sentences = splitSentences; // console.log(sentence_parser('john f. kennedy')); + + var wordlike = /\S/; + var isBoundary = /^[!?.]+$/; + var naiiveSplit = /(\S+)/; + var isSlash = /[a-z] ?\/ ?[a-z]*$/; + var notWord = { + '.': true, + '-': true, + //dash + '–': true, + //en-dash + '—': true, + //em-dash + '--': true, + '...': true // '/': true, // 'one / two' + + }; + + var hasHyphen = function hasHyphen(str) { + //dont split 're-do' + if (/^(re|un)-?[^aeiou]./.test(str) === true) { + return false; + } //letter-number + + + var reg = /^([a-z\u00C0-\u00FF`"'/]+)(-|–|—)([a-z0-9\u00C0-\u00FF].*)/i; + + if (reg.test(str) === true) { + return true; + } //support weird number-emdash combo '2010–2011' + // let reg2 = /^([0-9]+)(–|—)([0-9].*)/i + // if (reg2.test(str)) { + // return true + // } + + + return false; + }; // 'he / she' should be one word + + + var combineSlashes = function combineSlashes(arr) { + for (var i = 1; i < arr.length - 1; i++) { + if (isSlash.test(arr[i])) { + arr[i - 1] += arr[i] + arr[i + 1]; + arr[i] = null; + arr[i + 1] = null; + } + } + + return arr; + }; + + var splitHyphens = function splitHyphens(word) { + var arr = []; //support multiple-hyphenated-terms + + var hyphens = word.split(/[-–—]/); + var whichDash = '-'; + var found = word.match(/[-–—]/); + + if (found && found[0]) { + whichDash = found; + } + + for (var o = 0; o < hyphens.length; o++) { + if (o === hyphens.length - 1) { + arr.push(hyphens[o]); + } else { + arr.push(hyphens[o] + whichDash); + } + } + + return arr; + }; + + var isArray$2 = function isArray(arr) { + return Object.prototype.toString.call(arr) === '[object Array]'; + }; //turn a string into an array of strings (naiive for now, lumped later) + + + var splitWords = function splitWords(str) { + var result = []; + var arr = []; //start with a naiive split + + str = str || ''; + + if (typeof str === 'number') { + str = String(str); + } + + if (isArray$2(str)) { + return str; + } + + var words = str.split(naiiveSplit); + + for (var i = 0; i < words.length; i++) { + //split 'one-two' + if (hasHyphen(words[i]) === true) { + arr = arr.concat(splitHyphens(words[i])); + continue; + } + + arr.push(words[i]); + } //greedy merge whitespace+arr to the right + + + var carry = ''; + + for (var _i = 0; _i < arr.length; _i++) { + var word = arr[_i]; //if it's more than a whitespace + + if (wordlike.test(word) === true && notWord.hasOwnProperty(word) === false && isBoundary.test(word) === false) { + //put whitespace on end of previous term, if possible + if (result.length > 0) { + result[result.length - 1] += carry; + result.push(word); + } else { + //otherwise, but whitespace before + result.push(carry + word); + } + + carry = ''; + } else { + carry += word; + } + } //handle last one + + + if (carry) { + if (result.length === 0) { + result[0] = ''; + } + + result[result.length - 1] += carry; //put it on the end + } // combine 'one / two' + + + result = combineSlashes(result); // remove empty results + + result = result.filter(function (s) { + return s; + }); + return result; + }; + + var _02Words = splitWords; + + var isArray$3 = function isArray(arr) { + return Object.prototype.toString.call(arr) === '[object Array]'; + }; + /** turn a string into an array of Phrase objects */ + + + var fromText = function fromText() { + var text = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : ''; + var world = arguments.length > 1 ? arguments[1] : undefined; + var pool = arguments.length > 2 ? arguments[2] : undefined; + var sentences = null; //a bit of validation, first + + if (typeof text !== 'string') { + if (typeof text === 'number') { + text = String(text); + } else if (isArray$3(text)) { + sentences = text; + } + } //tokenize into words + + + sentences = sentences || _01Sentences(text, world); + sentences = sentences.map(function (str) { + return _02Words(str); + }); //turn them into proper objects + + pool = pool || new Pool_1(); + var phrases = sentences.map(function (terms) { + terms = terms.map(function (str) { + var term = new Term_1(str); + pool.add(term); + return term; + }); //add next/previous ids + + _linkTerms(terms); //return phrase objects + + var p = new Phrase_1(terms[0].id, terms.length, pool); + p.cache.terms = terms; + return p; + }); //return them ready for a Document object + + return phrases; + }; + + var _01Tokenizer = fromText; + + var fromJSON = function fromJSON(json, world) { + var pool = new Pool_1(); + var phrases = json.map(function (p, k) { + var terms = p.terms.map(function (o, i) { + var term = new Term_1(o.text); + term.pre = o.pre !== undefined ? o.pre : ''; + + if (o.post === undefined) { + o.post = ' '; //no given space for very last term + + if (i >= p.terms.length - 1) { + o.post = '. '; + + if (k >= p.terms.length - 1) { + o.post = '.'; + } + } + } + + term.post = o.post !== undefined ? o.post : ' '; + + if (o.tags) { + o.tags.forEach(function (tag) { + return term.tag(tag, '', world); + }); + } + + pool.add(term); + return term; + }); //add prev/next links + + _linkTerms(terms); // return a proper Phrase object + + return new Phrase_1(terms[0].id, terms.length, pool); + }); + return phrases; + }; + + var fromJSON_1 = fromJSON; + + var _version = '13.5.0X003'; + + var _data = { + "Comparative": "true¦better", + "Superlative": "true¦earlier", + "PresentTense": "true¦is,sounds", + "Value": "true¦a few", + "Noun": "true¦a5b4c2f1here,ie,lit,m0no doubt,pd,tce;a,d;t,y;a,ca,o0;l,rp;a,l;d,l,rc", + "Copula": "true¦a1is,w0;as,ere;m,re", + "PastTense": "true¦be3came,d2had,lied,meant,sa2taken,w0;as,e0;nt,re;id;en,gan", + "Condition": "true¦if,lest,unless", + "Gerund": "true¦accord0be0develop0go0result0stain0;ing", + "Negative": "true¦n0;ever,o0;!n,t", + "QuestionWord": "true¦how3wh0;at,e1ich,o0y;!m,se;n,re; come,'s", + "Plural": "true¦records", + "Conjunction": "true¦&,aEbAcuz,how8in caDno7o6p4supposing,t1vers5wh0yet;eth8ile;h0o;eref9o0;!uC;l0rovided that;us;r,therwi6; matt1r;!ev0;er;e0ut;cau1f0;ore;se;lthou1nd,s 0;far as,if;gh", + "Pronoun": "true¦'em,elle,h4i3me,ourselves,she5th1us,we,you0;!rself;e0ou;m,y;!l,t;e0im;!'s", + "Singular": "true¦0:0Z;1:12;a0Yb0Mc0Dd06e04fZgUhQiPjel0kitty,lOmKnJoIpEquestion mark,rCs7t4u2womY;nc0Ts 2;doll0Fst0H; rex,a3h2ic,ragedy,v show;ere,i1;l0x return;i5ky,omeone,t2uper bowl,yst0Y;ep3ri1u2;de0Rff;faOmoO;st0Nze;al0i1o2;om,se;a4i0Kl06r3u2;dMrpoE;erogaWobl0P;rt,te0J;bjTceHthers;othi1umb0F;a4ee05o2;del,m2nopo0th0D;!my;n,yf0;i0unch;ci1nsect;ead start,o2;l0me3u2;se;! run;adf0entlem5irlZlaci04od,rand3u2;l0y; slam,fa2mo2;th01;an;a5ella,ly,ol0r3un2;di1;iTo2;ntiWsN;mi0thV;conomy,gg,ner5veWx2;ampQecu7;ad7e4innSo2ragonf0ude;cumentFg2i0l0or;gy;ath,t2;ec2;tive;!dy;a8eili1h6i4o2redit card;ttage,u2;riJsin;ty,vil w2;ar;andeliGocol2;ate;n2rD;ary;aAel0lesHo6r4u2;n2tterf0;ti1;eakfast,o2;!th8;dy,tt4y2;!fri2;end;le;nki1r2;ri2;er;d4l0noma0u2;nt;ly; homin4verti2;si1;ng;em", + "Actor": "true¦aJbGcFdCengineIfAgardenIh9instructPjournalLlawyIm8nurse,opeOp5r3s1t0;echnCherapK;ailNcientJoldiGu0;pervKrgeon;e0oofE;ceptionGsearC;hotographClumbColi1r0sychologF;actitionBogrammB;cem6t5;echanic,inist9us4;airdress8ousekeep8;arm7ire0;fight6m2;eputy,iet0;ici0;an;arpent2lerk;ricklay1ut0;ch0;er;ccoun6d2ge7r0ssis6ttenda7;chitect,t0;ist;minist1v0;is1;rat0;or;ta0;nt", + "Honorific": "true¦a03b00cSdReQfiLgKhon,jr,king,lJmEoDp8queen,r4s0taoiseach,vice7;e1fc,gt,ir,r,u0;ltTpt,rg;c0nDrgeaL;ond liJretary;abbi,e0;ar1pAs,v0;!erend; admirY;astPhd,r0vt;esideEi1of0;!essN;me mini5nce0;!ss;fficOp,rd;a3essrs,i2lle,me,r1s0;!tr;!s;stK;gistrate,j,r6yF;i3lb,t;en,ov;eld mar3rst l0;ady,i0;eutena0;nt;shG;sq,xcellency;et,oct6r,utchess;apt6hance4mdr,o0pl;lonel,m2ngress0unci3;m0wom0;an;dr,mand5;ll0;or;!ain;ldg,rig0;!adi0;er;d0sst,tty,yatullah;j,m0v;!ir0;al", + "SportsTeam": "true¦0:1A;1:1H;2:1G;a1Eb16c0Td0Kfc dallas,g0Ihouston 0Hindiana0Gjacksonville jagua0k0El0Bm01newToQpJqueens parkIreal salt lake,sAt5utah jazz,vancouver whitecaps,w3yW;ashington 3est ham0Rh10;natio1Oredski2wizar0W;ampa bay 6e5o3;ronto 3ttenham hotspur;blue ja0Mrapto0;nnessee tita2xasC;buccanee0ra0K;a7eattle 5heffield0Kporting kansas0Wt3;. louis 3oke0V;c1Frams;marine0s3;eah15ounG;cramento Rn 3;antonio spu0diego 3francisco gJjose earthquak1;char08paA; ran07;a8h5ittsburgh 4ortland t3;imbe0rail blaze0;pirat1steele0;il3oenix su2;adelphia 3li1;eagl1philNunE;dr1;akland 3klahoma city thunder,rlando magic;athle0Mrai3;de0; 3castle01;england 7orleans 6york 3;city fc,g4je0FknXme0Fred bul0Yy3;anke1;ian0D;pelica2sain0C;patrio0Brevolut3;ion;anchester Be9i3ontreal impact;ami 7lwaukee b6nnesota 3;t4u0Fvi3;kings;imberwolv1wi2;rewe0uc0K;dolphi2heat,marli2;mphis grizz3ts;li1;cXu08;a4eicesterVos angeles 3;clippe0dodDla9; galaxy,ke0;ansas city 3nE;chiefs,roya0E; pace0polis colU;astr06dynamo,rockeTtexa2;olden state warrio0reen bay pac3;ke0;.c.Aallas 7e3i05od5;nver 5troit 3;lio2pisto2ti3;ge0;broncZnuggeM;cowbo4maver3;ic00;ys; uQ;arCelKh8incinnati 6leveland 5ol3;orado r3umbus crew sc;api5ocki1;brow2cavalie0india2;bengaWre3;ds;arlotte horAicago 3;b4cubs,fire,wh3;iteB;ea0ulR;diff3olina panthe0; c3;ity;altimore 9lackburn rove0oston 5rooklyn 3uffalo bilN;ne3;ts;cel4red3; sox;tics;rs;oriol1rave2;rizona Ast8tlanta 3;brav1falco2h4u3;nited;aw9;ns;es;on villa,r3;os;c5di3;amondbac3;ks;ardi3;na3;ls", + "Uncountable": "true¦a1Ib1Ac11d0Ye0Rf0Lg0Hh0Ci08j07knowled1Hl02mUnews,oTpQrLsAt5vi4w0;a2ea05i1oo0;d,l;ldlife,ne;rmth,t17;neg0Yol06tae;e3h2oothpaste,r0una;affPou0;ble,sers,t;ermod1Eund12;a,nnis;a8cene04eri0Oh7il6kittl0Onow,o5p3t1u0;g0Rnshi0H;ati1De0;am,el;ace16e0;ci0Jed;ap,cc0U;k,v0T;eep,ingl0G;d04fe10l0nd;m0St;a3e1ic0;e,ke0D;c0laxa09search;ogni08rea08;bi09in;aJe1hys10last5o0ressV;lit0Zrk,w0J;a0Vtrol;bstetr0Xil,xygen;a5e3ilk,o2u0;mps,s0;ic;nGo0A;a0chan0S;slZt;chine0il,themat0Q; learn05ry;aught08e2i1ogi0Nu0;ck,g0C;ce,ghtn02ngui0LteratH;a0isG;th04;ewel7usti0G;ce,mp0nformaOtself;a0ortan0E;ti0;en0C;a3isto2o0;ck0mework,n0spitali06;ey;ry;ir,libut,ppi7;en01o1r0um,ymna08;a6ound;l0ssip;d,f;i4lour,o1urnit0;ure;od,rgive0uriNwl;ne0;ss;c6sh;conomZduca5lectr4n2quip3thZvery0;body,o0thE;ne;joy0tertain0;ment;iciNonU;tiF;ar1iabet0raugh1;es;ts;a7elcius,h3ivPl2o0urrency;al,ld w0nfusiAttA;ar;assMoth2;aos,e0;e1w0;ing;se;r4sh;a4eef,i1lood,owls,read,utt0;er;lliar1s0;on;ds;g0ss;ga0;ge;c6dvi5ero3ir2mnes1rt,thl0;et7;ty;craft;b4d0naut4;ynam3;ce;id,ou0;st0;ics", + "Infinitive": "true¦0:6K;1:6Y;2:57;3:6W;4:6V;5:5Z;6:67;7:6U;8:6Q;9:6I;A:6S;B:6P;C:6Z;D:6D;E:56;F:5P;a6Cb61c52d4Ae3Uf3Hg3Bh34i2Rj2Pk2Nl2Fm25n22o1Xp1Iques3Ir0Qs05tXuSvOwHyG;awn,ield;aJe1Yhist6iIoGre65;nd0rG;k,ry;pe,sh,th0;lk,nHrGsh,tDve;n,raC;d0t;aIiGo7;eGsB;!w;l6Cry;nHpGr4se;gra4Mli3Z;dGi7lo5Spub3O;erGo;mi58w1I;aMeLhKoJrHuGwi8;ne,rn;aGe0Mi5Nu8y;de,in,nsf0p,v5F;r2XuD;ank,reat2N;nd,st;lk,rg1Ps7;aZcWeVhTi4Akip,lSmRnee3Jo4YpQtJuGwitD;bmBck,ff0gge8ppHrGspe5;ge,pri1rou4Vvi3;ly,o34;aLeKoJrHuG;dy,mb6;aEeGi3;ngth2Dss,tD;p,re;m,p;in,ke,r0Qy;laFoil,rink6;e1Xi6o3H;am,ip;a2iv0oG;ck,ut;arDem,le5n1r3tt6;aHo2rG;atDew;le,re;il,ve;a05eIisk,oHuG;in,le,sh;am,ll;a01cZdu9fYgXje5lUmTnt,pQquPsKtJvGwa5O;eGiew,o34;al,l,rG;se,t;aEi2u40;eJi8oItG;!o2rG;i5uc1Y;l3rt;mb6nt,r3;e8i2;air,eHlGo3ZreseC;a9y;at;aEemb0i3Vo3;aHeGi3y;a1nt;te,x;a56r0I;act1Wer,le5u1;a11ei3k5IoGyc6;gni2Anci6rd;ch,li29s5G;i1nG;ge,k;aTerSiRlOoMrIuG;b1Zll,mp,rGsh;cha1s4J;ai1eIiCoG;cGdu9greAhibBmi1te8vi2T;eAlaim;di5pa2ss,veC;iCp,rtr3ZsGur;e,t;aHuG;g,n4;n,y;ck,le;fo30mBsi8;ck,iCrt4Fss,u1;bJccur,ff0pera7utweIverGwe;co40lap,ta20u1wG;helm;igh;ser3taE;eHotG;e,i9;ed,gle5;aLeKiIoHuG;ltip3Crd0;nit11ve;nGrr10;d,g6us;asu2lt,n0Nr4;intaEna4rHtG;ch,t0;ch,kGry;et;aLeKiIoGu1B;aGck,ok,ve;d,n;ft,ke,mBnGst2Wve;e,k;a2Dc0Et;b0Nck,uG;gh,nD;iGno2Z;ck,ll,ss;am,oEuG;d4mp;gno2mQnGss3C;cOdica7flu0MhNsKtIvG;eGol3;nt,st;erGrodu9;a5fe2;i8tG;aGru5;ll;abBibB;lu1Er1C;agi22pG;lemeCo20ro3;aKeIi2oHuG;nt,rry;n02pe,st;aGlp;d,t;nd6ppGrm,te;en;aKloAove1MrIuG;arGeAi13;ant33d;aGip,umb6;b,sp;in,th0ze;aQeaPiNlLoIracHuncG;ti3D;tu2;cus,lHrG;ce,eca8m,s2V;d,l1Z;aFoG;at,od,w;gu2lGniFx;e,l;r,tu2;il,vG;or;a13cho,le5mSnPstNvalua7xG;a0AcLerKi8pGte17;a16eHi2laEoGreA;rt,se;ct,riG;en9;ci1t;el,han4;abGima7;liF;ab6couXdHfor9ga4han9j03riDsu2t0vG;isi2Qy;!u2;body,er4pG;hasiGow0;ze;a06eUiLoKrHuG;mp;aHeAiG;ft;g,in;d4ubt;ff0p,re5sHvG;iYor9;aKcHliGmiApl16tinguiF;ke;oGuA;uGv0;ra4;gr1TppG;ear,ro3;cNem,fLliv0ma0Dny,pKsHterG;mi0E;cribe,er3iHtrG;oy;gn,re;a09e08i5osB;eGi09y;at,ct;iIlHrG;ea1;a2i05;de;ma4n9re,te;a0Ae09h06i7l04oJrG;aHeGoAuFy;a7dB;ck,ve;llZmSnHok,py,uGv0;gh,nt;cePdu5fMsKtIvG;eGin9;rt,y;aEin0SrG;a8ibu7ol;iGtitu7;d0st;iHoGroC;rm;gu2rm;rn;biLfoKmaJpG;a2laE;in;re;nd;rt;ne;ap1e5;aGip,o1;im,w;aHeG;at,ck,w;llen4n4r4se;a1nt0;ll,ncIrGt0u1;eGry;!en;el;aPeMloLoJruFuG;lGry;ly;sh;a8mb,o8rrGth0un9;ow;ck;ar,lHnefBtrG;ay;ie3ong;ng,se;band0Jc0Bd06ffo05gr04id,l01mu1nYppTrQsKttGvoid,waB;acIeHra5;ct;m0Fnd;h,k;k,sG;eIiHocia7uG;me;gn,st;mb6rt;le;chHgGri3;ue;!i3;eaJlIroG;aDve;ch;aud,y;l,r;noun9sw0tG;icipa7;ce;lHt0;er;e4ow;ee;rd;aRdIju8mBoR;it;st;!reA;ss;cJhie3knowled4tiva7;te;ge;ve;eIouCu1;se;nt;pt;on", + "Unit": "true¦0:19;a14b12c0Od0Ne0Lf0Gg0Ch09in0Hjoule0k02l00mNnMoLpIqHsqCt7volts,w6y4z3°2µ1;g,s;c,f,n;b,e2;a0Nb,d0Dears old,o1;tt0H;att0b;able4b3d,e2on1sp;!ne0;a2r0D;!l,sp;spo04; ft,uare 1;c0Id0Hf3i0Fkilo0Jm1ya0E;e0Mil1;e0li0H;eet0o0D;t,uart0;ascals,e2i1ou0Pt;c0Mnt0;rcent,t02;hms,uYz;an0JewtT;/s,b,e9g,i3l,m2p1²,³;h,s;!²;!/h,cro5l1;e1li08;! pFs1²;! 1;anEpD;g06s0B;gQter1;! 2s1;! 1;per second;b,i00m,u1x;men0x0;b,elvin0g,ilo2m1nR;!/h,ph,²;byZgXmeter1;! p2s1;! p1;er1; hour;e1g,r0z;ct1rtz0;aXogQ;al2b,igAra1;in0m0;!l1;on0;a4emtPl2t1;²,³; oz,uid ou1;nce0;hrenheit0rad0;b,x1;abyH;eciCg,l,mA;arat0eAg,m9oulomb0u1;bic 1p0;c5d4fo3i2meAya1;rd0;nch0;ot0;eci2;enti1;me4;!²,³;lsius0nti1;g2li1me1;ter0;ram0;bl,y1;te0;c4tt1;os1;eco1;nd0;re0;!s", + "Organization": "true¦0:46;a3Ab2Qc2Ad21e1Xf1Tg1Lh1Gi1Dj19k17l13m0Sn0Go0Dp07qu06rZsStFuBv8w3y1;amaha,m0Xou1w0X;gov,tu2S;a3e1orld trade organizati41;lls fargo,st1;fie22inghou16;l1rner br3D;-m11gree31l street journ25m11;an halNeriz3Wisa,o1;dafo2Gl1;kswagLvo;bs,kip,n2ps,s1;a tod2Rps;es35i1;lev2Xted natio2Uv; mobi2Kaco bePd bMeAgi frida9h3im horto2Tmz,o1witt2W;shiba,y1;ota,s r Y;e 1in lizzy;b3carpen33daily ma2Xguess w2holli0rolling st1Ms1w2;mashing pumpki2Ouprem0;ho;ea1lack eyed pe3Fyrds;ch bo1tl0;ys;l2s1;co,la m12;efoni07us;a6e4ieme2Gnp,o2pice gir5ta1ubaru;rbucks,to2N;ny,undgard1;en;a2Rx pisto1;ls;few25insbu26msu1X;.e.m.,adiohead,b6e3oyal 1yan2X;b1dutch she4;ank;/max,aders dige1Ed 1vl32;bu1c1Uhot chili peppe2Klobst28;ll;c,s;ant2Vizno2F;an5bs,e3fiz24hilip morrBi2r1;emier27octer & gamb1Rudenti14;nk floyd,zza hut;psi28tro1uge08;br2Qchina,n2Q; 2ason1Xda2G;ld navy,pec,range juli2xf1;am;us;a9b8e5fl,h4i3o1sa,wa;kia,tre dame,vart1;is;ke,ntendo,ss0K;l,s;c,st1Etflix,w1; 1sweek;kids on the block,york08;a,c;nd1Us2t1;ional aca2Fo,we0Q;a,cYd0O;aAcdonald9e5i3lb,o1tv,yspace;b1Nnsanto,ody blu0t1;ley crue,or0O;crosoft,t1;as,subisO;dica3rcedes2talli1;ca;!-benz;id,re;'s,s;c's milk,tt13z1Y;'ore09a3e1g,ittle caesa1Ktd;novo,x1;is,mark; pres5-z-boy,bour party;atv,fc,kk,m1od1K;art;iffy lu0Lo3pmorgan1sa;! cha1;se;hnson & johns1Sy d1R;bm,hop,n1tv;c,g,te1;l,rpol; & m,asbro,ewlett-packaTi3o1sbc,yundai;me dep1n1J;ot;tac1zbollah;hi;eneral 6hq,l5mb,o2reen d0Iu1;cci,ns n ros0;ldman sachs,o1;dye1g0B;ar;axo smith kliZencore;electr0Im1;oto0V;a3bi,da,edex,i1leetwood mac,oGrito-l0A;at,nancial1restoV; tim0;cebook,nnie mae;b06sa,u3xxon1; m1m1;ob0H;!rosceptics;aiml0Ae5isney,o3u1;nkin donuts,po0Wran dur1;an;j,w j1;on0;a,f leppa3ll,p2r spiegZstiny's chi1;ld;eche mode,t;rd;aEbc,hBi9nn,o3r1;aigsli5eedence clearwater reviv1ossra05;al;!ca c5l4m1o0Ast05;ca2p1;aq;st;dplMgate;ola;a,sco1tigroup;! systems;ev2i1;ck fil-a,na daily;r0Hy;dbury,pital o1rl's jr;ne;aGbc,eCfAl6mw,ni,o2p,r1;exiteeWos;ei3mbardiJston 1;glo1pizza;be;ng;ack & deckFo2ue c1;roX;ckbuster video,omingda1;le; g1g1;oodriN;cht3e ge0n & jer2rkshire hathaw1;ay;ryH;el;nana republ3s1xt5y5;f,kin robbi1;ns;ic;bXcSdidRerosmith,ig,lLmFnheuser-busEol,ppleAr7s3t&t,v2y1;er;is,on;hland2s1;n,ociated F; o1;il;by4g2m1;co;os; compu2bee1;'s;te1;rs;ch;c,d,erican3t1;!r1;ak; ex1;pre1;ss; 4catel2t1;air;!-luce1;nt;jazeera,qae1;da;as;/dc,a3er,t1;ivisi1;on;demy of scienc0;es;ba,c", + "Demonym": "true¦0:16;1:13;a0Wb0Nc0Cd0Ae09f07g04h02iYjVkTlPmLnIomHpDqatari,rBs7t5u4v3wel0Rz2;am0Fimbabwe0;enezuel0ietnam0H;g9krai1;aiwThai,rinida0Iu2;ni0Qrkmen;a4cot0Ke3ingapoOlovak,oma0Tpa05udRw2y0X;edi0Kiss;negal0Br08;mo0uU;o6us0Lw2;and0;a3eru0Hhilipp0Po2;li0Ertugu06;kist3lesti1na2raguay0;ma1;ani;amiZi2orweP;caragu0geri2;an,en;a3ex0Mo2;ngo0Erocc0;cedo1la2;gasy,y08;a4eb9i2;b2thua1;e0Dy0;o,t02;azakh,eny0o2uwaiti;re0;a2orda1;ma0Bp2;anN;celandic,nd4r2sraeli,ta02vo06;a2iT;ni0qi;i0oneV;aiDin2ondur0unN;di;amDe2hanai0reek,uatemal0;or2rm0;gi0;i2ren7;lipino,n4;cuadoVgyp6ngliJsto1thiopi0urope0;a2ominXut4;niH;a9h6o4roa3ub0ze2;ch;ti0;lom2ngol5;bi0;a6i2;le0n2;ese;lifor1m2na3;bo2eroo1;di0;angladeshi,el8o6r3ul2;gaG;aziBi2;ti2;sh;li2s1;vi0;aru2gi0;si0;fAl7merBngol0r5si0us2;sie,tr2;a2i0;li0;gent2me1;ine;ba1ge2;ri0;ni0;gh0r2;ic0;an", + "Possessive": "true¦anyAh5its,m3noCo1sometBthe0yo1;ir1mselves;ur0;!s;i8y0;!se4;er1i0;mse2s;!s0;!e0;lf;o1t0;hing;ne", + "Currency": "true¦$,aud,bScQdLeurKfJgbp,hkd,iIjpy,kGlEp8r7s3usd,x2y1z0¢,£,¥,ден,лв,руб,฿,₡,₨,€,₭,﷼;lotySł;en,uanR;af,of;h0t5;e0il5;k0q0;elM;iel,oubleLp,upeeL;e2ound st0;er0;lingI;n0soH;ceGn0;ies,y;e0i8;i,mpi7;n,r0wanzaCyatC;!onaBw;ls,nr;ori7ranc9;!o8;en3i2kk,o0;b0ll2;ra5;me4n0rham4;ar3;ad,e0ny;nt1;aht,itcoin0;!s", + "City": "true¦a2Wb26c1Wd1Re1Qf1Og1Ih1Ai18jakar2Hk0Zl0Tm0Gn0Co0ApZquiYrVsLtCuBv8w3y1z0;agreb,uri1Z;ang1Te0okohama;katerin1Hrev34;ars3e2i0rocl3;ckl0Vn0;nipeg,terth0W;llingt1Oxford;aw;a1i0;en2Hlni2Z;lenc2Uncouv0Gr2G;lan bat0Dtrecht;a6bilisi,e5he4i3o2rondheim,u0;nVr0;in,ku;kyo,ronIulouC;anj23l13miso2Jra2A; haJssaloni0X;gucigalpa,hr2Ol av0L;i0llinn,mpe2Bngi07rtu;chu22n2MpT;a3e2h1kopje,t0ydney;ockholm,uttga12;angh1Fenzh1X;o0KvZ;int peters0Ul3n0ppo1F; 0ti1B;jo0salv2;se;v0z0Q;adU;eykjavik,i1o0;me,sario,t25;ga,o de janei17;to;a8e6h5i4o2r0ueb1Qyongya1N;a0etor24;gue;rt0zn24; elizabe3o;ls1Grae24;iladelph1Znom pe07oenix;r0tah tik19;th;lerJr0tr10;is;dessa,s0ttawa;a1Hlo;a2ew 0is;delTtaip0york;ei;goya,nt0Upl0Uv1R;a5e4i3o1u0;mb0Lni0I;nt0scH;evideo,real;l1Mn01skolc;dellín,lbour0S;drid,l5n3r0;ib1se0;ille;or;chest0dalWi0Z;er;mo;a4i1o0vAy01;nd00s angel0F;ege,ma0nz,sbZverpo1;!ss0;ol; pla0Iusan0F;a5hark4i3laipeda,o1rak0uala lump2;ow;be,pavog0sice;ur;ev,ng8;iv;b3mpa0Kndy,ohsiu0Hra0un03;c0j;hi;ncheMstanb0̇zmir;ul;a5e3o0; chi mi1ms,u0;stI;nh;lsin0rakliG;ki;ifa,m0noi,va0A;bu0SiltD;alw4dan3en2hent,iza,othen1raz,ua0;dalaj0Gngzhou;bu0P;eUoa;sk;ay;es,rankfu0;rt;dmont4indhovU;a1ha01oha,u0;blRrb0Eshanbe;e0kar,masc0FugavpiJ;gu,je0;on;a7ebu,h2o0raioJuriti01;lo0nstanJpenhagNrk;gFmbo;enn3i1ristchur0;ch;ang m1c0ttagoL;ago;ai;i0lgary,pe town,rac4;ro;aHeBirminghWogoAr5u0;char3dap3enos air2r0sZ;g0sa;as;es;est;a2isba1usse0;ls;ne;silPtisla0;va;ta;i3lgrade,r0;g1l0n;in;en;ji0rut;ng;ku,n3r0sel;celo1ranquil0;la;na;g1ja lu0;ka;alo0kok;re;aBb9hmedabad,l7m4n2qa1sh0thens,uckland;dod,gabat;ba;k0twerp;ara;m5s0;terd0;am;exandr0maty;ia;idj0u dhabi;an;lbo1rh0;us;rg", + "Abbreviation": "true¦a0Tb0Qc0Kd0Ie0Ff0Cg0Ah08i06j04k02l00mRnOoNpIqHrFs9t6u5v2w0yb,µg;is0r,y0L;!c;a,b,e1i0ol,s,t;tro,vo;r,t;niv,safa,t;b1ce,d,e0sp;l,mp,nn,x;!l,sp;ask,e3fc,gt,i2q1r,s,t,u0;pt,rg;! ft;r,tu;c,nVp0;!t;b,d,e0;pSs,v;t,ue;a,d,enn3hd,l,p,r1s0t,vt;!eud;ef,o0;b,f,n;!a;ct,kla,nt,p,rd,z;e0ov;b0e;!r;a7b,d,essrs,g,i4l3m2p1rHs0t;!tr;h,s;!e;!le;!n1s0;c,ter;!n;!j,r,sc;at,b,it,lb,m,ng,t0x;!d;an6b,g,m0;!ph;an,d,r,u0;l,n;a,da,e,n0;c,f;g,on,r0wy,z;!s;a0b,en,ov;!l;e1ig,l0m,r,t,y;! oz,a;b,m;a,g,ng,s1tc,x0;!p;p,q,t;ak,e0g,ist,l,m,r;c,f,pt,t;a3ca,g,l,m2o0pl,res,t,yn;!l0mdr,nn,rp;!o;!dr;!l0pt;!if;a,c,l1r0;ig,os;!dg,vd;d4l3p2r1ss0tty,ug,ve;n,t;c,iz;prox,r,t;!ta;!j,m,v", + "Country": "true¦0:38;1:2L;a2Wb2Dc21d1Xe1Rf1Lg1Bh19i13j11k0Zl0Um0Gn05om3CpZqat1JrXsKtCu6v4wal3yemTz2;a24imbabwe;es,lis and futu2X;a2enezue31ietnam;nuatu,tican city;.5gTkraiZnited 3ruXs2zbeE;a,sr;arab emirat0Kkingdom,states2;! of am2X;k.,s.2; 27a.;a7haBimor-les0Bo6rinidad4u2;nis0rk2valu;ey,me2Xs and caic1T; and 2-2;toba1J;go,kel0Ynga;iw2Vji2nz2R;ki2T;aCcotl1eBi8lov7o5pa2Bri lanka,u4w2yr0;az2ed9itzerl1;il1;d2Qriname;lomon1Vmal0uth 2;afr2IkLsud2O;ak0en0;erra leoEn2;gapo1Wt maart2;en;negKrb0ychellY;int 2moa,n marino,udi arab0;hele24luc0mart1Z;epublic of ir0Com2Cuss0w2;an25;a3eHhilippinTitcairn1Ko2uerto riM;l1rtugE;ki2Bl3nama,pua new0Tra2;gu6;au,esti2;ne;aAe8i6or2;folk1Gth3w2;ay; k2ern mariana1B;or0M;caragua,ger2ue;!ia;p2ther18w zeal1;al;mib0u2;ru;a6exi5icro09o2yanm04;ldova,n2roc4zamb9;a3gol0t2;enegro,serrat;co;c9dagascZl6r4urit3yot2;te;an0i14;shall0Vtin2;ique;a3div2i,ta;es;wi,ys0;ao,ed00;a5e4i2uxembourg;b2echtenste10thu1E;er0ya;ban0Gsotho;os,tv0;azakh1De2iriba02osovo,uwait,yrgyz1D;eling0Jnya;a2erF;ma15p1B;c6nd5r3s2taly,vory coast;le of m19rael;a2el1;n,q;ia,oI;el1;aiSon2ungary;dur0Mg kong;aAermany,ha0Pibralt9re7u2;a5ern4inea2ya0O;!-biss2;au;sey;deloupe,m,tema0P;e2na0M;ce,nl1;ar;bTmb0;a6i5r2;ance,ench 2;guia0Dpoly2;nes0;ji,nl1;lklandTroeT;ast tim6cu5gypt,l salv5ngl1quatorial3ritr4st2thiop0;on0; guin2;ea;ad2;or;enmark,jibou4ominica3r con2;go;!n B;ti;aAentral african 9h7o4roat0u3yprQzech2; 8ia;ba,racao;c3lo2morPngo-brazzaville,okFsta r03te d'ivoiK;mb0;osD;i2ristmasF;le,na;republic;m2naTpe verde,yman9;bod0ero2;on;aFeChut00o8r4u2;lgar0r2;kina faso,ma,undi;azil,itish 2unei;virgin2; is2;lands;liv0nai4snia and herzegoviGtswaGuvet2; isl1;and;re;l2n7rmuF;ar2gium,ize;us;h3ngladesh,rbad2;os;am3ra2;in;as;fghaFlCmAn5r3ustr2zerbaijH;al0ia;genti2men0uba;na;dorra,g4t2;arct6igua and barbu2;da;o2uil2;la;er2;ica;b2ger0;an0;ia;ni2;st2;an", + "Region": "true¦0:1U;a20b1Sc1Id1Des1Cf19g13h10i0Xj0Vk0Tl0Qm0FnZoXpSqPrMsDtAut9v6w3y1zacatec22;o05u1;cat18kZ;a1est vi4isconsin,yomi14;rwick0shington1;! dc;er2i1;rgin1S;acruz,mont;ah,tar pradesh;a2e1laxca1DuscaA;nnessee,x1R;bas0Kmaulip1QsmJ;a6i4o2taf0Ou1ylh13;ffVrr00s0Y;me10no1Auth 1;cSdR;ber1Ic1naloa;hu0Sily;n2skatchew0Rxo1;ny; luis potosi,ta catari1I;a1hode7;j1ngp02;asth0Mshahi;inghai,u1;e1intana roo;bec,ensWreta0E;ara4e2rince edward1; isU;i,nnsylv1rnambu02;an14;!na;axa0Ndisha,h1klaho1Bntar1reg4x04;io;ayarit,eBo3u1;evo le1nav0L;on;r1tt0Rva scot0X;f6mandy,th1; 1ampton0;c3d2yo1;rk0;ako0Y;aroli0V;olk;bras0Xva01w1; 2foundland1;! and labrador;brunswick,hamp0jers1mexiJyork state;ey;a6i2o1;nta0Nrelos;ch3dlanBn2ss1;issippi,ouri;as geraGneso0M;igQoacQ;dhya,harasht04ine,ni3r1ssachusetts;anhao,y1;land;p1toba;ur;anca0e1incoln0ouis8;e1iH;ds;a1entucky,hul0A;ns08rnata0Dshmir;alis1iangxi;co;daho,llino2nd1owa;ia05;is;a2ert1idalEunA;ford0;mp0waii;ansu,eorgWlou5u1;an2erre1izhou,jarat;ro;ajuato,gdo1;ng;cester0;lori2uji1;an;da;sex;e4o2uran1;go;rs1;et;lawaErby0;a8ea7hi6o1umbrH;ahui4l3nnectic2rsi1ventry;ca;ut;iMorado;la;apEhuahua;ra;l8m1;bridge0peche;a5r4uck1;ingham0;shi1;re;emen,itish columb3;h2ja cal1sque,var2;iforn1;ia;guascalientes,l4r1;izo2kans1;as;na;a2ber1;ta;ba2s1;ka;ma", + "FemaleName": "true¦0:FY;1:G2;2:FR;3:FD;4:FC;5:FS;6:ER;7:EP;8:GF;9:EZ;A:GB;B:E5;C:G8;D:FO;E:FL;F:EG;aE2bD4cB8dAIe9Gf91g8Hh83i7Sj6Uk60l4Om38n2To2Qp2Fqu2Er1Os0Qt04ursu6vUwOyLzG;aJeHoG;e,la,ra;lGna;da,ma;da,ra;as7EeHol1TvG;et7onB9;le0sen3;an9endBNhiB4iG;lInG;if3AniGo0;e,f39;a,helmi0lGma;a,ow;aMeJiG;cHviG;an9XenG1;kCZtor3;da,l8Vnus,rG;a,nGoniD2;a,iDC;leGnesEC;nDLrG;i1y;aSePhNiMoJrGu6y4;acG3iGu0E;c3na,sG;h9Mta;nHrG;a,i;i9Jya;a5IffaCGna,s5;al3eGomasi0;a,l8Go6Xres1;g7Uo6WrHssG;!a,ie;eFi,ri8;bNliMmKnIrHs5tGwa0;ia0um;a,yn;iGya;a,ka,s5;a4e4iGmCAra;!ka;a,t5;at5it5;a05carlet2Ye04hUiSkye,oQtMuHyG;bFJlvi1;e,sHzG;an2Tet7ie,y;anGi8;!a,e,nG;aEe;aIeG;fGl3DphG;an2;cF8r6;f3nGphi1;d4ia,ja,ya;er4lv3mon1nGobh75;dy;aKeGirlBLo0y6;ba,e0i6lIrG;iGrBPyl;!d70;ia,lBV;ki4nIrHu0w0yG;la,na;i,leAon,ron;a,da,ia,nGon;a,on;l5Yre0;bMdLi9lKmIndHrGs5vannaE;aEi0;ra,y;aGi4;nt5ra;lBNome;e,ie;in1ri0;a02eXhViToHuG;by,thBK;bQcPlOnNsHwe0xG;an94ie,y;aHeGie,lC;ann8ll1marBFtB;!lGnn1;iGyn;e,nG;a,d7W;da,i,na;an9;hel53io;bin,erByn;a,cGkki,na,ta;helBZki;ea,iannDXoG;da,n12;an0bIgi0i0nGta,y0;aGee;!e,ta;a,eG;cARkaE;chGe,i0mo0n5EquCDvDy0;aCCelGi9;!e,le;een2ia0;aMeLhJoIrG;iGudenAW;scil1Uyamva9;lly,rt3;ilome0oebe,ylG;is,lis;arl,ggy,nelope,r6t4;ige,m0Fn4Oo6rvaBBtHulG;a,et7in1;ricGsy,tA8;a,e,ia;ctav3deHfAWlGphAW;a,ga,iv3;l3t7;aQePiJoGy6;eHrG;aEeDma;ll1mi;aKcIkGla,na,s5ta;iGki;!ta;hoB2k8BolG;a,eBH;!mh;l7Tna,risF;dIi5PnHo23taG;li1s5;cy,et7;eAiCO;a01ckenz2eViLoIrignayani,uriBGyG;a,rG;a,na,tAS;i4ll9XnG;a,iG;ca,ka,qB4;a,chOkaNlJmi,nIrGtzi;aGiam;!n9;a,dy,erva,h,n2;a,dIi9JlG;iGy;cent,e;red;!e6;ae6el3G;ag4KgKi,lHrG;edi61isFyl;an2iGliF;nGsAM;a,da;!an,han;b08c9Ed06e,g04i03l01nZrKtJuHv6Sx87yGz2;a,bell,ra;de,rG;a,eD;h75il9t2;a,cSgOiJjor2l6In2s5tIyG;!aGbe5QjaAlou;m,n9S;a,ha,i0;!aIbALeHja,lCna,sGt53;!a,ol,sa;!l06;!h,m,nG;!a,e,n1;arIeHie,oGr3Kueri7;!t;!ry;et3IiB;elGi61y;a,l1;dGon,ue6;akranBy;iGlo36;a,ka,n9;a,re,s2;daGg2;!l2W;alCd2elGge,isBGon0;eiAin1yn;el,le;a0Ie08iWoQuKyG;d3la,nG;!a,dHe9SnGsAQ;!a,e9R;a,sAO;aB1cJelIiFlHna,pGz;e,iB;a,u;a,la;iGy;a2Ae,l25n9;is,l1GrHtt2uG;el6is1;aIeHi8na,rG;a6Zi8;lei,n1tB;!in1;aQbPd3lLnIsHv3zG;!a,be4Ket7z2;a,et7;a,dG;a,sGy;ay,ey,i,y;a,iaIlG;iGy;a8Ge;!n4F;b7Terty;!n5R;aNda,e0iLla,nKoIslARtGx2;iGt2;c3t3;la,nGra;a,ie,o4;a,or1;a,gh,laG;!ni;!h,nG;a,d4e,n4N;cNdon7Si6kes5na,rMtKurIvHxGy6;mi;ern1in3;a,eGie,yn;l,n;as5is5oG;nya,ya;a,isF;ey,ie,y;aZeUhadija,iMoLrIyG;lGra;a,ee,ie;istGy5B;a,en,iGy;!e,n48;ri,urtn9A;aMerLl99mIrGzzy;a,stG;en,in;!berlG;eGi,y;e,y;a,stD;!na,ra;el6PiJlInHrG;a,i,ri;d4na;ey,i,l9Qs2y;ra,s5;c8Wi5XlOma6nyakumari,rMss5LtJviByG;!e,lG;a,eG;e,i78;a5EeHhGi3PlCri0y;ar5Cer5Cie,leDr9Fy;!lyn73;a,en,iGl4Uyn;!ma,n31sF;ei72i,l2;a04eVilToMuG;anKdJliGst56;aHeGsF;!nAt0W;!n8X;i2Ry;a,iB;!anLcelCd5Vel71han6IlJni,sHva0yG;a,ce;eGie;fi0lCph4X;eGie;en,n1;!a,e,n36;!i10lG;!i0Z;anLle0nIrHsG;i5Qsi5Q;i,ri;!a,el6Pif1RnG;a,et7iGy;!e,f1P;a,e72iHnG;a,e71iG;e,n1;cLd1mi,nHqueliAsmin2Uvie4yAzG;min8;a8eHiG;ce,e,n1s;!lGsFt06;e,le;inHk2lCquelG;in1yn;da,ta;lPmNnMo0rLsHvaG;!na;aHiGob6U;do4;!belGdo4;!a,e,l2G;en1i0ma;a,di4es,gr5R;el9ogG;en1;a,eAia0o0se;aNeKilHoGyacin1N;ll2rten1H;aHdGlaH;a,egard;ry;ath0WiHlGnrietBrmiAst0W;en24ga;di;il75lKnJrGtt2yl75z6D;iGmo4Fri4G;etG;!te;aEnaE;ey,l2;aYeTiOlMold12rIwG;enGyne18;!dolC;acHetGisel9;a,chD;e,ieG;!la;adys,enGor3yn1Y;a,da,na;aJgi,lHna,ov71selG;a,e,le;da,liG;an;!n0;mYnIorgHrG;ald35i,m2Stru73;et7i5T;a,eGna;s1Nvieve;briel3Fil,le,rnet,yle;aReOio0loMrG;anHe9iG;da,e9;!cG;esHiGoi0G;n1s3V;!ca;!rG;a,en43;lHrnG;!an9;ec3ic3;rHtiGy8;ma;ah,rah;d0FileDkBl00mUn4ArRsMtLuKvG;aIelHiG;e,ta;in0Ayn;!ngel2H;geni1la,ni3R;h52ta;meral9peranJtG;eHhGrel6;er;l2Pr;za;iGma,nest29yn;cGka,n;a,ka;eJilImG;aGie,y;!liA;ee,i1y;lGrald;da,y;aTeRiMlLma,no4oJsIvG;a,iG;na,ra;a,ie;iGuiG;se;a,en,ie,y;a0c3da,nJsGzaH;aGe;!beG;th;!a,or;anor,nG;!a;in1na;en,iGna,wi0;e,th;aWeKiJoGul2U;lor51miniq3Yn30rGtt2;a,eDis,la,othGthy;ea,y;an09naEonAx2;anPbOde,eNiLja,lImetr3nGsir4U;a,iG;ce,se;a,iHla,orGphiA;es,is;a,l5J;dGrdG;re;!d4Mna;!b2CoraEra;a,d4nG;!a,e;hl3i0mMnKphn1rHvi1WyG;le,na;a,by,cHia,lG;a,en1;ey,ie;a,et7iG;!ca,el1Aka;arGia;is;a0Qe0Mh04i02lUoJrHynG;di,th3;istGy04;al,i0;lOnLrHurG;tn1D;aId28iGn28riA;!nG;a,e,n1;!l1S;n2sG;tanGuelo;ce,za;eGleD;en,t7;aIeoHotG;il4B;!pat4;ir8rIudG;et7iG;a,ne;a,e,iG;ce,sX;a4er4ndG;i,y;aPeMloe,rG;isHyG;stal;sy,tG;aHen,iGy;!an1e,n1;!l;lseHrG;!i8yl;a,y;nLrG;isJlHmG;aiA;a,eGot7;n1t7;!sa;d4el1PtG;al,el1O;cHlG;es7i3F;el3ilG;e,ia,y;iYlXmilWndVrNsLtGy6;aJeIhGri0;erGleDrCy;in1;ri0;li0ri0;a2GsG;a2Fie;a,iMlKmeIolHrG;ie,ol;!e,in1yn;lGn;!a,la;a,eGie,y;ne,y;na,sF;a0Di0D;a,e,l1;isBl2;tlG;in,yn;arb0CeYianXlVoTrG;andRePiIoHyG;an0nn;nwCok8;an2NdgKg0ItG;n27tG;!aHnG;ey,i,y;ny;etG;!t8;an0e,nG;da,na;i8y;bbi8nG;iBn2;ancGossom,ythe;a,he;ca;aRcky,lin9niBrNssMtIulaEvG;!erlG;ey,y;hHsy,tG;e,i0Zy8;!anG;ie,y;!ie;nGt5yl;adHiG;ce;et7iA;!triG;ce,z;a4ie,ra;aliy29b24d1Lg1Hi19l0Sm0Nn01rWsNthe0uJvIyG;anGes5;a,na;a,r25;drIgusHrG;el3;ti0;a,ey,i,y;hHtrG;id;aKlGt1P;eHi8yG;!n;e,iGy;gh;!nG;ti;iIleHpiB;ta;en,n1t7;an19elG;le;aYdWeUgQiOja,nHtoGya;inet7n3;!aJeHiGmI;e,ka;!mGt7;ar2;!belHliFmT;sa;!le;ka,sGta;a,sa;elGie;a,iG;a,ca,n1qG;ue;!t7;te;je6rea;la;!bHmGstas3;ar3;el;aIberHel3iGy;e,na;!ly;l3n9;da;aTba,eNiKlIma,yG;a,c3sG;a,on,sa;iGys0J;e,s0I;a,cHna,sGza;a,ha,on,sa;e,ia;c3is5jaIna,ssaIxG;aGia;!nd4;nd4;ra;ia;i0nHyG;ah,na;a,is,naE;c5da,leDmLnslKsG;haElG;inGyW;g,n;!h;ey;ee;en;at5g2nG;es;ie;ha;aVdiSelLrG;eIiG;anLenG;a,e,ne;an0;na;aKeJiHyG;nn;a,n1;a,e;!ne;!iG;de;e,lCsG;on;yn;!lG;iAyn;ne;agaJbHiG;!gaI;ey,i8y;!e;il;ah", + "Place": "true¦a07b05cZdYeXfVgRhQiOjfk,kMlKmHneEoDp9que,rd,s8t5u4v3w0yyz;is1y0;!o;!c;a,t;pYsafa,t;e1he 0;bronx,hamptons;nn,x;ask,fo,oho,t,under6yd;a2e1h0;l,x;k,nnK;!cifX;kla,nt;b1w eng0;land;!r;a1co,i0t,uc;dKnn;libu,nhattS;a0gw,hr;s,x;an0ul;!s;a0cn,da,ndianMst;!x;arlem,kg,nd,wy;a2re0;at 0enwich;britain,lak6;!y village;co,l0ra;!a;urope,verglad2;ak,en,fw,ist,own4xb;al4dg,gk,hina3l2o1r0t;es;lo,nn;!t;town;!if;cn,e0kk,lvd,rooklyn;l air,verly hills;frica,lta,m5ntarct2r1sia,tl0ve;!ant1;ct0iz;ic0; oce0;an;ericas,s", + "WeekDay": "true¦fri2mon2s1t0wednesd3;hurs1ues1;aturd1und1;!d0;ay0;!s", + "Month": "true¦aBdec9feb7j2mar,nov9oct1sep0;!t8;!o8;an3u0;l1n0;!e;!y;!u1;!ru0;ary;!em0;ber;pr1ug0;!ust;!il", + "Date": "true¦ago,t0weekend,yesterd2;mr2o0;d0morrow;ay;!w", + "FirstName": "true¦aEblair,cCdevBj8k6lashawn,m3nelly,quinn,re2sh0;ay,e0iloh;a,lby;g1ne;ar1el,org0;an;ion,lo;as8e0r9;ls7nyatta,rry;am0ess1ude;ie,m0;ie;an,on;as0heyenne;ey,sidy;lex1ndra,ubr0;ey;is", + "LastName": "true¦0:34;1:3B;2:39;3:2Y;4:2E;5:30;a3Bb31c2Od2Ee2Bf25g1Zh1Pi1Kj1Ek17l0Zm0Nn0Jo0Gp05rYsMtHvFwCxBy8zh6;a6ou,u;ng,o;a6eun2Uoshi1Kun;ma6ng;da,guc1Zmo27sh21zaR;iao,u;a7eb0il6o3right,u;li3Bs2;gn0lk0ng,tanabe;a6ivaldi;ssilj37zqu1;a9h8i2Go7r6sui,urn0;an,ynisJ;lst0Prr1Uth;at1Uomps2;kah0Vnaka,ylor;aEchDeChimizu,iBmiAo9t7u6zabo;ar1lliv2AzuE;a6ein0;l23rm0;sa,u3;rn4th;lva,mmo24ngh;mjon4rrano;midt,neid0ulz;ito,n7sa6to;ki;ch1dLtos,z;amBeag1Zi9o7u6;bio,iz,sD;b6dri1MgIj0Tme24osevelt,ssi,ux;erts,ins2;c6ve0F;ci,hards2;ir1os;aEeAh8ic6ow20ut1N;as6hl0;so;a6illips;m,n1T;ders5et8r7t6;e0Nr4;ez,ry;ers;h21rk0t6vl4;el,te0J;baBg0Blivei01r6;t6w1O;ega,iz;a6eils2guy5ix2owak,ym1E;gy,ka6var1K;ji6muW;ma;aEeCiBo8u6;ll0n6rr0Bssolini,ñ6;oz;lina,oKr6zart;al0Me6r0U;au,no;hhail4ll0;rci0ssi6y0;!er;eWmmad4r6tsu07;in6tin1;!o;aCe8i6op1uo;!n6u;coln,dholm;fe7n0Qr6w0J;oy;bv6v6;re;mmy,rs5u;aBennedy,imuAle0Lo8u7wo6;k,n;mar,znets4;bay6vacs;asY;ra;hn,rl9to,ur,zl4;aAen9ha3imen1o6u3;h6nYu3;an6ns2;ss2;ki0Es5;cks2nsse0D;glesi9ke8noue,shik7to,vano6;u,v;awa;da;as;aBe8itchcock,o7u6;!a3b0ghNynh;a3ffmann,rvat;mingw7nde6rN;rs2;ay;ns5rrQs7y6;asDes;an4hi6;moJ;a9il,o8r7u6;o,tierr1;ayli3ub0;m1nzal1;nd6o,rcia;hi;erAis9lor8o7uj6;ita;st0urni0;es;ch0;nand1;d7insteHsposi6vaL;to;is2wards;aCeBi9omin8u6;bo6rand;is;gu1;az,mitr4;ov;lgado,vi;nkula,rw7vi6;es,s;in;aFhBlarkAo6;h5l6op0rbyn,x;em7li6;ns;an;!e;an8e7iu,o6ristens5u3we;i,ng,u3w,y;!n,on6u3;!g;mpb7rt0st6;ro;ell;aBe8ha3lanco,oyko,r6yrne;ooks,yant;ng;ck7ethov5nnett;en;er,ham;ch,h8iley,rn6;es,i0;er;k,ng;dDl9nd6;ers6rA;en,on,s2;on;eks7iy8var1;ez;ej6;ev;ams", + "MaleName": "true¦0:CE;1:BL;2:C2;3:BT;4:B5;5:BZ;6:AT;7:9V;8:BD;9:AX;A:AO;aB4bA8c97d87e7Gf6Yg6Gh5Wi5Ij4Lk4Bl3Rm2Pn2Eo28p22qu20r1As0Qt06u05v00wNxavi3yGzB;aBor0;cBh8Ine;hCkB;!aB1;ar51eB0;ass2i,oCuB;sDu25;nEsDusB;oBsC;uf;ef;at0g;aJeHiCoByaAP;lfgang,odrow;lBn1O;bDey,frBJlB;aA5iB;am,e,s;e89ur;i,nde7sB;!l6t1;de,lCrr5yB;l1ne;lBt3;a93y;aEern1iBladimir;cCha0kt5CnceBrg9Bva0;!nt;ente,t5A;lentin49n8Yughn;lyss4Msm0;aTeOhKiIoErCyB;!l3ro8s1;av9QeBist0oy,um0;nt9Iv54y;bDd7XmBny;!as,mBoharu;aAYie,y;i83y;mBt9;!my,othy;adDeoCia7DomB;!as;!do7M;!de9;dErB;en8HrB;an8GeBy;ll,n8F;!dy;dgh,ic9Tnn3req,ts45;aRcotPeNhJiHoFpenc3tBur1Oylve8Hzym1;anDeBua7B;f0phAFvBwa7A;e57ie;!islaw,l6;lom1nA3uB;leyma8ta;dBl7Jm1;!n6;aDeB;lBrm0;d1t1;h6Sne,qu0Uun,wn,y8;aBbasti0k1Xl41rg40th,ymo9I;m9n;!tB;!ie,y;lCmBnti21q4Iul;!mAu4;ik,vato6V;aWeShe92iOoFuCyB;an,ou;b6LdCf9pe6QssB;!elAI;ol2Uy;an,bIcHdGel,geFh0landA9mEnDry,sCyB;!ce;coe,s;!a95nA;an,eo;l3Jr;e4Qg3n6olfo,ri68;co,ky;bAe9U;cBl6;ar5Oc5NhCkBo;!ey,ie,y;a85ie;gCid,ub5x,yBza;ansh,nS;g8WiB;na8Ss;ch5Yfa4lDmCndBpha4sh6Uul,ymo70;al9Yol2By;i9Ion;f,ph;ent2inB;cy,t1;aFeDhilCier62ol,reB;st1;!ip,lip;d9Brcy,tB;ar,e2V;b3Sdra6Ft44ul;ctav2Vliv3m96rFsCtBum8Uw5;is,to;aCc8SvB;al52;ma;i,l49vJ;athJeHiDoB;aBel,l0ma0r2X;h,m;cCg4i3IkB;h6Uola;hol5XkBol5X;!ol5W;al,d,il,ls1vB;il50;anBy;!a4i4;aWeTiKoFuCyB;l21r1;hamCr5ZstaB;fa,p4G;ed,mF;dibo,e,hamDis1XntCsBussa;es,he;e,y;ad,ed,mB;ad,ed;cGgu4kElDnCtchB;!e7;a78ik;house,o03t1;e,olB;aj;ah,hBk6;a4eB;al,l;hClv2rB;le,ri7v2;di,met;ck,hNlLmOnu4rHs1tDuricCxB;!imilian8Cwe7;e,io;eo,hCi52tB;!eo,hew,ia;eBis;us,w;cDio,k86lCqu6Gsha7tBv2;i2Hy;in,on;!el,oKus;achBcolm,ik;ai,y;amBdi,moud;adB;ou;aReNiMlo2RoIuCyB;le,nd1;cEiDkBth3;aBe;!s;gi,s;as,iaB;no;g0nn6RrenDuBwe7;!iB;e,s;!zo;am,on4;a7Bevi,la4SnDoBst3vi;!nB;!a60el;!ny;mCnBr67ur4Twr4T;ce,d1;ar,o4N;aIeDhaled,iBrist4Vu48y3B;er0p,rB;by,k,ollos;en0iEnBrmit,v2;!dCnBt5C;e0Yy;a7ri4N;r,th;na68rBthem;im,l;aYeQiOoDuB;an,liBst2;an,o,us;aqu2eJhnInGrEsB;eChBi7Bue;!ua;!ph;dBge;an,i,on;!aBny;h,s,th4X;!ath4Wie,nA;!l,sBy;ph;an,e,mB;!mA;d,ffGrDsB;sBus;!e;a5JemCmai8oBry;me,ni0O;i6Uy;!e58rB;ey,y;cHd5kGmFrDsCvi3yB;!d5s1;on,p3;ed,od,rBv4M;e4Zod;al,es,is1;e,ob,ub;k,ob,quB;es;aNbrahMchika,gKkeJlija,nuIrGsDtBv0;ai,sB;uki;aBha0i6Fma4sac;ac,iaB;h,s;a,vinBw2;!g;k,nngu52;!r;nacBor;io;im;in,n;aJeFina4VoDuByd56;be25gBmber4CsD;h,o;m3ra33sBwa3X;se2;aDctCitCn4ErB;be20m0;or;th;bKlJmza,nIo,rDsCyB;a43d5;an,s0;lEo4FrDuBv6;hi40ki,tB;a,o;is1y;an,ey;k,s;!im;ib;aQeMiLlenKoIrEuB;illerCsB;!tavo;mo;aDegBov3;!g,orB;io,y;dy,h57nt;nzaBrd1;lo;!n;lbe4Qno,ovan4R;ne,oDrB;aBry;ld,rd4U;ffr6rge;bri4l5rBv2;la1Zr3Eth,y;aReNiLlJorr0IrB;anDedBitz;!dAeBri24;ri23;cDkB;!ie,lB;in,yn;esJisB;!co,zek;etch3oB;yd;d4lBonn;ip;deriDliCng,rnB;an01;pe,x;co;bi0di;arZdUfrTit0lNmGnFo2rCsteb0th0uge8vBym5zra;an,ere2V;gi,iCnBrol,v2w2;est45ie;c07k;och,rique,zo;aGerFiCmB;aFe2P;lCrB;!h0;!io;s1y;nu4;be09d1iEliDmCt1viBwood;n,s;er,o;ot1Ts;!as,j43sB;ha;a2en;!dAg32mEuCwB;a25in;arB;do;o0Su0S;l,nB;est;aYeOiLoErDuCwByl0;ay8ight;a8dl6nc0st2;ag0ew;minFnDri0ugCyB;le;!l03;!a29nBov0;e7ie,y;go,icB;!k;armuCeBll1on,rk;go;id;anIj0lbeHmetri9nFon,rEsDvCwBxt3;ay8ey;en,in;hawn,mo08;ek,ri0F;is,nBv3;is,y;rt;!dB;re;lKmInHrDvB;e,iB;!d;en,iDne7rByl;eBin,yl;l2Vn;n,o,us;!e,i4ny;iBon;an,en,on;e,lB;as;a06e04hWiar0lLoGrEuCyrB;il,us;rtB;!is;aBistobal;ig;dy,lEnCrB;ey,neli9y;or,rB;ad;by,e,in,l2t1;aGeDiByI;fBnt;fo0Ct1;meCt9velaB;nd;nt;rDuCyB;!t1;de;enB;ce;aFeErisCuB;ck;!tB;i0oph3;st3;d,rlBs;eBie;s,y;cBdric,s11;il;lEmer1rB;ey,lCro7y;ll;!os,t1;eb,v2;ar02eUilTlaSoPrCuByr1;ddy,rtI;aJeEiDuCyB;an,ce,on;ce,no;an,ce;nCtB;!t;dCtB;!on;an,on;dCndB;en,on;!foBl6y;rd;bCrByd;is;!by;i8ke;al,lA;nFrBshoi;at,nCtB;!r10;aBie;rd0S;!edict,iCjam2nA;ie,y;to;n6rBt;eBy;tt;ey;ar0Xb0Nd0Jgust2hm0Gid5ja0ElZmXnPputsiOrFsaEuCveBya0ziz;ry;gust9st2;us;hi;aIchHi4jun,maFnDon,tBy0;hBu06;ur;av,oB;ld;an,nd0A;el;ie;ta;aq;dGgel05tB;hoEoB;i8nB;!i02y;ne;ny;reBy;!as,s,w;ir,mBos;ar;an,beOd5eIfFi,lEonDphonHt1vB;aMin;on;so,zo;an,en;onCrB;edP;so;c,jaEksandDssaExB;!and3;er;ar,er;ndB;ro;rtH;ni;en;ad,eB;d,t;in;aColfBri0vik;!o;mBn;!a;dFeEraCuB;!bakr,lfazl;hBm;am;!l;allEel,oulaye,ulB;!lCrahm0;an;ah,o;ah;av,on", + "Person": "true¦ashton kutchSbRcMdKeIgastNhGinez,jEkDleCmBnettJoAp8r4s3t2v0;a0irgin maG;lentino rossi,n go3;heresa may,iger woods,yra banks;addam hussain,carlett johanssJlobodan milosevic,uB;ay romano,eese witherspoIo1ush limbau0;gh;d stewart,nald0;inho,o;a0ipJ;lmIris hiltD;prah winfrFra;essiaen,itt romnEubarek;bron james,e;anye west,iefer sutherland,obe bryant;aime,effers8k rowli0;ng;alle ber0itlBulk hogan;ry;ff0meril lagasse,zekiel;ie;a0enzel washingt2ick wolf;lt1nte;ar1lint0ruz;on;dinal wols1son0;! palm2;ey;arack obama,rock;er", + "Verb": "true¦awak9born,cannot,fr8g7h5k3le2m1s0wors9;e8h3;ake sure,sg;ngth6ss6;eep tabs,n0;own;as0e2;!t2;iv1onna;ight0;en", + "PhrasalVerb": "true¦0:72;1:6Q;2:7E;3:74;4:6J;5:7H;6:76;7:6P;8:6C;9:6D;A:5I;B:71;C:70;a7Hb63c5Dd5Ae58f46g3Oh38iron0j34k2Zl2Km2Bn29o27p1Pr1Es09tQuOvacuum 1wGyammerCzD;eroAip EonD;e0k0;by,up;aJeGhFiEorDrit53;d 1k2R;mp0n4Ape0r8s8;eel Bip 7L;aEiD;gh 06rd0;n Br 3D;it 5Kk8lk6rm 0Qsh 74t67v4P;rgeCsD;e 9herA;aRePhNiJoHrFuDype 0N;ckArn D;d2in,o3Gup;ade YiDot0y 28;ckle68p 7A;ne67p Ds4D;d2o6Lup;ck FdEe Dgh5Tme0p o0Dre0;aw3ba4d2in,up;e5Ky 1;by,o6V;ink Drow 5V;ba4ov7up;aDe 4Ill4O;m 1r W;ckCke Elk D;ov7u4O;aDba4d2in,o31up;ba4ft7p4Tw3;a0Gc0Fe09h05i02lYmXnWoVpSquare RtJuHwD;earFiD;ngEtch D;aw3ba4o6P; by;ck Dit 1m 1ss0;in,up;aIe0RiHoFrD;aigh1MiD;ke 5Yn2Y;p Drm1P;by,in,o6B;n2Zr 1tc3I;c2Ymp0nd Dr6Hve6y 1;ba4d2up;d2o67up;ar2Vell0ill4UlErDurC;ingCuc8;a33it 3U;be4Crt0;ap 4Eow B;ash 4Zoke0;eep EiDow 9;c3Np 1;in,oD;ff,v7;gn Eng2Zt Dz8;d2o5up;in,o5up;aFoDu4F;ot Dut0w 5X;aw3ba4f37o5R;c2FdeAk4Sve6;e Hll0nd GtD; Dtl43;d2in,o5upD;!on;aw3ba4d2in,o1Yup;o5to;al4Lout0rap4L;il6v8;at0eKiJoGuD;b 4Ele0n Dstl8;aDba4d2in53o3Gt30u3E;c1Xw3;ot EuD;g2Knd6;a1Xf2Ro5;ng 4Op6;aDel6inAnt0;c4Yd D;o2Tu0C;aQePiOlMoKrHsyc2AuD;ll Ft D;aDba4d2in,o1Ht34up;p39w3;ap38d2in,o5t32up;attleCess EiGoD;p 1;ah1Hon;iDp 53re3Mur45wer 53;nt0;ay3ZuD;gAmp 9;ck 53g0leCn 9p3W;el 47ncilA;c3Pir 2In0ss FtEy D;ba4o4R; d2c1Y;aw3ba4o12;pDw3K;e3Jt B;arrow3Terd0oD;d6te3S;aJeHiGoEuD;ddl8ll37;c17p 1uth6ve D;al3Bd2in,o5up;ss0x 1;asur8lt 9ss D;a1Aup;ke Dn 9r30s1Lx0;do,o3Yup;aPeNiIoDuck0;a17c37g GoDse0;k Dse35;aft7ba4d2forw2Bin3Wov7uD;nd7p;in,o0J;e GghtFnEsDv1T;ten 4D;e 1k 1; 1e2Y;ar43d2;av1Ht 2YvelD; o3L;p 1sh DtchCugh6y1U;in3Lo5;eEick6nock D;d2o3H;eDyA;l2Hp D;aw3ba4d2fSin,o05to,up;aFoEuD;ic8mpA;ke2St2W;c31zz 1;aPeKiHoEuD;nker2Ts0U;lDneArse2O;d De 1;ba4d2fast,oZup;de Et D;ba4on,up;aw3o5;aDlp0;d Fl22r Dt 1;fDof;rom;in,oRu1A;cZm 1nDve it,ze1Y;d Dg 27kerF;d2in,o5;aReLive Jloss1VoFrEunD; f0M;in39ow 23; Dof 0U;aEb17it,oDr35t0Ou12;ff,n,v7;bo5ft7hJw3;aw3ba4d2in,oDup,w3;ff,n,ut;a17ek0t D;aEb11d2oDr2Zup;ff,n,ut,v7;cEhDl1Pr2Xt,w3;ead;ross;d aEnD;g 1;bo5;a08e01iRlNoJrFuD;cDel 1;k 1;eEighten DownCy 1;aw3o2L;eDshe1G; 1z8;lFol D;aDwi19;bo5r2I;d 9;aEeDip0;sh0;g 9ke0mDrD;e 2K;gLlJnHrFsEzzD;le0;h 2H;e Dm 1;aw3ba4up;d0isD;h 1;e Dl 11;aw3fI;ht ba4ure0;eInEsD;s 1;cFd D;fDo1X;or;e B;dQl 1;cHll Drm0t0O;apYbFd2in,oEtD;hrough;ff,ut,v7;a4ehi1S;e E;at0dge0nd Dy8;o1Mup;o09rD;ess 9op D;aw3bNin,o15;aShPlean 9oDross But 0T;me FoEuntD; o1M;k 1l6;aJbIforGin,oFtEuD;nd7;ogeth7;ut,v7;th,wD;ard;a4y;pDr19w3;art;eDipA;ck BeD;r 1;lJncel0rGsFtch EveA; in;o16up;h Bt6;ry EvD;e V;aw3o12;l Dm02;aDba4d2o10up;r0Vw3;a0He08l01oSrHuD;bbleFcklTilZlEndlTrn 05tDy 10zz6;t B;k 9; ov7;anMeaKiDush6;ghHng D;aEba4d2forDin,o5up;th;bo5lDr0Lw3;ong;teD;n 1;k D;d2in,o5up;ch0;arKgJil 9n8oGssFttlEunce Dx B;aw3ba4;e 9; ar0B;k Bt 1;e 1;d2up; d2;d 1;aIeed0oDurt0;cFw D;aw3ba4d2o5up;ck;k D;in,oK;ck0nk0st6; oJaGef 1nd D;d2ov7up;er;up;r0t D;d2in,oDup;ff,ut;ff,nD;to;ck Jil0nFrgEsD;h B;ainCe B;g BkC; on;in,o5; o5;aw3d2o5up;ay;cMdIsk Fuction6; oD;ff;arDo5;ouD;nd;d D;d2oDup;ff,n;own;t D;o5up;ut", + "Modal": "true¦c5lets,m4ought3sh1w0;ill,o5;a0o4;ll,nt;! to;ay,ight,ust;an,o0;uld", + "Adjective": "true¦0:73;1:7I;2:7O;3:7H;4:7A;5:5B;6:4R;7:49;8:48;9:7F;A:60;a6Eb60c5Md52e4Pf45g3Xh3Mi31j2Zk2Yl2Nm2Cn23o1Np16quack,r0Ws0Ct05uMvJwByear5;arp0eFholeEiDoB;man5oBu67;d69zy;despr6Zs5B;!sa7;eClBste22;co1El o4H;!k5;aCiBola47;b7Nce versa,ol50;ca2gabo5Ynilla;ltSnFpCrb55su4tterB;!mo6U; f30b1KpCsBti1D;ca7et,ide dItairs;er,i3J;aLbeco6Lconvin23deIeHfair,ivers4knGprecedUrEsCwB;iel1Writt5U;i1RuB;pervis0specti3;eBu5;cognHgul6Bl6B;own;ndi3v5Oxpect0;cid0rB;!grou5JsB;iz0tood;b7ppeaHssu6AuthorB;iz0;i20ra;aFeDhough4KoCrB;i1oubl0;geth6p,rp6B;en5LlBm4Vrr2Q;li3;boo,lBn;ent0;aTcSeQhPiNmug,nobbi3AoLpKqueami3AtFuBymb5Y;bDi gener50pBrprisi3;erBre0H;! dup6b,i25;du0seq4P;anda6OeEi0LrBy34;aightBip0; fBfB;or56;adfa5Wreotyp0;a4Uec2Cir1Flend5Wot on; call0le,mb6phist1TrBu0Tvi3X;d5Ury;gnifica2nB;ce4Qg7;am2Le6ocki3ut;cBda1em5lfi2Uni1Spa63re8;o1Cr3R;at53ient24reec53;cr0me,ns serif;aIeEiCoB;bu5Ktt4PuOy4;ghtBv4;!-25fA;ar,bel,condi1du5Xfres4XlDpublic3RsBtard0;is43oB;lu1na2;e1Auc41;b5EciB;al,st;aMeKicayu8lac5Copuli5BrCuB;bl54mp0;eFiCoB;!b06fu5Cmi2Xp6;mCor,sBva1;ti8;a4Re;ci58mB;a0EiB;er,um;ac1WrBti1;fe9ma2Pplexi3v2Z;rBst;allelDtB;-tiBi4;me;!ed;bMffKkJld fashion0nIpHrg1Dth6utGvB;al,erB;!aDniCt,wB;eiBrouB;ght;ll;do0Rer,g2Hsi41;en,posi1; boa5Ag2Fli8;!ay; gua58bBli8;eat;eDsB;cBer0Dole1;e8u3F;d2Ose;ak0eIiHoBua4J;nFrCtB;ab7;thB;!eB;rn;chala2descri4Ustop;ght5;arby,cessa3Sighbor5xt;aJeHiEoBultip7;bi7derClBnth5ot,st;dy;a1n;nBx0;iaBor;tu2Y;di49naBre;ci3;cBgenta,in,jZkeshift,le,mmoth,ny,sculi8;ab2Uho;aKeFiCoBu0Z;uti0Yvi3;mCteraB;l,te;it0;ftEgBth4;al,eCitiB;ma1;nda38;!-08;ngu3Lst,tt6;ap1Oind5no06;agg0uB;niKstifi0veni7;de4gno46lleg4mOnDpso 1RrB;a1releB;va2; JaIbr0corHdFfluenPiPnEsDtB;a9en3GoxB;ic31;a8i2N;a1er,oce2;iCoB;or;re9;deq3Eppr2T;fBsitu,vitro;ro2;mFpB;arDerfe9oBrop6;li1rtB;a2ed;ti4;eBi0M;d2Ln30;aGelFiDoBumdr36;ne2Uok0rrBs03ur5;if2N;ghfalut1KspB;an2L;liVpfA;lEnDrB;d01roB;wi3;dy,gi3;f,low0;ainfAener2Eiga1YlHoGraDuB;ilBng ho;ty;cCtB;efAis;efA;ne,od;ea28ob4;aQeKinJlIoDrB;a1PeBoz1G;e28q0YtfA;oDrB; keeps,eBm6tuna1;g00ign;liB;sh;ag2Uue2;al,i1;dFmCrB;ti7;a7ini8;ne;le; up;bl0i2l20r Cux,voB;ri1uri1;oBreac1A;ff;aJfficie2lImiHnFre9there4veExB;a9cess,pe1JtraCuB;be2Gl0D;!va19;n,ryday; Bcouragi3ti0M;rou1sui1;ne2;abo1YdMe14i1;g6sB;t,ygB;oi3;er;aReJiDoBrea11ue;mina2ne,ubB;le,tfA;dact16fficu1JsCvB;er1F;creDeas0gruntl0hone1AordCtB;a2ress0;er5;et; HadpGfFgene1KliDrang0spe1KtCvoB;ut;ail0ermin0;be1Hca1ghB;tfA;ia2;an;facto;i5magBngeroVs0E;ed,i3;ly;ertaNhief,ivil,oDrB;aBowd0u0D;mp0vYz0;loJmHnCoi3rrBve0K;e9u1D;cre1grEsDtB;emBra0B;po09;ta2;ue2;mer04pleB;te,x;ni4ss4;in;aLeHizarGlFoCrB;and new,isk,okL;gCna fiSttom,urgeoB;is;us;ank,iE;re;autifAhiClov0nBst,yoC;eRt;nd;ul;ckCnkru0SrrB;en;!wards; priori,b0Ic0Fd05fra04g00hZlUma01ntiquTppQrKsIttracti02utheHvEwB;aCkB;wa0P;ke,re;ant garCerB;age;de;ntQ;leep,tonisB;hi3;ab,bitEroDtiB;fiB;ci4;ga2;raB;ry;are2etiLrB;oprB;ia1;at0;arEcohCeBiIl,oof;rt;olB;ic;mi3;ead;ainDgressiConiB;zi3;ve;st;id; IeGuFvB;aCerB;se;nc0;ed;lt;pt,qB;ua1;hoc,infinitB;um;cuCtu4u1;al;ra1;erLlKoIruHsCuB;nda2;e2oCtra9;ct;lu1rbi3;ng;te;pt;aBve;rd;aze,e;ra2;nt", + "Comparable": "true¦0:3Z;1:4G;2:43;3:49;4:3V;5:2W;a4Mb42c3Md3Be33f2Pg2Dh22i1Tj1Sk1Pl1Hm1Bn15o13p0Tqu0Rr0IsRtKuIvFw7y6za11;ell25ou3;aBe9hi1Wi7r6;o3y;ck0Mde,l6n1ry,se;d,y;a6i4Kt;k,ry;n1Rr6sI;m,y;a7e6ulgar;nge4rda2xi3;gue,in,st;g0n6pco3Kse4;like0ti1;aAen9hi8i7ough,r6;anqu2Oen1ue;dy,g3Sme0ny,r09;ck,n,rs2P;d40se;ll,me,rt,s6wd45;te4;aVcarUeThRiQkin0FlMmKoHpGqua1FtAu7w6;eet,ift;b7dd13per0Gr6;e,re2H;sta2Ft5;aAe9iff,r7u6;pXr1;a6ict,o3;ig3Fn0U;a1ep,rn;le,rk;e22i3Fright0;ci28ft,l7o6re,ur;n,thi3;emn,id;a6el0ooth;ll,rt;e8i6ow,y;ck,g35m6;!y;ek,nd3D;ck,l0mp5;a6iTort,rill,y;dy,ll0Xrp;cu0Rve0Rxy;ce,ed,y;d,fe,int0l1Vv14;aBe9i8o6ude;mantic,o1Isy,u6;gh,nd;ch,pe,tzy;a6d,mo0H;dy,l;gg7ndom,p6re,w;id;ed;ai2i6;ck,et;aEhoDi1QlCoBr8u6;ny,r6;e,p5;egna2ic7o6;fouYud;ey,k0;li04or,te1B;ain,easa2;ny;in4le;dd,f6i0ld,ranQ;fi10;aAe8i7o6;b5isy,rm15sy;ce,mb5;a6w;r,t;ive,rr01;aAe8ild,o7u6;nda19te;ist,o1;a6ek,llX;n,s0ty;d,tuQ;aBeAi9o6ucky;f0Un7o1Du6ve0w17y0T;d,sy;e0g;g1Tke0tt5ve0;an,wd;me,r6te;ge;e7i6;nd;en;ol0ui1P;cy,ll,n6;sBt6;e6ima8;llege2r6;es7media6;te;ti3;ecu6ta2;re;aEeBiAo8u6;ge,m6ng1R;b5id;ll6me0t;ow;gh,l0;a6f04sita2;dy,v6;en0y;nd1Hppy,r6te4;d,sh;aGenFhDiClBoofy,r6;a9e8is0o6ue1E;o6ss;vy;at,en,y;nd,y;ad,ib,ooI;a2d1;a6o6;st0;t5uiY;u1y;aIeeb5iDlat,oAr8u6;ll,n6r14;!ny;aHe6iend0;e,sh;a7r6ul;get4mG;my;erce8n6rm,t;an6e;ciC;! ;le;ir,ke,n0Fr,st,t,ulA;aAerie,mp9sse7v6xtre0Q;il;nti6;al;ty;r7s6;tern,y;ly,th0;aFeCi9r7u6;ll,mb;u6y;nk;r7vi6;ne;e,ty;a6ep,nD;d6f,r;!ly;mp,pp03rk;aHhDlAo8r7u6;dd0r0te;isp,uel;ar6ld,mmon,ol,st0ward0zy;se;e6ou1;a6vW;n,r;ar8e6il0;ap,e6;sy;mi3;gey,lm8r6;e4i3;ful;!i3;aNiLlIoEr8u6;r0sy;ly;aAi7o6;ad,wn;ef,g7llia2;nt;ht;sh,ve;ld,r7un6;cy;ed,i3;ng;a7o6ue;nd,o1;ck,nd;g,tt6;er;d,ld,w1;dy;bsu9ng8we6;so6;me;ry;rd", + "TextValue": "true¦bOeJfDhundredNmOninAone,qu8s6t0zeroN;enMh3rNw0;e0o;l0ntD;fHve;ir0ousandKree;d,t6;e0ix8;cond,pt1ven7xt1;adr0int0;illionD;e0th;!t0;e9ie8y;i3o0;rt1ur0;!t2;ie4y;ft0rst,ve;e3h,ie2y;ight0lev2;!e1h,ie0y;th;en0;!th;illion0;!s,th", + "Ordinal": "true¦bGeDf9hundredHmGnin7qu6s4t0zeroH;enGh1rFwe0;lfFn9;ir0ousandE;d,t4;e0ixt9;cond,ptAvent8xtA;adr9int9;et0th;e6ie8;i2o0;r0urt3;tie5;ft1rst;ight0lev1;e0h,ie2;en1;illion0;th", + "Cardinal": "true¦bHeEf8hundred,mHnineAone,qu6s4t0zero;en,h2rGw0;e0o;lve,n8;irt9ousandEree;e0ix5;pt1ven4xt1;adr0int0;illion;i3o0;r1ur0;!t2;ty;ft0ve;e2y;ight0lev1;!e0y;en;illion0;!s", + "Expression": "true¦a02b01dXeVfuck,gShLlImHnGoDpBshAtsk,u7voi04w3y0;a1eLu0;ck,p;!a,hoo,y;h1ow,t0;af,f;e0oa;e,w;gh,h0;! 0h,m;huh,oh;eesh,hh,it;ff,hew,l0sst;ease,z;h1o0w,y;h,o,ps;!h;ah,ope;eh,mm;m1ol0;!s;ao,fao;a4e2i,mm,oly1urr0;ah;! mo6;e,ll0y;!o;ha0i;!ha;ah,ee,o0rr;l0odbye;ly;e0h,t cetera,ww;k,p;'oh,a0uh;m0ng;mit,n0;!it;ah,oo,ye; 1h0rgh;!em;la", + "Adverb": "true¦a07by 05d01eYfShQinPjustOkinda,mMnJoEpCquite,r9s5t2up1very,w0Bye0;p,s; to,wards5;h1o0wiO;o,t6ward;en,us;everal,o0uch;!me1rt0; of;hXtimes,w07;a1e0;alS;ndomRthN;ar excellDer0oint blank; Mhaps;f3n0;ce0ly;! 0;ag00moU; courHten;ewJo0; longEt 0;onHwithstanding;aybe,eanwhiAore0;!ovB;! aboS;deed,steT;en0;ce;or2u0;l9rther0;!moH; 0ev3;examp0good,suF;le;n mas1v0;er;se;e0irect1; 1finite0;ly;ju7trop;far,n0;ow; CbroBd nauseam,gAl5ny2part,side,t 0w3;be5l0mo5wor5;arge,ea4;mo1w0;ay;re;l 1mo0one,ready,so,ways;st;b1t0;hat;ut;ain;ad;lot,posteriori", + "Preposition": "true¦'o,-,aKbHcGdFexcept,fEinDmidPnotwithstandiQoBpRqua,sAt6u3vi2w0;/o,hereMith0;!in,oQ;a,s-a-vis;n1p0;!on;like,til;h0ill,owards;an,r0;ough0u;!oI;ans,ince,o that;',f0n1ut;!f;!to;or,rom;espite,own,u3;hez,irca;ar1e0oAy;low,sides,tween;ri6;',bo7cross,ft6lo5m3propos,round,s1t0;!op;! long 0;as;id0ong0;!st;ng;er;ut", + "Determiner": "true¦aAboth,d8e5few,l3mu7neiCown,plenty,some,th2various,wh0;at0ich0;evB;at,e3is,ose;a,e0;!ast,s;a1i6l0nough,very;!se;ch;e0u;!s;!n0;!o0y;th0;er" + }; + + var entity = ['Person', 'Place', 'Organization']; + var nouns = { + Noun: { + notA: ['Verb', 'Adjective', 'Adverb'] + }, + // - singular + Singular: { + isA: 'Noun', + notA: 'Plural' + }, + //a specific thing that's capitalized + ProperNoun: { + isA: 'Noun' + }, + // -- people + Person: { + isA: ['ProperNoun', 'Singular'], + notA: ['Place', 'Organization', 'Date'] + }, + FirstName: { + isA: 'Person' + }, + MaleName: { + isA: 'FirstName', + notA: ['FemaleName', 'LastName'] + }, + FemaleName: { + isA: 'FirstName', + notA: ['MaleName', 'LastName'] + }, + LastName: { + isA: 'Person', + notA: ['FirstName'] + }, + NickName: { + isA: 'Person', + notA: ['FirstName', 'LastName'] + }, + Honorific: { + isA: 'Noun', + notA: ['FirstName', 'LastName', 'Value'] + }, + // -- places + Place: { + isA: 'Singular', + notA: ['Person', 'Organization'] + }, + Country: { + isA: ['Place', 'ProperNoun'], + notA: ['City'] + }, + City: { + isA: ['Place', 'ProperNoun'], + notA: ['Country'] + }, + Region: { + isA: ['Place', 'ProperNoun'] + }, + Address: { + isA: 'Place' + }, + //---Orgs--- + Organization: { + isA: ['Singular', 'ProperNoun'], + notA: ['Person', 'Place'] + }, + SportsTeam: { + isA: 'Organization' + }, + School: { + isA: 'Organization' + }, + Company: { + isA: 'Organization' + }, + // - plural + Plural: { + isA: 'Noun', + notA: ['Singular'] + }, + //(not plural or singular) + Uncountable: { + isA: 'Noun' + }, + Pronoun: { + isA: 'Noun', + notA: entity + }, + //a word for someone doing something -'plumber' + Actor: { + isA: 'Noun', + notA: entity + }, + //a gerund-as-noun - 'swimming' + Activity: { + isA: 'Noun', + notA: ['Person', 'Place'] + }, + //'kilograms' + Unit: { + isA: 'Noun', + notA: entity + }, + //'Canadians' + Demonym: { + isA: ['Noun', 'ProperNoun'], + notA: entity + }, + //`john's` + Possessive: { + isA: 'Noun' // notA: 'Pronoun', + + } + }; + + var verbs = { + Verb: { + notA: ['Noun', 'Adjective', 'Adverb', 'Value'] + }, + // walks + PresentTense: { + isA: 'Verb', + notA: ['PastTense', 'Copula', 'FutureTense'] + }, + // neutral form - 'walk' + Infinitive: { + isA: 'PresentTense', + notA: ['PastTense', 'Gerund'] + }, + // walking + Gerund: { + isA: 'PresentTense', + notA: ['PastTense', 'Copula', 'FutureTense'] + }, + // walked + PastTense: { + isA: 'Verb', + notA: ['FutureTense'] + }, + // will walk + FutureTense: { + isA: 'Verb' + }, + // is + Copula: { + isA: 'Verb' + }, + // would have + Modal: { + isA: 'Verb', + notA: ['Infinitive'] + }, + // had walked + PerfectTense: { + isA: 'Verb', + notA: 'Gerund' + }, + Pluperfect: { + isA: 'Verb' + }, + // shown + Participle: { + isA: 'Verb' + }, + // show up + PhrasalVerb: { + isA: 'Verb' + }, + //'up' part + Particle: { + isA: 'PhrasalVerb' + } + }; + + var values = { + Value: { + notA: ['Verb', 'Adjective', 'Adverb'] + }, + Ordinal: { + isA: 'Value', + notA: ['Cardinal'] + }, + Cardinal: { + isA: 'Value', + notA: ['Ordinal'] + }, + RomanNumeral: { + isA: 'Cardinal', + //can be a person, too + notA: ['Ordinal', 'TextValue'] + }, + TextValue: { + isA: 'Value', + notA: ['NumericValue'] + }, + NumericValue: { + isA: 'Value', + notA: ['TextValue'] + }, + Money: { + isA: 'Cardinal' + }, + Percent: { + isA: 'Value' + } + }; + + var anything = ['Noun', 'Verb', 'Adjective', 'Adverb', 'Value', 'QuestionWord']; + var misc = { + //--Adjectives-- + Adjective: { + notA: ['Noun', 'Verb', 'Adverb', 'Value'] + }, + // adjectives that can conjugate + Comparable: { + isA: ['Adjective'] + }, + // better + Comparative: { + isA: ['Adjective'] + }, + // best + Superlative: { + isA: ['Adjective'], + notA: ['Comparative'] + }, + NumberRange: { + isA: ['Contraction'] + }, + Adverb: { + notA: ['Noun', 'Verb', 'Adjective', 'Value'] + }, + // Dates: + //not a noun, but usually is + Date: { + notA: ['Verb', 'Conjunction', 'Adverb', 'Preposition', 'Adjective'] + }, + Month: { + isA: ['Date', 'Singular'], + notA: ['Year', 'WeekDay', 'Time'] + }, + WeekDay: { + isA: ['Date', 'Noun'] + }, + // '9:20pm' + Time: { + isA: ['Date'], + notA: ['Value'] + }, + //glue + Determiner: { + notA: anything + }, + Conjunction: { + notA: anything + }, + Preposition: { + notA: anything + }, + // what, who, why + QuestionWord: { + notA: ['Determiner'] + }, + // peso, euro + Currency: {}, + // ughh + Expression: { + notA: ['Noun', 'Adjective', 'Verb', 'Adverb'] + }, + // dr. + Abbreviation: {}, + // internet tags + Url: { + notA: ['HashTag', 'PhoneNumber', 'Verb', 'Adjective', 'Value', 'AtMention', 'Email'] + }, + PhoneNumber: { + notA: ['HashTag', 'Verb', 'Adjective', 'Value', 'AtMention', 'Email'] + }, + HashTag: {}, + AtMention: { + isA: ['Noun'], + notA: ['HashTag', 'Verb', 'Adjective', 'Value', 'Email'] + }, + Emoji: { + notA: ['HashTag', 'Verb', 'Adjective', 'Value', 'AtMention'] + }, + Emoticon: { + notA: ['HashTag', 'Verb', 'Adjective', 'Value', 'AtMention'] + }, + Email: { + notA: ['HashTag', 'Verb', 'Adjective', 'Value', 'AtMention'] + }, + //non-exclusive + Auxiliary: { + notA: ['Noun', 'Adjective', 'Value'] + }, + Acronym: { + notA: ['Plural', 'RomanNumeral'] + }, + Negative: { + notA: ['Noun', 'Adjective', 'Value'] + }, + // if, unless, were + Condition: { + notA: ['Verb', 'Adjective', 'Noun', 'Value'] + } + }; + + // i just made these up + var colorMap = { + Noun: 'blue', + Verb: 'green', + Negative: 'green', + Date: 'red', + Value: 'red', + Adjective: 'magenta', + Preposition: 'cyan', + Conjunction: 'cyan', + Determiner: 'cyan', + Adverb: 'cyan' + }; + /** add a debug color to some tags */ + + var addColors = function addColors(tags) { + Object.keys(tags).forEach(function (k) { + // assigned from plugin, for example + if (tags[k].color) { + tags[k].color = tags[k].color; + return; + } // defined above + + + if (colorMap[k]) { + tags[k].color = colorMap[k]; + return; + } + + tags[k].isA.some(function (t) { + if (colorMap[t]) { + tags[k].color = colorMap[t]; + return true; + } + + return false; + }); + }); + return tags; + }; + + var _color = addColors; + + var unique$2 = function unique(arr) { + return arr.filter(function (v, i, a) { + return a.indexOf(v) === i; + }); + }; //add 'downward' tags (that immediately depend on this one) + + + var inferIsA = function inferIsA(tags) { + Object.keys(tags).forEach(function (k) { + var tag = tags[k]; + var len = tag.isA.length; + + for (var i = 0; i < len; i++) { + var down = tag.isA[i]; + + if (tags[down]) { + tag.isA = tag.isA.concat(tags[down].isA); + } + } // clean it up + + + tag.isA = unique$2(tag.isA); + }); + return tags; + }; + + var _isA = inferIsA; + + var unique$3 = function unique(arr) { + return arr.filter(function (v, i, a) { + return a.indexOf(v) === i; + }); + }; // crawl the tag-graph and infer any conflicts + // faster than doing this at tag-time + + + var inferNotA = function inferNotA(tags) { + var keys = Object.keys(tags); + keys.forEach(function (k) { + var tag = tags[k]; + tag.notA = tag.notA || []; + tag.isA.forEach(function (down) { + if (tags[down] && tags[down].notA) { + // borrow its conflicts + var notA = typeof tags[down].notA === 'string' ? [tags[down].isA] : tags[down].notA || []; + tag.notA = tag.notA.concat(notA); + } + }); // any tag that lists us as a conflict, we conflict it back. + + for (var i = 0; i < keys.length; i++) { + var key = keys[i]; + + if (tags[key].notA.indexOf(k) !== -1) { + tag.notA.push(key); + } + } // clean it up + + + tag.notA = unique$3(tag.notA); + }); + return tags; + }; + + var _notA = inferNotA; + + // a lineage is all 'incoming' tags that have this as 'isA' + var inferLineage = function inferLineage(tags) { + var keys = Object.keys(tags); + keys.forEach(function (k) { + var tag = tags[k]; + tag.lineage = []; // find all tags with it in their 'isA' set + + for (var i = 0; i < keys.length; i++) { + if (tags[keys[i]].isA.indexOf(k) !== -1) { + tag.lineage.push(keys[i]); + } + } + }); + return tags; + }; + + var _lineage = inferLineage; + + var validate = function validate(tags) { + // cleanup format + Object.keys(tags).forEach(function (k) { + var tag = tags[k]; // ensure isA is an array + + tag.isA = tag.isA || []; + + if (typeof tag.isA === 'string') { + tag.isA = [tag.isA]; + } // ensure notA is an array + + + tag.notA = tag.notA || []; + + if (typeof tag.notA === 'string') { + tag.notA = [tag.notA]; + } + }); + return tags; + }; // build-out the tag-graph structure + + + var inferTags = function inferTags(tags) { + // validate data + tags = validate(tags); // build its 'down tags' + + tags = _isA(tags); // infer the conflicts + + tags = _notA(tags); // debug tag color + + tags = _color(tags); // find incoming links + + tags = _lineage(tags); + return tags; + }; + + var inference = inferTags; + + var addIn = function addIn(obj, tags) { + Object.keys(obj).forEach(function (k) { + tags[k] = obj[k]; + }); + }; + + var build = function build() { + var tags = {}; + addIn(nouns, tags); + addIn(verbs, tags); + addIn(values, tags); + addIn(misc, tags); // do the graph-stuff + + tags = inference(tags); + return tags; + }; + + var tags = build(); + + var seq = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ", + cache = seq.split("").reduce(function (n, o, e) { + return n[o] = e, n; + }, {}), + toAlphaCode = function toAlphaCode(n) { + if (void 0 !== seq[n]) return seq[n]; + var o = 1, + e = 36, + t = ""; + + for (; n >= e; n -= e, o++, e *= 36) { + } + + for (; o--;) { + var _o = n % 36; + + t = String.fromCharCode((_o < 10 ? 48 : 55) + _o) + t, n = (n - _o) / 36; + } + + return t; + }, + fromAlphaCode = function fromAlphaCode(n) { + if (void 0 !== cache[n]) return cache[n]; + var o = 0, + e = 1, + t = 36, + r = 1; + + for (; e < n.length; o += t, e++, t *= 36) { + } + + for (var _e = n.length - 1; _e >= 0; _e--, r *= 36) { + var _t = n.charCodeAt(_e) - 48; + + _t > 10 && (_t -= 7), o += _t * r; + } + + return o; + }; + + var encoding = { + toAlphaCode: toAlphaCode, + fromAlphaCode: fromAlphaCode + }, + symbols = function symbols(n) { + var o = new RegExp("([0-9A-Z]+):([0-9A-Z]+)"); + + for (var e = 0; e < n.nodes.length; e++) { + var t = o.exec(n.nodes[e]); + + if (!t) { + n.symCount = e; + break; + } + + n.syms[encoding.fromAlphaCode(t[1])] = encoding.fromAlphaCode(t[2]); + } + + n.nodes = n.nodes.slice(n.symCount, n.nodes.length); + }; + + var indexFromRef = function indexFromRef(n, o, e) { + var t = encoding.fromAlphaCode(o); + return t < n.symCount ? n.syms[t] : e + t + 1 - n.symCount; + }, + toArray = function toArray(n) { + var o = [], + e = function e(t, r) { + var s = n.nodes[t]; + "!" === s[0] && (o.push(r), s = s.slice(1)); + var c = s.split(/([A-Z0-9,]+)/g); + + for (var _s = 0; _s < c.length; _s += 2) { + var u = c[_s], + i = c[_s + 1]; + if (!u) continue; + var l = r + u; + + if ("," === i || void 0 === i) { + o.push(l); + continue; + } + + var f = indexFromRef(n, i, t); + e(f, l); + } + }; + + return e(0, ""), o; + }, + unpack = function unpack(n) { + var o = { + nodes: n.split(";"), + syms: [], + symCount: 0 + }; + return n.match(":") && symbols(o), toArray(o); + }; + + var unpack_1 = unpack, + unpack_1$1 = function unpack_1$1(n) { + var o = n.split("|").reduce(function (n, o) { + var e = o.split("¦"); + return n[e[0]] = e[1], n; + }, {}), + e = {}; + return Object.keys(o).forEach(function (n) { + var t = unpack_1(o[n]); + "true" === n && (n = !0); + + for (var _o2 = 0; _o2 < t.length; _o2++) { + var r = t[_o2]; + !0 === e.hasOwnProperty(r) ? !1 === Array.isArray(e[r]) ? e[r] = [e[r], n] : e[r].push(n) : e[r] = n; + } + }), e; + }; + + var efrtUnpack_min = unpack_1$1; + + //safely add it to the lexicon + var addWord = function addWord(word, tag, lex) { + if (lex[word] !== undefined && + typeof lex[word] != 'function') { // @blab+ override prototype of object!!! + if (typeof lex[word] === 'string') { + lex[word] = [lex[word]]; + } + + if (typeof tag === 'string') { + lex[word].push(tag); + } else { + lex[word] = lex[word].concat(tag); + } + } else { + lex[word] = tag; + } + }; // blast-out more forms for some given words + + + var addMore = function addMore(word, tag, world) { + var lexicon = world.words; + var transform = world.transforms; // cache multi-words + + var words = word.split(' '); + + if (words.length > 1) { + //cache the beginning word + world.hasCompound[words[0]] = true; + } // inflect our nouns + + + if (tag === 'Singular') { + var plural = transform.toPlural(word, world); + lexicon[plural] = lexicon[plural] || 'Plural'; // only if it's safe + } //conjugate our verbs + + + if (tag === 'Infinitive') { + var conj = transform.conjugate(word, world); + var tags = Object.keys(conj); + + for (var i = 0; i < tags.length; i++) { + var w = conj[tags[i]]; + lexicon[w] = lexicon[w] || tags[i]; // only if it's safe + } + } //derive more adjective forms + + + if (tag === 'Comparable') { + var _conj = transform.adjectives(word); + + var _tags = Object.keys(_conj); + + for (var _i = 0; _i < _tags.length; _i++) { + var _w = _conj[_tags[_i]]; + lexicon[_w] = lexicon[_w] || _tags[_i]; // only if it's safe + } + } //conjugate phrasal-verbs + + + if (tag === 'PhrasalVerb') { + //add original form + addWord(word, 'Infinitive', lexicon); //conjugate first word + + var _conj2 = transform.conjugate(words[0], world); + + var _tags2 = Object.keys(_conj2); + + for (var _i2 = 0; _i2 < _tags2.length; _i2++) { + //add it to our cache + world.hasCompound[_conj2[_tags2[_i2]]] = true; //first + last words + + var _w2 = _conj2[_tags2[_i2]] + ' ' + words[1]; + + addWord(_w2, _tags2[_i2], lexicon); + addWord(_w2, 'PhrasalVerb', lexicon); + } + } // inflect our demonyms - 'germans' + + + if (tag === 'Demonym') { + var _plural = transform.toPlural(word, world); + + lexicon[_plural] = lexicon[_plural] || ['Demonym', 'Plural']; // only if it's safe + } + }; // throw a bunch of words in our lexicon + // const doWord = function(words, tag, world) { + // let lexicon = world.words + // for (let i = 0; i < words.length; i++) { + // addWord(words[i], tag, lexicon) + // // do some fancier stuff + // addMore(words[i], tag, world) + // } + // } + + + var addWords = { + addWord: addWord, + addMore: addMore + }; + + // add words from plurals and conjugations data + var addIrregulars = function addIrregulars(world) { + //add irregular plural nouns + var nouns = world.irregulars.nouns; + var words = Object.keys(nouns); + + for (var i = 0; i < words.length; i++) { + var w = words[i]; + world.words[w] = 'Singular'; + world.words[nouns[w]] = 'Plural'; + } // add irregular verb conjugations + + + var verbs = world.irregulars.verbs; + var keys = Object.keys(verbs); + + var _loop = function _loop(_i) { + var inf = keys[_i]; //add only if it it's safe... + + world.words[inf] = world.words[inf] || 'Infinitive'; + var forms = world.transforms.conjugate(inf, world); + forms = Object.assign(forms, verbs[inf]); //add the others + + Object.keys(forms).forEach(function (tag) { + world.words[forms[tag]] = world.words[forms[tag]] || tag; + }); + }; + + for (var _i = 0; _i < keys.length; _i++) { + _loop(_i); + } + }; + + var addIrregulars_1 = addIrregulars; + + //words that can't be compressed, for whatever reason + var misc$1 = { + // numbers + '20th century fox': 'Organization', + // '3m': 'Organization', + '7 eleven': 'Organization', + '7-eleven': 'Organization', + g8: 'Organization', + 'motel 6': 'Organization', + vh1: 'Organization', + q1: 'Date', + q2: 'Date', + q3: 'Date', + q4: 'Date' + }; + + //nouns with irregular plural/singular forms + //used in noun.inflect, and also in the lexicon. + var plurals = { + addendum: 'addenda', + alga: 'algae', + alumna: 'alumnae', + alumnus: 'alumni', + analysis: 'analyses', + antenna: 'antennae', + appendix: 'appendices', + avocado: 'avocados', + axis: 'axes', + bacillus: 'bacilli', + barracks: 'barracks', + beau: 'beaux', + bus: 'buses', + cactus: 'cacti', + chateau: 'chateaux', + child: 'children', + circus: 'circuses', + clothes: 'clothes', + corpus: 'corpora', + criterion: 'criteria', + curriculum: 'curricula', + database: 'databases', + deer: 'deer', + diagnosis: 'diagnoses', + echo: 'echoes', + embargo: 'embargoes', + epoch: 'epochs', + foot: 'feet', + formula: 'formulae', + fungus: 'fungi', + genus: 'genera', + goose: 'geese', + halo: 'halos', + hippopotamus: 'hippopotami', + index: 'indices', + larva: 'larvae', + leaf: 'leaves', + libretto: 'libretti', + loaf: 'loaves', + man: 'men', + matrix: 'matrices', + memorandum: 'memoranda', + modulus: 'moduli', + mosquito: 'mosquitoes', + mouse: 'mice', + move: 'moves', + nebula: 'nebulae', + nucleus: 'nuclei', + octopus: 'octopi', + opus: 'opera', + ovum: 'ova', + ox: 'oxen', + parenthesis: 'parentheses', + person: 'people', + phenomenon: 'phenomena', + prognosis: 'prognoses', + quiz: 'quizzes', + radius: 'radii', + referendum: 'referenda', + rodeo: 'rodeos', + sex: 'sexes', + shoe: 'shoes', + sombrero: 'sombreros', + stimulus: 'stimuli', + stomach: 'stomachs', + syllabus: 'syllabi', + synopsis: 'synopses', + tableau: 'tableaux', + thesis: 'theses', + thief: 'thieves', + tooth: 'teeth', + tornado: 'tornados', + tuxedo: 'tuxedos', + vertebra: 'vertebrae' // virus: 'viri', + // zero: 'zeros', + + }; + + // a list of irregular verb conjugations + // used in verbs().conjugate() + // but also added to our lexicon + //use shorter key-names + var mapping = { + g: 'Gerund', + prt: 'Participle', + perf: 'PerfectTense', + pst: 'PastTense', + fut: 'FuturePerfect', + pres: 'PresentTense', + pluperf: 'Pluperfect', + a: 'Actor' + }; // '_' in conjugations is the infinitive form + + var conjugations = { + act: { + a: '_or' + }, + ache: { + pst: 'ached', + g: 'aching' + }, + age: { + g: 'ageing', + pst: 'aged', + pres: 'ages' + }, + aim: { + a: '_er', + g: '_ing', + pst: '_ed' + }, + arise: { + prt: '_n', + pst: 'arose' + }, + babysit: { + a: '_ter', + pst: 'babysat' + }, + ban: { + a: '', + g: '_ning', + pst: '_ned' + }, + be: { + a: '', + g: 'am', + prt: 'been', + pst: 'was', + pres: 'is' + }, + beat: { + a: '_er', + g: '_ing', + prt: '_en' + }, + become: { + prt: '_' + }, + begin: { + g: '_ning', + prt: 'begun', + pst: 'began' + }, + being: { + g: 'are', + pst: 'were', + pres: 'are' + }, + bend: { + prt: 'bent' + }, + bet: { + a: '_ter', + prt: '_' + }, + bind: { + pst: 'bound' + }, + bite: { + g: 'biting', + prt: 'bitten', + pst: 'bit' + }, + bleed: { + prt: 'bled', + pst: 'bled' + }, + blow: { + prt: '_n', + pst: 'blew' + }, + boil: { + a: '_er' + }, + brake: { + prt: 'broken' + }, + "break": { + pst: 'broke' + }, + breed: { + pst: 'bred' + }, + bring: { + prt: 'brought', + pst: 'brought' + }, + broadcast: { + pst: '_' + }, + budget: { + pst: '_ed' + }, + build: { + prt: 'built', + pst: 'built' + }, + burn: { + prt: '_ed' + }, + burst: { + prt: '_' + }, + buy: { + prt: 'bought', + pst: 'bought' + }, + can: { + a: '', + fut: '_', + g: '', + pst: 'could', + perf: 'could', + pluperf: 'could', + pres: '_' + }, + "catch": { + pst: 'caught' + }, + choose: { + g: 'choosing', + prt: 'chosen', + pst: 'chose' + }, + cling: { + prt: 'clung' + }, + come: { + prt: '_', + pst: 'came', + g: 'coming' + }, + compete: { + a: 'competitor', + g: 'competing', + pst: '_d' + }, + cost: { + pst: '_' + }, + creep: { + prt: 'crept' + }, + cut: { + prt: '_' + }, + deal: { + prt: '_t', + pst: '_t' + }, + develop: { + a: '_er', + g: '_ing', + pst: '_ed' + }, + die: { + g: 'dying', + pst: '_d' + }, + dig: { + g: '_ging', + prt: 'dug', + pst: 'dug' + }, + dive: { + prt: '_d' + }, + "do": { + pst: 'did', + pres: '_es' + }, + draw: { + prt: '_n', + pst: 'drew' + }, + dream: { + prt: '_t' + }, + drink: { + prt: 'drunk', + pst: 'drank' + }, + drive: { + g: 'driving', + prt: '_n', + pst: 'drove' + }, + drop: { + g: '_ping', + pst: '_ped' + }, + eat: { + a: '_er', + g: '_ing', + prt: '_en', + pst: 'ate' + }, + edit: { + pst: '_ed', + g: '_ing' + }, + egg: { + pst: '_ed' + }, + fall: { + prt: '_en', + pst: 'fell' + }, + feed: { + prt: 'fed', + pst: 'fed' + }, + feel: { + a: '_er', + pst: 'felt' + }, + fight: { + prt: 'fought', + pst: 'fought' + }, + find: { + pst: 'found' + }, + flee: { + g: '_ing', + prt: 'fled' + }, + fling: { + prt: 'flung' + }, + fly: { + prt: 'flown', + pst: 'flew' + }, + forbid: { + pst: 'forbade' + }, + forget: { + g: '_ing', + prt: 'forgotten', + pst: 'forgot' + }, + forgive: { + g: 'forgiving', + prt: '_n', + pst: 'forgave' + }, + free: { + a: '', + g: '_ing' + }, + freeze: { + g: 'freezing', + prt: 'frozen', + pst: 'froze' + }, + get: { + pst: 'got', + prt: 'gotten' + }, + give: { + g: 'giving', + prt: '_n', + pst: 'gave' + }, + go: { + prt: '_ne', + pst: 'went', + pres: 'goes' + }, + grow: { + prt: '_n' + }, + hang: { + prt: 'hung', + pst: 'hung' + }, + have: { + g: 'having', + prt: 'had', + pst: 'had', + pres: 'has' + }, + hear: { + prt: '_d', + pst: '_d' + }, + hide: { + prt: 'hidden', + pst: 'hid' + }, + hit: { + prt: '_' + }, + hold: { + prt: 'held', + pst: 'held' + }, + hurt: { + prt: '_', + pst: '_' + }, + ice: { + g: 'icing', + pst: '_d' + }, + imply: { + pst: 'implied', + pres: 'implies' + }, + is: { + a: '', + g: 'being', + pst: 'was', + pres: '_' + }, + keep: { + prt: 'kept' + }, + kneel: { + prt: 'knelt' + }, + know: { + prt: '_n' + }, + lay: { + prt: 'laid', + pst: 'laid' + }, + lead: { + prt: 'led', + pst: 'led' + }, + leap: { + prt: '_t' + }, + leave: { + prt: 'left', + pst: 'left' + }, + lend: { + prt: 'lent' + }, + lie: { + g: 'lying', + pst: 'lay' + }, + light: { + prt: 'lit', + pst: 'lit' + }, + log: { + g: '_ging', + pst: '_ged' + }, + loose: { + prt: 'lost' + }, + lose: { + g: 'losing', + pst: 'lost' + }, + make: { + prt: 'made', + pst: 'made' + }, + mean: { + prt: '_t', + pst: '_t' + }, + meet: { + a: '_er', + g: '_ing', + prt: 'met', + pst: 'met' + }, + miss: { + pres: '_' + }, + name: { + g: 'naming' + }, + pay: { + prt: 'paid', + pst: 'paid' + }, + prove: { + prt: '_n' + }, + puke: { + g: 'puking' + }, + put: { + prt: '_' + }, + quit: { + prt: '_' + }, + read: { + prt: '_', + pst: '_' + }, + ride: { + prt: 'ridden' + }, + ring: { + prt: 'rung', + pst: 'rang' + }, + rise: { + fut: 'will have _n', + g: 'rising', + prt: '_n', + pst: 'rose', + pluperf: 'had _n' + }, + rub: { + g: '_bing', + pst: '_bed' + }, + run: { + g: '_ning', + prt: '_', + pst: 'ran' + }, + say: { + prt: 'said', + pst: 'said', + pres: '_s' + }, + seat: { + prt: 'sat' + }, + see: { + g: '_ing', + prt: '_n', + pst: 'saw' + }, + seek: { + prt: 'sought' + }, + sell: { + prt: 'sold', + pst: 'sold' + }, + send: { + prt: 'sent' + }, + set: { + prt: '_' + }, + sew: { + prt: '_n' + }, + shake: { + prt: '_n' + }, + shave: { + prt: '_d' + }, + shed: { + g: '_ding', + pst: '_', + pres: '_s' + }, + shine: { + prt: 'shone', + pst: 'shone' + }, + shoot: { + prt: 'shot', + pst: 'shot' + }, + show: { + pst: '_ed' + }, + shut: { + prt: '_' + }, + sing: { + prt: 'sung', + pst: 'sang' + }, + sink: { + pst: 'sank', + pluperf: 'had sunk' + }, + sit: { + pst: 'sat' + }, + ski: { + pst: '_ied' + }, + slay: { + prt: 'slain' + }, + sleep: { + prt: 'slept' + }, + slide: { + prt: 'slid', + pst: 'slid' + }, + smash: { + pres: '_es' + }, + sneak: { + prt: 'snuck' + }, + speak: { + fut: 'will have spoken', + prt: 'spoken', + pst: 'spoke', + perf: 'have spoken', + pluperf: 'had spoken' + }, + speed: { + prt: 'sped' + }, + spend: { + prt: 'spent' + }, + spill: { + prt: '_ed', + pst: 'spilt' + }, + spin: { + g: '_ning', + prt: 'spun', + pst: 'spun' + }, + spit: { + prt: 'spat' + }, + split: { + prt: '_' + }, + spread: { + pst: '_' + }, + spring: { + prt: 'sprung' + }, + stand: { + pst: 'stood' + }, + steal: { + a: '_er', + pst: 'stole' + }, + stick: { + pst: 'stuck' + }, + sting: { + pst: 'stung' + }, + stink: { + prt: 'stunk', + pst: 'stunk' + }, + stream: { + a: '_er' + }, + strew: { + prt: '_n' + }, + strike: { + g: 'striking', + pst: 'struck' + }, + suit: { + a: '_er', + g: '_ing', + pst: '_ed' + }, + sware: { + prt: 'sworn' + }, + swear: { + pst: 'swore' + }, + sweep: { + prt: 'swept' + }, + swim: { + g: '_ming', + pst: 'swam' + }, + swing: { + pst: 'swung' + }, + take: { + fut: 'will have _n', + pst: 'took', + perf: 'have _n', + pluperf: 'had _n' + }, + teach: { + pst: 'taught', + pres: '_es' + }, + tear: { + pst: 'tore' + }, + tell: { + pst: 'told' + }, + think: { + pst: 'thought' + }, + thrive: { + prt: '_d' + }, + tie: { + g: 'tying', + pst: '_d' + }, + undergo: { + prt: '_ne' + }, + understand: { + pst: 'understood' + }, + upset: { + prt: '_' + }, + wait: { + a: '_er', + g: '_ing', + pst: '_ed' + }, + wake: { + pst: 'woke' + }, + wear: { + pst: 'wore' + }, + weave: { + prt: 'woven' + }, + wed: { + pst: 'wed' + }, + weep: { + prt: 'wept' + }, + win: { + g: '_ning', + pst: 'won' + }, + wind: { + prt: 'wound' + }, + withdraw: { + pst: 'withdrew' + }, + wring: { + prt: 'wrung' + }, + write: { + g: 'writing', + prt: 'written', + pst: 'wrote' + } + }; //uncompress our ad-hoc compression scheme + + var keys = Object.keys(conjugations); + + var _loop = function _loop(i) { + var inf = keys[i]; + var _final = {}; + Object.keys(conjugations[inf]).forEach(function (key) { + var str = conjugations[inf][key]; //swap-in infinitives for '_' + + str = str.replace('_', inf); + var full = mapping[key]; + _final[full] = str; + }); //over-write original + + conjugations[inf] = _final; + }; + + for (var i = 0; i < keys.length; i++) { + _loop(i); + } + + var conjugations_1 = conjugations; + + var endsWith = { + b: [{ + reg: /([^aeiou][aeiou])b$/i, + repl: { + pr: '$1bs', + pa: '$1bbed', + gr: '$1bbing' + } + }], + d: [{ + reg: /(end)$/i, + repl: { + pr: '$1s', + pa: 'ent', + gr: '$1ing', + ar: '$1er' + } + }, { + reg: /(eed)$/i, + repl: { + pr: '$1s', + pa: '$1ed', + gr: '$1ing', + ar: '$1er' + } + }, { + reg: /(ed)$/i, + repl: { + pr: '$1s', + pa: '$1ded', + ar: '$1der', + gr: '$1ding' + } + }, { + reg: /([^aeiou][ou])d$/i, + repl: { + pr: '$1ds', + pa: '$1dded', + gr: '$1dding' + } + }], + e: [{ + reg: /(eave)$/i, + repl: { + pr: '$1s', + pa: '$1d', + gr: 'eaving', + ar: '$1r' + } + }, { + reg: /(ide)$/i, + repl: { + pr: '$1s', + pa: 'ode', + gr: 'iding', + ar: 'ider' + } + }, { + //shake + reg: /(t|sh?)(ake)$/i, + repl: { + pr: '$1$2s', + pa: '$1ook', + gr: '$1aking', + ar: '$1$2r' + } + }, { + //awake + reg: /w(ake)$/i, + repl: { + pr: 'w$1s', + pa: 'woke', + gr: 'waking', + ar: 'w$1r' + } + }, { + //make + reg: /m(ake)$/i, + repl: { + pr: 'm$1s', + pa: 'made', + gr: 'making', + ar: 'm$1r' + } + }, { + reg: /(a[tg]|i[zn]|ur|nc|gl|is)e$/i, + repl: { + pr: '$1es', + pa: '$1ed', + gr: '$1ing' // prt: '$1en', + + } + }, { + reg: /([bd]l)e$/i, + repl: { + pr: '$1es', + pa: '$1ed', + gr: '$1ing' + } + }, { + reg: /(om)e$/i, + repl: { + pr: '$1es', + pa: 'ame', + gr: '$1ing' + } + }], + g: [{ + reg: /([^aeiou][ou])g$/i, + repl: { + pr: '$1gs', + pa: '$1gged', + gr: '$1gging' + } + }], + h: [{ + reg: /(..)([cs]h)$/i, + repl: { + pr: '$1$2es', + pa: '$1$2ed', + gr: '$1$2ing' + } + }], + k: [{ + reg: /(ink)$/i, + repl: { + pr: '$1s', + pa: 'unk', + gr: '$1ing', + ar: '$1er' + } + }], + m: [{ + reg: /([^aeiou][aeiou])m$/i, + repl: { + pr: '$1ms', + pa: '$1mmed', + gr: '$1mming' + } + }], + n: [{ + reg: /(en)$/i, + repl: { + pr: '$1s', + pa: '$1ed', + gr: '$1ing' + } + }], + p: [{ + reg: /(e)(ep)$/i, + repl: { + pr: '$1$2s', + pa: '$1pt', + gr: '$1$2ing', + ar: '$1$2er' + } + }, { + reg: /([^aeiou][aeiou])p$/i, + repl: { + pr: '$1ps', + pa: '$1pped', + gr: '$1pping' + } + }, { + reg: /([aeiu])p$/i, + repl: { + pr: '$1ps', + pa: '$1p', + gr: '$1pping' + } + }], + r: [{ + reg: /([td]er)$/i, + repl: { + pr: '$1s', + pa: '$1ed', + gr: '$1ing' + } + }, { + reg: /(er)$/i, + repl: { + pr: '$1s', + pa: '$1ed', + gr: '$1ing' + } + }], + s: [{ + reg: /(ish|tch|ess)$/i, + repl: { + pr: '$1es', + pa: '$1ed', + gr: '$1ing' + } + }], + t: [{ + reg: /(ion|end|e[nc]t)$/i, + repl: { + pr: '$1s', + pa: '$1ed', + gr: '$1ing' + } + }, { + reg: /(.eat)$/i, + repl: { + pr: '$1s', + pa: '$1ed', + gr: '$1ing' + } + }, { + reg: /([aeiu])t$/i, + repl: { + pr: '$1ts', + pa: '$1t', + gr: '$1tting' + } + }, { + reg: /([^aeiou][aeiou])t$/i, + repl: { + pr: '$1ts', + pa: '$1tted', + gr: '$1tting' + } + }], + w: [{ + reg: /(..)(ow)$/i, + repl: { + pr: '$1$2s', + pa: '$1ew', + gr: '$1$2ing', + prt: '$1$2n' + } + }], + y: [{ + reg: /([i|f|rr])y$/i, + repl: { + pr: '$1ies', + pa: '$1ied', + gr: '$1ying' + } + }], + z: [{ + reg: /([aeiou]zz)$/i, + repl: { + pr: '$1es', + pa: '$1ed', + gr: '$1ing' + } + }] + }; + var suffixes = endsWith; + + var posMap = { + pr: 'PresentTense', + pa: 'PastTense', + gr: 'Gerund', + prt: 'Participle', + ar: 'Actor' + }; + + var doTransform = function doTransform(str, obj) { + var found = {}; + var keys = Object.keys(obj.repl); + + for (var i = 0; i < keys.length; i += 1) { + var pos = keys[i]; + found[posMap[pos]] = str.replace(obj.reg, obj.repl[pos]); + } + + return found; + }; //look at the end of the word for clues + + + var checkSuffix = function checkSuffix() { + var str = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : ''; + var c = str[str.length - 1]; + + if (suffixes.hasOwnProperty(c) === true) { + for (var r = 0; r < suffixes[c].length; r += 1) { + var reg = suffixes[c][r].reg; + + if (reg.test(str) === true) { + return doTransform(str, suffixes[c][r]); + } + } + } + + return {}; + }; + + var _01Suffixes = checkSuffix; + + //non-specifc, 'hail-mary' transforms from infinitive, into other forms + var hasY = /[bcdfghjklmnpqrstvwxz]y$/; + var generic = { + Gerund: function Gerund(inf) { + if (inf.charAt(inf.length - 1) === 'e') { + return inf.replace(/e$/, 'ing'); + } + + return inf + 'ing'; + }, + PresentTense: function PresentTense(inf) { + if (inf.charAt(inf.length - 1) === 's') { + return inf + 'es'; + } + + if (hasY.test(inf) === true) { + return inf.slice(0, -1) + 'ies'; + } + + return inf + 's'; + }, + PastTense: function PastTense(inf) { + if (inf.charAt(inf.length - 1) === 'e') { + return inf + 'd'; + } + + if (inf.substr(-2) === 'ed') { + return inf; + } + + if (hasY.test(inf) === true) { + return inf.slice(0, -1) + 'ied'; + } + + return inf + 'ed'; + } + }; + var _02Generic = generic; + + //we assume the input word is a proper infinitive + + var conjugate = function conjugate() { + var inf = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : ''; + var world = arguments.length > 1 ? arguments[1] : undefined; + var found = {}; // 1. look at irregulars + //the lexicon doesn't pass this in + + if (world && world.irregulars) { + if (world.irregulars.verbs.hasOwnProperty(inf) === true) { + found = Object.assign({}, world.irregulars.verbs[inf]); + } + } //2. rule-based regex + + + found = Object.assign({}, _01Suffixes(inf), found); //3. generic transformations + //'buzzing' + + if (found.Gerund === undefined) { + found.Gerund = _02Generic.Gerund(inf); + } //'buzzed' + + + if (found.PastTense === undefined) { + found.PastTense = _02Generic.PastTense(inf); + } //'buzzes' + + + if (found.PresentTense === undefined) { + found.PresentTense = _02Generic.PresentTense(inf); + } + + return found; + }; + + var conjugate_1 = conjugate; // console.log(conjugate('bake')) + + //turn 'quick' into 'quickest' + var do_rules = [/ght$/, /nge$/, /ough$/, /ain$/, /uel$/, /[au]ll$/, /ow$/, /oud$/, /...p$/]; + var dont_rules = [/ary$/]; + var irregulars = { + nice: 'nicest', + late: 'latest', + hard: 'hardest', + inner: 'innermost', + outer: 'outermost', + far: 'furthest', + worse: 'worst', + bad: 'worst', + good: 'best', + big: 'biggest', + large: 'largest' + }; + var transforms = [{ + reg: /y$/i, + repl: 'iest' + }, { + reg: /([aeiou])t$/i, + repl: '$1ttest' + }, { + reg: /([aeou])de$/i, + repl: '$1dest' + }, { + reg: /nge$/i, + repl: 'ngest' + }, { + reg: /([aeiou])te$/i, + repl: '$1test' + }]; + + var to_superlative = function to_superlative(str) { + //irregulars + if (irregulars.hasOwnProperty(str)) { + return irregulars[str]; + } //known transforms + + + for (var i = 0; i < transforms.length; i++) { + if (transforms[i].reg.test(str)) { + return str.replace(transforms[i].reg, transforms[i].repl); + } + } //dont-rules + + + for (var _i = 0; _i < dont_rules.length; _i++) { + if (dont_rules[_i].test(str) === true) { + return null; + } + } //do-rules + + + for (var _i2 = 0; _i2 < do_rules.length; _i2++) { + if (do_rules[_i2].test(str) === true) { + if (str.charAt(str.length - 1) === 'e') { + return str + 'st'; + } + + return str + 'est'; + } + } + + return str + 'est'; + }; + + var toSuperlative = to_superlative; + + //turn 'quick' into 'quickly' + var do_rules$1 = [/ght$/, /nge$/, /ough$/, /ain$/, /uel$/, /[au]ll$/, /ow$/, /old$/, /oud$/, /e[ae]p$/]; + var dont_rules$1 = [/ary$/, /ous$/]; + var irregulars$1 = { + grey: 'greyer', + gray: 'grayer', + green: 'greener', + yellow: 'yellower', + red: 'redder', + good: 'better', + well: 'better', + bad: 'worse', + sad: 'sadder', + big: 'bigger' + }; + var transforms$1 = [{ + reg: /y$/i, + repl: 'ier' + }, { + reg: /([aeiou])t$/i, + repl: '$1tter' + }, { + reg: /([aeou])de$/i, + repl: '$1der' + }, { + reg: /nge$/i, + repl: 'nger' + }]; + + var to_comparative = function to_comparative(str) { + //known-irregulars + if (irregulars$1.hasOwnProperty(str)) { + return irregulars$1[str]; + } //known-transforms + + + for (var i = 0; i < transforms$1.length; i++) { + if (transforms$1[i].reg.test(str) === true) { + return str.replace(transforms$1[i].reg, transforms$1[i].repl); + } + } //dont-patterns + + + for (var _i = 0; _i < dont_rules$1.length; _i++) { + if (dont_rules$1[_i].test(str) === true) { + return null; + } + } //do-patterns + + + for (var _i2 = 0; _i2 < do_rules$1.length; _i2++) { + if (do_rules$1[_i2].test(str) === true) { + return str + 'er'; + } + } //easy-one + + + if (/e$/.test(str) === true) { + return str + 'r'; + } + + return str + 'er'; + }; + + var toComparative = to_comparative; + + var fns$1 = { + toSuperlative: toSuperlative, + toComparative: toComparative + }; + /** conjugate an adjective into other forms */ + + var conjugate$1 = function conjugate(w) { + var res = {}; // 'greatest' + + var sup = fns$1.toSuperlative(w); + + if (sup) { + res.Superlative = sup; + } // 'greater' + + + var comp = fns$1.toComparative(w); + + if (comp) { + res.Comparative = comp; + } + + return res; + }; + + var adjectives = conjugate$1; + + /** patterns for turning 'bus' to 'buses'*/ + var suffixes$1 = { + a: [[/(antenn|formul|nebul|vertebr|vit)a$/i, '$1ae'], [/([ti])a$/i, '$1a']], + e: [[/(kn|l|w)ife$/i, '$1ives'], [/(hive)$/i, '$1s'], [/([m|l])ouse$/i, '$1ice'], [/([m|l])ice$/i, '$1ice']], + f: [[/^(dwar|handkerchie|hoo|scar|whar)f$/i, '$1ves'], [/^((?:ca|e|ha|(?:our|them|your)?se|she|wo)l|lea|loa|shea|thie)f$/i, '$1ves']], + i: [[/(octop|vir)i$/i, '$1i']], + m: [[/([ti])um$/i, '$1a']], + n: [[/^(oxen)$/i, '$1']], + o: [[/(al|ad|at|er|et|ed|ad)o$/i, '$1oes']], + s: [[/(ax|test)is$/i, '$1es'], [/(alias|status)$/i, '$1es'], [/sis$/i, 'ses'], [/(bu)s$/i, '$1ses'], [/(sis)$/i, 'ses'], [/^(?!talis|.*hu)(.*)man$/i, '$1men'], [/(octop|vir|radi|nucle|fung|cact|stimul)us$/i, '$1i']], + x: [[/(matr|vert|ind|cort)(ix|ex)$/i, '$1ices'], [/^(ox)$/i, '$1en']], + y: [[/([^aeiouy]|qu)y$/i, '$1ies']], + z: [[/(quiz)$/i, '$1zes']] + }; + var _rules = suffixes$1; + + var addE = /(x|ch|sh|s|z)$/; + + var trySuffix = function trySuffix(str) { + var c = str[str.length - 1]; + + if (_rules.hasOwnProperty(c) === true) { + for (var i = 0; i < _rules[c].length; i += 1) { + var reg = _rules[c][i][0]; + + if (reg.test(str) === true) { + return str.replace(reg, _rules[c][i][1]); + } + } + } + + return null; + }; + /** Turn a singular noun into a plural + * assume the given string is singular + */ + + + var pluralize = function pluralize() { + var str = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : ''; + var world = arguments.length > 1 ? arguments[1] : undefined; + var irregulars = world.irregulars.nouns; // check irregulars list + + if (irregulars.hasOwnProperty(str)) { + return irregulars[str]; + } //we have some rules to try-out + + + var plural = trySuffix(str); + + if (plural !== null) { + return plural; + } //like 'church' + + + if (addE.test(str)) { + return str + 'es'; + } // ¯\_(ツ)_/¯ + + + return str + 's'; + }; + + var toPlural = pluralize; + + //patterns for turning 'dwarves' to 'dwarf' + var _rules$1 = [[/([^v])ies$/i, '$1y'], [/ises$/i, 'isis'], [/(kn|[^o]l|w)ives$/i, '$1ife'], [/^((?:ca|e|ha|(?:our|them|your)?se|she|wo)l|lea|loa|shea|thie)ves$/i, '$1f'], [/^(dwar|handkerchie|hoo|scar|whar)ves$/i, '$1f'], [/(antenn|formul|nebul|vertebr|vit)ae$/i, '$1a'], [/(octop|vir|radi|nucle|fung|cact|stimul)(i)$/i, '$1us'], [/(buffal|tomat|tornad)(oes)$/i, '$1o'], // [/(analy|diagno|parenthe|progno|synop|the)ses$/i, '$1sis'], + [/(eas)es$/i, '$1e'], //diseases + [/(..[aeiou]s)es$/i, '$1'], //geniouses + [/(vert|ind|cort)(ices)$/i, '$1ex'], [/(matr|append)(ices)$/i, '$1ix'], [/(x|ch|ss|sh|z|o)es$/i, '$1'], [/men$/i, 'man'], [/(n)ews$/i, '$1ews'], [/([ti])a$/i, '$1um'], [/([^aeiouy]|qu)ies$/i, '$1y'], [/(s)eries$/i, '$1eries'], [/(m)ovies$/i, '$1ovie'], [/([m|l])ice$/i, '$1ouse'], [/(cris|ax|test)es$/i, '$1is'], [/(alias|status)es$/i, '$1'], [/(ss)$/i, '$1'], [/(ics)$/i, '$1'], [/s$/i, '']]; + + var invertObj = function invertObj(obj) { + return Object.keys(obj).reduce(function (h, k) { + h[obj[k]] = k; + return h; + }, {}); + }; + + var toSingular = function toSingular(str, world) { + var irregulars = world.irregulars.nouns; + var invert = invertObj(irregulars); //(not very efficient) + // check irregulars list + + if (invert.hasOwnProperty(str)) { + return invert[str]; + } // go through our regexes + + + for (var i = 0; i < _rules$1.length; i++) { + if (_rules$1[i][0].test(str) === true) { + str = str.replace(_rules$1[i][0], _rules$1[i][1]); + return str; + } + } + + return str; + }; + + var toSingular_1 = toSingular; + + //rules for turning a verb into infinitive form + var rules = { + Participle: [{ + reg: /own$/i, + to: 'ow' + }, { + reg: /(.)un([g|k])$/i, + to: '$1in$2' + }], + Actor: [{ + reg: /(er)er$/i, + to: '$1' + }], + PresentTense: [{ + reg: /(..)(ies)$/i, + to: '$1y' + }, { + reg: /(tch|sh)es$/i, + to: '$1' + }, { + reg: /(ss|zz)es$/i, + to: '$1' + }, { + reg: /([tzlshicgrvdnkmu])es$/i, + to: '$1e' + }, { + reg: /(n[dtk]|c[kt]|[eo]n|i[nl]|er|a[ytrl])s$/i, + to: '$1' + }, { + reg: /(ow)s$/i, + to: '$1' + }, { + reg: /(op)s$/i, + to: '$1' + }, { + reg: /([eirs])ts$/i, + to: '$1t' + }, { + reg: /(ll)s$/i, + to: '$1' + }, { + reg: /(el)s$/i, + to: '$1' + }, { + reg: /(ip)es$/i, + to: '$1e' + }, { + reg: /ss$/i, + to: 'ss' + }, { + reg: /s$/i, + to: '' + }], + Gerund: [{ + //popping -> pop + reg: /(..)(p|d|t|g){2}ing$/i, + to: '$1$2' + }, { + //fuzzing -> fuzz + reg: /(ll|ss|zz)ing$/i, + to: '$1' + }, { + reg: /([^aeiou])ying$/i, + to: '$1y' + }, { + reg: /([^ae]i.)ing$/i, + to: '$1e' + }, { + //eating, reading + reg: /(ea[dklnrtv])ing$/i, + to: '$1' + }, { + //washing -> wash + reg: /(ch|sh)ing$/i, + to: '$1' + }, //soft-e forms: + { + //z : hazing (not buzzing) + reg: /(z)ing$/i, + to: '$1e' + }, { + //a : baking, undulating + reg: /(a[gdkvtc])ing$/i, + to: '$1e' + }, { + //u : conjuring, tubing + reg: /(u[rtcbn])ing$/i, + to: '$1e' + }, { + //o : forboding, poking, hoping, boring (not hooping) + reg: /([^o]o[bdknprv])ing$/i, + to: '$1e' + }, { + //ling : tingling, wrinkling, circling, scrambling, bustling + reg: /([tbckg]l)ing$/i, + //dp + to: '$1e' + }, { + //cing : bouncing, denouncing + reg: /(c|s)ing$/i, + //dp + to: '$1e' + }, // { + // //soft-e : + // reg: /([ua]s|[dr]g|z|o[rlsp]|cre)ing$/i, + // to: '$1e', + // }, + { + //fallback + reg: /(..)ing$/i, + to: '$1' + }], + PastTense: [{ + reg: /(ued)$/i, + to: 'ue' + }, { + reg: /a([^aeiouy])ed$/i, + to: 'a$1e' + }, { + reg: /([aeiou]zz)ed$/i, + to: '$1' + }, { + reg: /(e|i)lled$/i, + to: '$1ll' + }, { + reg: /(.)(sh|ch)ed$/i, + to: '$1$2' + }, { + reg: /(tl|gl)ed$/i, + to: '$1e' + }, { + reg: /(um?pt?)ed$/i, + to: '$1' + }, { + reg: /(ss)ed$/i, + to: '$1' + }, { + reg: /pped$/i, + to: 'p' + }, { + reg: /tted$/i, + to: 't' + }, { + reg: /(..)gged$/i, + to: '$1g' + }, { + reg: /(..)lked$/i, + to: '$1lk' + }, { + reg: /([^aeiouy][aeiou])ked$/i, + to: '$1ke' + }, { + reg: /(.[aeiou])led$/i, + to: '$1l' + }, { + reg: /(..)(h|ion|n[dt]|ai.|[cs]t|pp|all|ss|tt|int|ail|ld|en|oo.|er|k|pp|w|ou.|rt|ght|rm)ed$/i, + to: '$1$2' + }, { + reg: /(.ut)ed$/i, + to: '$1e' + }, { + reg: /(.pt)ed$/i, + to: '$1' + }, { + reg: /(us)ed$/i, + to: '$1e' + }, { + reg: /(dd)ed$/i, + to: '$1' + }, { + reg: /(..[^aeiouy])ed$/i, + to: '$1e' + }, { + reg: /(..)ied$/i, + to: '$1y' + }, { + reg: /(.o)ed$/i, + to: '$1o' + }, { + reg: /(..i)ed$/i, + to: '$1' + }, { + reg: /(.a[^aeiou])ed$/i, + to: '$1' + }, { + //owed, aced + reg: /([aeiou][^aeiou])ed$/i, + to: '$1e' + }, { + reg: /([rl])ew$/i, + to: '$1ow' + }, { + reg: /([pl])t$/i, + to: '$1t' + }] + }; + var _transform = rules; + + var guessVerb = { + Gerund: ['ing'], + Actor: ['erer'], + Infinitive: ['ate', 'ize', 'tion', 'rify', 'then', 'ress', 'ify', 'age', 'nce', 'ect', 'ise', 'ine', 'ish', 'ace', 'ash', 'ure', 'tch', 'end', 'ack', 'and', 'ute', 'ade', 'ock', 'ite', 'ase', 'ose', 'use', 'ive', 'int', 'nge', 'lay', 'est', 'ain', 'ant', 'ent', 'eed', 'er', 'le', 'own', 'unk', 'ung', 'en'], + PastTense: ['ed', 'lt', 'nt', 'pt', 'ew', 'ld'], + PresentTense: ['rks', 'cks', 'nks', 'ngs', 'mps', 'tes', 'zes', 'ers', 'les', 'acks', 'ends', 'ands', 'ocks', 'lays', 'eads', 'lls', 'els', 'ils', 'ows', 'nds', 'ays', 'ams', 'ars', 'ops', 'ffs', 'als', 'urs', 'lds', 'ews', 'ips', 'es', 'ts', 'ns'] + }; //flip it into a lookup object + + guessVerb = Object.keys(guessVerb).reduce(function (h, k) { + guessVerb[k].forEach(function (a) { + return h[a] = k; + }); + return h; + }, {}); + var _guess = guessVerb; + + /** it helps to know what we're conjugating from */ + + var guessTense = function guessTense(str) { + var three = str.substr(str.length - 3); + + if (_guess.hasOwnProperty(three) === true) { + return _guess[three]; + } + + var two = str.substr(str.length - 2); + + if (_guess.hasOwnProperty(two) === true) { + return _guess[two]; + } + + var one = str.substr(str.length - 1); + + if (one === 's') { + return 'PresentTense'; + } + + return null; + }; + + var toInfinitive = function toInfinitive(str, world, tense) { + if (!str) { + return ''; + } //1. look at known irregulars + + + if (world.words.hasOwnProperty(str) === true) { + var irregs = world.irregulars.verbs; + var keys = Object.keys(irregs); + + for (var i = 0; i < keys.length; i++) { + var forms = Object.keys(irregs[keys[i]]); + + for (var o = 0; o < forms.length; o++) { + if (str === irregs[keys[i]][forms[o]]) { + return keys[i]; + } + } + } + } // give'r! + + + tense = tense || guessTense(str); + + if (tense && _transform[tense]) { + for (var _i = 0; _i < _transform[tense].length; _i++) { + var rule = _transform[tense][_i]; + + if (rule.reg.test(str) === true) { + // console.log(rule.reg) + return str.replace(rule.reg, rule.to); + } + } + } + + return str; + }; + + var toInfinitive_1 = toInfinitive; + + var irregulars$2 = { + nouns: plurals, + verbs: conjugations_1 + }; //these behaviours are configurable & shared across some plugins + + var transforms$2 = { + conjugate: conjugate_1, + adjectives: adjectives, + toPlural: toPlural, + toSingular: toSingular_1, + toInfinitive: toInfinitive_1 + }; + var _isVerbose = false; + /** all configurable linguistic data */ + + var World = /*#__PURE__*/function () { + function World() { + _classCallCheck(this, World); + + // quiet these properties from a console.log + Object.defineProperty(this, 'words', { + enumerable: false, + value: misc$1, + writable: true + }); + Object.defineProperty(this, 'hasCompound', { + enumerable: false, + value: {}, + writable: true + }); + Object.defineProperty(this, 'irregulars', { + enumerable: false, + value: irregulars$2, + writable: true + }); + Object.defineProperty(this, 'tags', { + enumerable: false, + value: Object.assign({}, tags), + writable: true + }); + Object.defineProperty(this, 'transforms', { + enumerable: false, + value: transforms$2, + writable: true + }); + Object.defineProperty(this, 'taggers', { + enumerable: false, + value: [], + writable: true + }); // add our compressed data to lexicon + + this.unpackWords(_data); // add our irregulars to lexicon + + this.addIrregulars(); // cache our abbreviations for our sentence-parser + + Object.defineProperty(this, 'cache', { + enumerable: false, + value: { + abbreviations: this.getByTag('Abbreviation') + } + }); + } + /** more logs for debugging */ + + + _createClass(World, [{ + key: "verbose", + value: function verbose(bool) { + _isVerbose = bool; + return this; + } + }, { + key: "isVerbose", + value: function isVerbose() { + return _isVerbose; + } + /** get all terms in our lexicon with this tag */ + + }, { + key: "getByTag", + value: function getByTag(tag) { + var lex = this.words; + var res = {}; + var words = Object.keys(lex); + + for (var i = 0; i < words.length; i++) { + if (typeof lex[words[i]] === 'string') { + if (lex[words[i]] === tag) { + res[words[i]] = true; + } + } else if (lex[words[i]].some(function (t) { + return t === tag; + })) { + res[words[i]] = true; + } + } + + return res; + } + /** augment our lingustic data with new data */ + + }, { + key: "unpackWords", + value: function unpackWords(lex) { + var tags = Object.keys(lex); + + for (var i = 0; i < tags.length; i++) { + var words = Object.keys(efrtUnpack_min(lex[tags[i]])); + + for (var w = 0; w < words.length; w++) { + addWords.addWord(words[w], tags[i], this.words); // do some fancier stuff + + addWords.addMore(words[w], tags[i], this); + } + } + } + /** put new words into our lexicon, properly */ + + }, { + key: "addWords", + value: function addWords$1(obj) { + var keys = Object.keys(obj); + + for (var i = 0; i < keys.length; i++) { + var word = keys[i].toLowerCase(); + addWords.addWord(word, obj[keys[i]], this.words); // do some fancier stuff + + addWords.addMore(word, obj[keys[i]], this); + } + } + }, { + key: "addIrregulars", + value: function addIrregulars() { + addIrregulars_1(this); + + return this; + } + /** extend the compromise tagset */ + + }, { + key: "addTags", + value: function addTags(tags) { + tags = Object.assign({}, tags); + this.tags = Object.assign(this.tags, tags); // calculate graph implications for the new tags + + this.tags = inference(this.tags); + return this; + } + /** call methods after tagger runs */ + + }, { + key: "postProcess", + value: function postProcess(fn) { + this.taggers.push(fn); + return this; + } + /** helper method for logging + debugging */ + + }, { + key: "stats", + value: function stats() { + return { + words: Object.keys(this.words).length, + plurals: Object.keys(this.irregulars.nouns).length, + conjugations: Object.keys(this.irregulars.verbs).length, + compounds: Object.keys(this.hasCompound).length, + postProcessors: this.taggers.length + }; + } + }]); + + return World; + }(); // ¯\_(:/)_/¯ + + + var clone$1 = function clone(obj) { + return JSON.parse(JSON.stringify(obj)); + }; + /** produce a deep-copy of all lingustic data */ + + + World.prototype.clone = function () { + var w2 = new World(); // these are simple to copy: + + w2.words = Object.assign({}, this.words); + w2.hasCompound = Object.assign({}, this.hasCompound); //these ones are nested: + + w2.irregulars = clone$1(this.irregulars); + w2.tags = clone$1(this.tags); // these are functions + + w2.transforms = this.transforms; + w2.taggers = this.taggers; + return w2; + }; + + var World_1 = World; + + var _01Utils$1 = createCommonjsModule(function (module, exports) { + /** return the root, first document */ + exports.all = function () { + return this.parents()[0] || this; + }; + /** return the previous result */ + + + exports.parent = function () { + if (this.from) { + return this.from; + } + + return this; + }; + /** return a list of all previous results */ + + + exports.parents = function (n) { + var arr = []; + + var addParent = function addParent(doc) { + if (doc.from) { + arr.push(doc.from); + addParent(doc.from); + } + }; + + addParent(this); + arr = arr.reverse(); + + if (typeof n === 'number') { + return arr[n]; + } + + return arr; + }; + /** deep-copy the document, so that no references remain */ + + + exports.clone = function (doShallow) { + var list = this.list.map(function (ts) { + return ts.clone(doShallow); + }); + var tmp = this.buildFrom(list); + return tmp; + }; + /** how many seperate terms does the document have? */ + + + exports.wordCount = function () { + return this.list.reduce(function (count, p) { + count += p.wordCount(); + return count; + }, 0); + }; + + exports.wordcount = exports.wordCount; + /** turn on logging for decision-debugging */ + // exports.verbose = function(bool) { + // if (bool === undefined) { + // bool = true + // } + // this.world.verbose = bool + // } + }); + + var _02Accessors = createCommonjsModule(function (module, exports) { + /** use only the first result(s) */ + exports.first = function (n) { + if (n === undefined) { + return this.get(0); + } + + return this.slice(0, n); + }; + /** use only the last result(s) */ + + + exports.last = function (n) { + if (n === undefined) { + return this.get(this.list.length - 1); + } + + var end = this.list.length; + return this.slice(end - n, end); + }; + /** grab a given subset of the results*/ + + + exports.slice = function (start, end) { + var list = this.list.slice(start, end); + return this.buildFrom(list); + }; + /* grab nth result */ + + + exports.eq = function (n) { + var p = this.list[n]; + + if (p === undefined) { + return this.buildFrom([]); + } + + return this.buildFrom([p]); + }; + + exports.get = exports.eq; + /** grab term[0] for every match */ + + exports.firstTerms = function () { + return this.match('^.'); + }; + + exports.firstTerm = exports.firstTerms; + /** grab the last term for every match */ + + exports.lastTerms = function () { + return this.match('.$'); + }; + + exports.lastTerm = exports.lastTerms; + /** return a flat array of term objects */ + + exports.termList = function (num) { + var arr = []; //'reduce' but faster + + for (var i = 0; i < this.list.length; i++) { + var terms = this.list[i].terms(); + + for (var o = 0; o < terms.length; o++) { + arr.push(terms[o]); //support .termList(4) + + if (num !== undefined && arr[num] !== undefined) { + return arr[num]; + } + } + } + + return arr; + }; + /** return a flat array of term text strings */ + /* @blab+ */ + exports.textList = function (num) { + var arr = []; //'reduce' but faster + + for (var i = 0; i < this.list.length; i++) { + var terms = this.list[i].terms(); + + for (var o = 0; o < terms.length; o++) { + arr.push(terms[o]); //support .termList(4) + + if (num !== undefined && arr[num] !== undefined) { + return arr[num]; + } + } + } + + return arr.map(function (term) { return term.text }); + }; + + + /* grab named capture group terms as object */ + var getGroups = function getGroups(doc) { + var res = {}; + var allGroups = {}; + + var _loop = function _loop(i) { + var phrase = doc.list[i]; + var groups = Object.keys(phrase.groups).map(function (k) { + return phrase.groups[k]; + }); + + for (var j = 0; j < groups.length; j++) { + var _groups$j = groups[j], + group = _groups$j.group, + start = _groups$j.start, + length = _groups$j.length; + + if (!allGroups[group]) { + allGroups[group] = []; + } + + allGroups[group].push(phrase.buildFrom(start, length)); + } + }; + + for (var i = 0; i < doc.list.length; i++) { + _loop(i); + } + + var keys = Object.keys(allGroups); + + for (var _i = 0; _i < keys.length; _i++) { + var key = keys[_i]; + res[key] = doc.buildFrom(allGroups[key]); + } + + return res; + }; + + var getOneName = function getOneName(doc, name) { + var arr = []; + + var _loop2 = function _loop2(i) { + var phrase = doc.list[i]; + var keys = Object.keys(phrase.groups); + keys = keys.filter(function (id) { + return phrase.groups[id].group === name; + }); + keys.forEach(function (id) { + arr.push(phrase.buildFrom(phrase.groups[id].start, phrase.groups[id].length)); + }); + }; + + for (var i = 0; i < doc.list.length; i++) { + _loop2(i); + } + + return doc.buildFrom(arr); + }; + /** grab named capture group results */ + + + exports.groups = function (target) { + if (target === undefined) { + return getGroups(this); + } + + if (typeof target === 'number') { + target = String(target); + } + + return getOneName(this, target) || this.buildFrom([]); + }; + + exports.group = exports.groups; + /** get the full-sentence each phrase belongs to */ + + exports.sentences = function (n) { + var arr = []; + this.list.forEach(function (p) { + arr.push(p.fullSentence()); + }); + + if (typeof n === 'number') { + return this.buildFrom([arr[n]]); + } + + return this.buildFrom(arr); + }; + + exports.sentence = exports.sentences; + }); + + // cache the easier conditions up-front + var cacheRequired = function cacheRequired(reg) { + var needTags = []; + var needWords = []; + reg.forEach(function (obj) { + if (obj.optional === true || obj.negative === true) { + return; + } + + if (obj.tag !== undefined) { + needTags.push(obj.tag); + } + + if (obj.word !== undefined) { + needWords.push(obj.word); + } + }); + return { + tags: needTags, + words: needWords + }; + }; + + var failFast$1 = function failFast(doc, regs) { + if (doc._cache && doc._cache.set === true) { + var _cacheRequired = cacheRequired(regs), + words = _cacheRequired.words, + tags = _cacheRequired.tags; //check required words + + + for (var i = 0; i < words.length; i++) { + if (doc._cache.words[words[i]] === undefined) { + return false; + } + } //check required tags + + + for (var _i = 0; _i < tags.length; _i++) { + if (doc._cache.tags[tags[_i]] === undefined) { + return false; + } + } + } + + return true; + }; + + var checkCache = failFast$1; + + var _03Match = createCommonjsModule(function (module, exports) { + /** return a new Doc, with this one as a parent */ + exports.match = function (reg, name) { + //parse-up the input expression + var regs = syntax_1(reg); + + if (regs.length === 0) { + return this.buildFrom([]); + } //check our cache, if it exists + + + if (checkCache(this, regs) === false) { + return this.buildFrom([]); + } //try expression on each phrase + + + var matches = this.list.reduce(function (arr, p) { + return arr.concat(p.match(regs)); + }, []); + + if (name !== undefined && name !== null && name !== '') { + return this.buildFrom(matches).groups(name); + } + + return this.buildFrom(matches); + }; + /** return all results except for this */ + + + exports.not = function (reg) { + //parse-up the input expression + var regs = syntax_1(reg); //if it's empty, return them all! + + if (regs.length === 0 || checkCache(this, regs) === false) { + return this; + } //try expression on each phrase + + + var matches = this.list.reduce(function (arr, p) { + return arr.concat(p.not(regs)); + }, []); + return this.buildFrom(matches); + }; + /** return only the first match */ + + + exports.matchOne = function (reg) { + var regs = syntax_1(reg); //check our cache, if it exists + + if (checkCache(this, regs) === false) { + return this.buildFrom([]); + } + + for (var i = 0; i < this.list.length; i++) { + var match = this.list[i].match(regs, true); + return this.buildFrom(match); + } + + return this.buildFrom([]); + }; + /** return each current phrase, only if it contains this match */ + + + exports["if"] = function (reg) { + var regs = syntax_1(reg); //consult our cache, if it exists + + if (checkCache(this, regs) === false) { + return this.buildFrom([]); + } + + var found = this.list.filter(function (p) { + return p.has(regs) === true; + }); + return this.buildFrom(found); + }; + /** Filter-out any current phrases that have this match*/ + + + exports.ifNo = function (reg) { + var regs = syntax_1(reg); + var found = this.list.filter(function (p) { + return p.has(regs) === false; + }); + return this.buildFrom(found); + }; + /**Return a boolean if this match exists */ + + // function (reg:string|string [],all:boolean) -> boolean + exports.has = function (reg,all) { + // @blab+ + if (typeof reg == 'object' && reg.length) { + for(var i in reg) { + var check=exports.has.call(this,reg[i]); + if (all && !check) return false; + if (!all && check) return true; + } + return all?true:false; + } + var regs = syntax_1(reg); //consult our cache, if it exists + + if (checkCache(this, regs) === false) { + return false; + } + + return this.list.some(function (p) { + return p.has(regs) === true; + }); + }; + + // @blab; contains similar terms? + exports.hasSimilar = function (reg,thres,all) { + var terms = this.termList(); + var count=0; + thres=thres||90; + if (typeof reg=='string') reg=[reg]; + for(var i in terms) { + for (var j in reg) { + var check = similar_text(terms[i].text,reg[j],1)>thres; + if (!all && check) return true; + if (check) { count++; break } + } + } + return all?count==res.length:false; + } + /** match any terms after our matches, within the sentence */ + + + exports.lookAhead = function (reg) { + // find everything afterwards, by default + if (!reg) { + reg = '.*'; + } + + var regs = syntax_1(reg); + var matches = []; + this.list.forEach(function (p) { + matches = matches.concat(p.lookAhead(regs)); + }); + matches = matches.filter(function (p) { + return p; + }); + return this.buildFrom(matches); + }; + + exports.lookAfter = exports.lookAhead; + /** match any terms before our matches, within the sentence */ + + exports.lookBehind = function (reg) { + // find everything afterwards, by default + if (!reg) { + reg = '.*'; + } + + var regs = syntax_1(reg); + var matches = []; + this.list.forEach(function (p) { + matches = matches.concat(p.lookBehind(regs)); + }); + matches = matches.filter(function (p) { + return p; + }); + return this.buildFrom(matches); + }; + + exports.lookBefore = exports.lookBehind; + /** return all terms before a match, in each phrase */ + + exports.before = function (reg) { + var regs = syntax_1(reg); //only the phrases we care about + + var phrases = this["if"](regs).list; + var befores = phrases.map(function (p) { + var ids = p.terms().map(function (t) { + return t.id; + }); //run the search again + + var m = p.match(regs)[0]; + var index = ids.indexOf(m.start); //nothing is before a first-term match + + if (index === 0 || index === -1) { + return null; + } + + return p.buildFrom(p.start, index); + }); + befores = befores.filter(function (p) { + return p !== null; + }); + return this.buildFrom(befores); + }; + /** return all terms after a match, in each phrase */ + + + exports.after = function (reg) { + var regs = syntax_1(reg); //only the phrases we care about + + var phrases = this["if"](regs).list; + var befores = phrases.map(function (p) { + var terms = p.terms(); + var ids = terms.map(function (t) { + return t.id; + }); //run the search again + + var m = p.match(regs)[0]; + var index = ids.indexOf(m.start); //skip if nothing is after it + + if (index === -1 || !terms[index + m.length]) { + return null; + } //create the new phrase, after our match. + + + var id = terms[index + m.length].id; + var len = p.length - index - m.length; + return p.buildFrom(id, len); + }); + befores = befores.filter(function (p) { + return p !== null; + }); + return this.buildFrom(befores); + }; + /** return only results with this match afterwards */ + + + exports.hasAfter = function (reg) { + return this.filter(function (doc) { + return doc.lookAfter(reg).found; + }); + }; + /** return only results with this match before it */ + + + exports.hasBefore = function (reg) { + return this.filter(function (doc) { + return doc.lookBefore(reg).found; + }); + }; + }); + + /** apply a tag, or tags to all terms */ + var tagTerms = function tagTerms(tag, doc, safe, reason) { + var tagList = []; + + if (typeof tag === 'string') { + tagList = tag.split(' '); + } //do indepenent tags for each term: + + + doc.list.forEach(function (p) { + var terms = p.terms(); // tagSafe - apply only to fitting terms + + if (safe === true) { + terms = terms.filter(function (t) { + return t.canBe(tag, doc.world); + }); + } + + terms.forEach(function (t, i) { + //fancy version: + if (tagList.length > 1) { + if (tagList[i] && tagList[i] !== '.') { + t.tag(tagList[i], reason, doc.world); + } + } else { + //non-fancy version (same tag for all terms) + t.tag(tag, reason, doc.world); + } + }); + }); + return; + }; + + var _setTag = tagTerms; + + /** Give all terms the given tag */ + + var tag$1 = function tag(tags, why) { + if (!tags) { + return this; + } + + _setTag(tags, this, false, why); + return this; + }; + /** Only apply tag to terms if it is consistent with current tags */ + + + var tagSafe$1 = function tagSafe(tags, why) { + if (!tags) { + return this; + } + + _setTag(tags, this, true, why); + return this; + }; + /** Remove this term from the given terms */ + + + var unTag$1 = function unTag(tags, why) { + var _this = this; + + this.list.forEach(function (p) { + p.terms().forEach(function (t) { + return t.unTag(tags, why, _this.world); + }); + }); + return this; + }; + /** return only the terms that can be this tag*/ + + + var canBe$2 = function canBe(tag) { + if (!tag) { + return this; + } + + var world = this.world; + var matches = this.list.reduce(function (arr, p) { + return arr.concat(p.canBe(tag, world)); + }, []); + return this.buildFrom(matches); + }; + + var _04Tag = { + tag: tag$1, + tagSafe: tagSafe$1, + unTag: unTag$1, + canBe: canBe$2 + }; + + /* run each phrase through a function, and create a new document */ + var map = function map(fn) { + var _this = this; + + if (!fn) { + return this; + } + + var list = this.list.map(function (p, i) { + var doc = _this.buildFrom([p]); + + doc.from = null; //it's not a child/parent + + var res = fn(doc, i); // if its a doc, return one result + + if (res && res.list && res.list[0]) { + return res.list[0]; + } + + return res; + }); //remove nulls + + list = list.filter(function (x) { + return x; + }); // return an empty response + + if (list.length === 0) { + return this.buildFrom(list); + } // if it is not a list of Phrase objects, then don't try to make a Doc object + + + if (_typeof(list[0]) !== 'object' || list[0].isA !== 'Phrase') { + return list; + } + + return this.buildFrom(list); + }; + /** run a function on each phrase */ + + + var forEach = function forEach(fn, detachParent) { + var _this2 = this; + + if (!fn) { + return this; + } + + this.list.forEach(function (p, i) { + var sub = _this2.buildFrom([p]); // if we're doing fancy insertions, we may want to skip updating the parent each time. + + + if (detachParent === true) { + sub.from = null; // + } + + fn(sub, i); + }); + return this; + }; + /** return only the phrases that return true */ + + + var filter = function filter(fn) { + var _this3 = this; + + if (!fn) { + return this; + } + + var list = this.list.filter(function (p, i) { + var doc = _this3.buildFrom([p]); + + doc.from = null; //it's not a child/parent + + return fn(doc, i); + }); + return this.buildFrom(list); + }; + /** return a document with only the first phrase that matches */ + + + var find = function find(fn) { + var _this4 = this; + + if (!fn) { + return this; + } + + var phrase = this.list.find(function (p, i) { + var doc = _this4.buildFrom([p]); + + doc.from = null; //it's not a child/parent + + return fn(doc, i); + }); + + if (phrase) { + return this.buildFrom([phrase]); + } + + return undefined; + }; + /** return true or false if there is one matching phrase */ + + + var some = function some(fn) { + var _this5 = this; + + if (!fn) { + return this; + } + + return this.list.some(function (p, i) { + var doc = _this5.buildFrom([p]); + + doc.from = null; //it's not a child/parent + + return fn(doc, i); + }); + }; + /** sample a subset of the results */ + + + var random = function random(n) { + if (!this.found) { + return this; + } + + var r = Math.floor(Math.random() * this.list.length); + + if (n === undefined) { + var list = [this.list[r]]; + return this.buildFrom(list); + } //prevent it from going over the end + + + if (r + n > this.length) { + r = this.length - n; + r = r < 0 ? 0 : r; + } + + return this.slice(r, r + n); + }; + /** combine each phrase into a new data-structure */ + // exports.reduce = function(fn, h) { + // let list = this.list.reduce((_h, ts) => { + // let doc = this.buildFrom([ts]) + // doc.from = null //it's not a child/parent + // return fn(_h, doc) + // }, h) + // return this.buildFrom(list) + // } + + + var _05Loops = { + map: map, + forEach: forEach, + filter: filter, + find: find, + some: some, + random: random + }; + + // const tokenize = require('../../01-tokenizer/02-words') + var tokenize = function tokenize(str) { + return str.split(/[ -]/g); + }; // take a list of strings + // look them up in the document + + + var buildTree = function buildTree(termList) { + var values = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : []; + var root = {}; // parse our input + + termList.forEach(function (str, i) { + var val = true; + + if (values[i] !== undefined) { + val = values[i]; + } // some rough normalization + + + str = (str || '').toLowerCase(); + str = str.replace(/[,;.!?]+$/, ''); + var arr = tokenize(str).map(function (s) { + return s.trim(); + }); + root[arr[0]] = root[arr[0]] || {}; + + if (arr.length === 1) { + root[arr[0]].value = val; + } else { + root[arr[0]].more = root[arr[0]].more || []; + root[arr[0]].more.push({ + rest: arr.slice(1), + value: val + }); + } + }); // sort by longest-first? + // console.log(JSON.stringify(root, null, 2)) + + return root; + }; + + var fastLookup = function fastLookup(termList, values, doc) { + var root = buildTree(termList, values); + var found = []; // each phrase + + var _loop = function _loop(i) { + var p = doc.list[i]; + var terms = p.terms(); + var words = terms.map(function (t) { + return t.reduced; + }); // each word + + var _loop2 = function _loop2(w) { + if (root[words[w]] !== undefined) { + // is it a multi-word match? + if (root[words[w]].more !== undefined) { + root[words[w]].more.forEach(function (more) { + // is it too-long? + if (words[w + more.rest.length] === undefined) { + return; + } // compare each subsequent term + + + var everyTerm = more.rest.every(function (word, r) { + return word === words[w + r + 1]; + }); + + if (everyTerm === true) { + found.push({ + id: p.terms()[w].id, + value: more.value, + length: more.rest.length + 1 + }); + } + }); + } // is it a single-word match? + + + if (root[words[w]].value !== undefined) { + found.push({ + id: p.terms()[w].id, + value: root[words[w]].value, + length: 1 + }); + } + } + }; + + for (var w = 0; w < words.length; w++) { + _loop2(w); + } + }; + + for (var i = 0; i < doc.list.length; i++) { + _loop(i); + } + + return found; + }; + + var _lookup = fastLookup; + + var _06Lookup = createCommonjsModule(function (module, exports) { + // compare one term and one match + // const doesMatch = function(term, str) { + // if (str === '') { + // return false + // } + // return term.reduced === str || term.implicit === str || term.root === str || term.text.toLowerCase() === str + // } + var isObject = function isObject(obj) { + return obj && Object.prototype.toString.call(obj) === '[object Object]'; + }; + /** lookup an array of words or phrases */ + + + exports.lookup = function (arr) { + var _this = this; + + var values = []; //is it a {key:val} object? + + var isObj = isObject(arr); + + if (isObj === true) { + arr = Object.keys(arr).map(function (k) { + values.push(arr[k]); + return k; + }); + } // support .lookup('foo') + + + if (typeof arr === 'string') { + arr = [arr]; + } //make sure we go fast. + + + if (this._cache.set !== true) { + this.cache(); + } + + var found = _lookup(arr, values, this); + var p = this.list[0]; // make object response + + if (isObj === true) { + var byVal = {}; + found.forEach(function (o) { + byVal[o.value] = byVal[o.value] || []; + byVal[o.value].push(p.buildFrom(o.id, o.length)); + }); + Object.keys(byVal).forEach(function (k) { + byVal[k] = _this.buildFrom(byVal[k]); + }); + return byVal; + } // otherwise, make array response: + + + found = found.map(function (o) { + return p.buildFrom(o.id, o.length); + }); + return this.buildFrom(found); + }; + + exports.lookUp = exports.lookup; + }); + + /** freeze the current state of the document, for speed-purposes*/ + var cache$1 = function cache(options) { + var _this = this; + + options = options || {}; + var words = {}; + var tags = {}; + this._cache.words = words; + this._cache.tags = tags; + this._cache.set = true; + this.list.forEach(function (p, i) { + p.cache = p.cache || {}; //p.terms get cached automatically + + var terms = p.terms(); // cache all the terms + + terms.forEach(function (t) { + if (words[t.reduced] && !words.hasOwnProperty(t.reduced)) { + return; //skip prototype words + } + + words[t.reduced] = words[t.reduced] || []; + words[t.reduced].push(i); + Object.keys(t.tags).forEach(function (tag) { + tags[tag] = tags[tag] || []; + tags[tag].push(i); + }); // cache root-form on Term, too + + if (options.root) { + t.setRoot(_this.world); + words[t.root] = true; + } + }); + }); + return this; + }; + /** un-freezes the current state of the document, so it may be transformed */ + + + var uncache = function uncache() { + this._cache = {}; + this.list.forEach(function (p) { + p.cache = {}; + }); // do parents too? + + this.parents().forEach(function (doc) { + doc._cache = {}; + doc.list.forEach(function (p) { + p.cache = {}; + }); + }); + return this; + }; + + var _07Cache = { + cache: cache$1, + uncache: uncache + }; + + var titleCase$3 = function titleCase(str) { + return str.charAt(0).toUpperCase() + str.substr(1); + }; + /** substitute-in new content */ + + + var replaceWith = function replaceWith(replace) { + var _this = this; + + var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; + + if (!replace) { + return this["delete"](); + } //support old-style params + + + if (options === true) { + options = { + keepTags: true + }; + } + + if (options === false) { + options = { + keepTags: false + }; + } + + options = options || {}; // clear the cache + + this.uncache(); // return this + + this.list.forEach(function (p) { + var input = replace; // accept a function for replace + + if (typeof replace === 'function') { + input = replace(p); + } + + var newPhrases; // accept a Doc object to replace + + if (input && _typeof(input) === 'object' && input.isA === 'Doc') { + newPhrases = input.list; + + _this.pool().merge(input.pool()); + } else if (typeof input === 'string') { + //input is a string + if (options.keepCase !== false && p.terms(0).isTitleCase()) { + input = titleCase$3(input); + } + + newPhrases = _01Tokenizer(input, _this.world, _this.pool()); //tag the new phrases + + var tmpDoc = _this.buildFrom(newPhrases); + + tmpDoc.tagger(); + newPhrases = tmpDoc.list; + } else { + return; //don't even bother + } // try to keep its old tags, if appropriate + + + if (options.keepTags === true) { + var oldTags = p.json({ + terms: { + tags: true + } + }).terms; + newPhrases[0].terms().forEach(function (t, i) { + if (oldTags[i]) { + t.tagSafe(oldTags[i].tags, 'keptTag', _this.world); + } + }); + } + + p.replace(newPhrases[0], _this); //Oneday: support multi-sentence replacements + }); + return this; + }; + /** search and replace match with new content */ + + + var replace$1 = function replace(match, _replace, options) { + // if there's no 2nd param, use replaceWith + if (_replace === undefined) { + return this.replaceWith(match, options); + } + + this.match(match).replaceWith(_replace, options); + return this; + }; + + var _01Replace = { + replaceWith: replaceWith, + replace: replace$1 + }; + + var _02Insert = createCommonjsModule(function (module, exports) { + // if it's empty, just create the phrase + var makeNew = function makeNew(str, doc) { + var phrase = _01Tokenizer(str, doc.world)[0]; //assume it's one sentence, for now + + var tmpDoc = doc.buildFrom([phrase]); + tmpDoc.tagger(); + doc.list = tmpDoc.list; + return doc; + }; + /** add these new terms to the end*/ + + + exports.append = function (str) { + var _this = this; + + if (!str) { + return this; + } // if it's empty, just create the phrase + + + if (!this.found) { + return makeNew(str, this); + } // clear the cache + + + this.uncache(); //add it to end of every phrase + + this.list.forEach(function (p) { + //build it + var phrase = _01Tokenizer(str, _this.world, _this.pool())[0]; //assume it's one sentence, for now + //tag it + + var tmpDoc = _this.buildFrom([phrase]); + + tmpDoc.tagger(); // push it onto the end + + p.append(phrase, _this); + }); + return this; + }; + + exports.insertAfter = exports.append; + exports.insertAt = exports.append; + /** add these new terms to the front*/ + + exports.prepend = function (str) { + var _this2 = this; + + if (!str) { + return this; + } // if it's empty, just create the phrase + + + if (!this.found) { + return makeNew(str, this); + } // clear the cache + + + this.uncache(); //add it to start of every phrase + + this.list.forEach(function (p) { + //build it + var phrase = _01Tokenizer(str, _this2.world, _this2.pool())[0]; //assume it's one sentence, for now + //tag it + + var tmpDoc = _this2.buildFrom([phrase]); + + tmpDoc.tagger(); // add it to the start + + p.prepend(phrase, _this2); + }); + return this; + }; + + exports.insertBefore = exports.prepend; + /** add these new things to the end*/ + + exports.concat = function () { + // clear the cache + this.uncache(); + var list = this.list.slice(0); //repeat for any number of params + + for (var i = 0; i < arguments.length; i++) { + var arg = arguments[i]; //support a fresh string + + if (typeof arg === 'string') { + var arr = _01Tokenizer(arg, this.world); //TODO: phrase.tagger()? + + list = list.concat(arr); + } else if (arg.isA === 'Doc') { + list = list.concat(arg.list); + } else if (arg.isA === 'Phrase') { + list.push(arg); + } + } + + return this.buildFrom(list); + }; + /** fully remove these terms from the document */ + + + exports["delete"] = function (match) { + var _this3 = this; + + // clear the cache + this.uncache(); + var toRemove = this; + + if (match) { + toRemove = this.match(match); + } + + toRemove.list.forEach(function (phrase) { + return phrase["delete"](_this3); + }); + return this; + }; // aliases + + + exports.remove = exports["delete"]; + }); + + var shouldTrim = { + clean: true, + reduced: true, + root: true + }; + /** return the document as text */ + + var text$1 = function text(options) { + var _this = this; + + options = options || {}; //are we showing every phrase? + + var showFull = false; + + if (this.parents().length === 0) { + showFull = true; + } // cache roots, if necessary + + + if (options === 'root' || _typeof(options) === 'object' && options.root) { + this.list.forEach(function (p) { + p.terms().forEach(function (t) { + if (t.root === null) { + t.setRoot(_this.world); + } + }); + }); + } + + var txt = this.list.reduce(function (str, p, i) { + var trimPre = !showFull && i === 0; + var trimPost = !showFull && i === _this.list.length - 1; + return str + p.text(options, trimPre, trimPost); + }, ''); // clumsy final trim of leading/trailing whitespace + + if (shouldTrim[options] === true || options.reduced === true || options.clean === true || options.root === true) { + txt = txt.trim(); + } + + return txt; + }; + + var _01Text = { + text: text$1 + }; + + // get all character startings in doc + var termOffsets = function termOffsets(doc) { + var elapsed = 0; + var index = 0; + var offsets = {}; + doc.termList().forEach(function (term) { + offsets[term.id] = { + index: index, + start: elapsed + term.pre.length, + length: term.text.length + }; + elapsed += term.pre.length + term.text.length + term.post.length; + index += 1; + }); + return offsets; + }; + + var calcOffset = function calcOffset(doc, result, options) { + // calculate offsets for each term + var offsets = termOffsets(doc.all()); // add index values + + if (options.terms.index || options.index) { + result.forEach(function (o) { + o.terms.forEach(function (t) { + t.index = offsets[t.id].index; + }); + o.index = o.terms[0].index; + }); + } // add offset values + + + if (options.terms.offset || options.offset) { + result.forEach(function (o) { + o.terms.forEach(function (t) { + t.offset = offsets[t.id] || {}; + }); // let len = o.terms.reduce((n, t, i) => { + // n += t.offset.length || 0 + // //add whitespace, too + // console.log(t.post) + // return n + // }, 0) + // The offset information for the entire doc starts at (or just before) + // the first term, and is as long as the whole text. The code originally + // copied the entire offset value from terms[0], but since we're now + // overriding 2 of the three fields, it's cleaner to just create an all- + // new object and not pretend it's "just" the same as terms[0]. + + o.offset = { + index: o.terms[0].offset.index, + start: o.terms[0].offset.start - o.text.indexOf(o.terms[0].text), + length: o.text.length + }; + }); + } + }; + + var _offset = calcOffset; + + var _02Json = createCommonjsModule(function (module, exports) { + var jsonDefaults = { + text: true, + terms: true, + trim: true + }; //some options have dependents + + var setOptions = function setOptions(options) { + options = Object.assign({}, jsonDefaults, options); + + if (options.unique) { + options.reduced = true; + } //offset calculation requires these options to be on + + + if (options.offset) { + options.text = true; + + if (!options.terms || options.terms === true) { + options.terms = {}; + } + + options.terms.offset = true; + } + + if (options.index || options.terms.index) { + options.terms = options.terms === true ? {} : options.terms; + options.terms.id = true; + } + + return options; + }; + /** pull out desired metadata from the document */ + + + exports.json = function () { + var _this = this; + + var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {}; + + //support json(3) format + if (typeof options === 'number' && this.list[options]) { + return this.list[options].json(jsonDefaults); + } + + options = setOptions(options); // cache root strings beforehand, if necessary + + if (options.root === true) { + this.list.forEach(function (p) { + p.terms().forEach(function (t) { + if (t.root === null) { + t.setRoot(_this.world); + } + }); + }); + } + + var result = this.list.map(function (p) { + return p.json(options, _this.world); + }); // add offset and index data for each term + + if (options.terms.offset || options.offset || options.terms.index || options.index) { + _offset(this, result, options); + } // add frequency #s + + + if (options.frequency || options.freq || options.count) { + var obj = {}; + this.list.forEach(function (p) { + var str = p.text('reduced'); + obj[str] = obj[str] || 0; + obj[str] += 1; + }); + this.list.forEach(function (p, i) { + result[i].count = obj[p.text('reduced')]; + }); + } // remove duplicates + + + if (options.unique) { + var already = {}; + result = result.filter(function (o) { + if (already[o.reduced] === true) { + return false; + } + + already[o.reduced] = true; + return true; + }); + } + + return result; + }; //aliases + + + exports.data = exports.json; + }); + + var _debug = createCommonjsModule(function (module) { + // https://stackoverflow.com/questions/9781218/how-to-change-node-jss-console-font-color + var reset = '\x1b[0m'; + + var padEnd = function padEnd(str, width) { + str = str.toString(); + + while (str.length < width) { + str += ' '; + } + + return str; + }; + + function isClientSide() { + return typeof window !== 'undefined' && window.document; + } // some nice colors for client-side debug + + + var css = { + green: '#7f9c6c', + red: '#914045', + blue: '#6699cc', + magenta: '#6D5685', + cyan: '#2D85A8', + yellow: '#e6d7b3', + black: '#303b50' + }; + + var logClientSide = function logClientSide(doc) { + var tagset = doc.world.tags; + doc.list.forEach(function (p) { + console.log('\n%c"' + p.text() + '"', 'color: #e6d7b3;'); + var terms = p.terms(); + terms.forEach(function (t) { + var tags = Object.keys(t.tags); + var text = t.text || '-'; + + if (t.implicit) { + text = '[' + t.implicit + ']'; + } + + var word = "'" + text + "'"; + word = padEnd(word, 8); + var found = tags.find(function (tag) { + return tagset[tag] && tagset[tag].color; + }); + var color = 'steelblue'; + + if (tagset[found]) { + color = tagset[found].color; + color = css[color]; + } + + console.log(" ".concat(word, " - %c").concat(tags.join(', ')), "color: ".concat(color || 'steelblue', ";")); + }); + }); + }; //cheaper than requiring chalk + + + var cli = { + green: function green(str) { + return '\x1b[32m' + str + reset; + }, + red: function red(str) { + return '\x1b[31m' + str + reset; + }, + blue: function blue(str) { + return '\x1b[34m' + str + reset; + }, + magenta: function magenta(str) { + return '\x1b[35m' + str + reset; + }, + cyan: function cyan(str) { + return '\x1b[36m' + str + reset; + }, + yellow: function yellow(str) { + return '\x1b[33m' + str + reset; + }, + black: function black(str) { + return '\x1b[30m' + str + reset; + } + }; + + var tagString = function tagString(tags, world) { + tags = tags.map(function (tag) { + if (!world.tags.hasOwnProperty(tag)) { + return tag; + } + + var c = world.tags[tag].color || 'blue'; + return cli[c](tag); + }); + return tags.join(', '); + }; //output some helpful stuff to the console + + + var debug = function debug(doc) { + if (isClientSide()) { + logClientSide(doc); + return doc; + } + + console.log(cli.blue('=====')); + doc.list.forEach(function (p) { + console.log(cli.blue(' -----')); + var terms = p.terms(); + terms.forEach(function (t) { + var tags = Object.keys(t.tags); + var text = t.text || '-'; + + if (t.implicit) { + text = '[' + t.implicit + ']'; + } + + { + text = cli.yellow(text); + } + + var word = "'" + text + "'"; + word = padEnd(word, 18); + var str = cli.blue(' | ') + word + ' - ' + tagString(tags, doc.world); + console.log(str); + }); + }); + console.log(''); + return doc; + }; + + module.exports = debug; + }); + + var topk = function topk(doc) { + var list = doc.json({ + text: false, + terms: false, + reduced: true + }); // combine them + + var obj = {}; + list.forEach(function (o) { + if (!obj[o.reduced]) { + o.count = 0; + obj[o.reduced] = o; + } + + obj[o.reduced].count += 1; + }); + var arr = Object.keys(obj).map(function (k) { + return obj[k]; + }); // sort them + + arr.sort(function (a, b) { + if (a.count > b.count) { + return -1; + } else if (a.count < b.count) { + return 1; + } + + return 0; + }); + return arr; + }; + + var _topk = topk; + + /** pretty-print the current document and its tags */ + + var debug_1 = function debug_1() { + _debug(this); + return this; + }; + /** some named output formats */ + + + var out = function out(method) { + if (method === 'text') { + return this.text(); + } + + if (method === 'normal') { + return this.text('normal'); + } + + if (method === 'json') { + return this.json(); + } + + if (method === 'offset' || method === 'offsets') { + return this.json({ + offset: true + }); + } + + if (method === 'array') { + return this.json({ + terms: false + }).map(function (obj) { + return obj.text; + }); + } + + if (method === 'freq' || method === 'frequency') { + return _topk(this); + } + + if (method === 'terms') { + var list = []; + this.json({ + text: false, + terms: { + text: true + } + }).forEach(function (obj) { + var terms = obj.terms.map(function (t) { + return t.text; + }); + terms = terms.filter(function (t) { + return t; + }); + list = list.concat(terms); + }); + return list; + } + + if (method === 'tags') { + return this.list.map(function (p) { + return p.terms().reduce(function (h, t) { + h[t.clean || t.implicit] = Object.keys(t.tags); + return h; + }, {}); + }); + } + + if (method === 'debug') { + _debug(this); + return this; + } + + return this.text(); + }; + + var _03Out = { + debug: debug_1, + out: out + }; + + var methods$2 = { + /** alphabetical order */ + alpha: function alpha(a, b) { + var left = a.text('clean'); + var right = b.text('clean'); + + if (left < right) { + return -1; + } + + if (left > right) { + return 1; + } + + return 0; + }, + + /** count the # of characters of each match */ + length: function length(a, b) { + var left = a.text().trim().length; + var right = b.text().trim().length; + + if (left < right) { + return 1; + } + + if (left > right) { + return -1; + } + + return 0; + }, + + /** count the # of terms in each match */ + wordCount: function wordCount(a, b) { + var left = a.wordCount(); + var right = b.wordCount(); + + if (left < right) { + return 1; + } + + if (left > right) { + return -1; + } + + return 0; + } + }; + /** sort by # of duplicates in the document*/ + + var byFreq = function byFreq(doc) { + var counts = {}; + var options = { + "case": true, + punctuation: false, + whitespace: true, + unicode: true + }; + doc.list.forEach(function (p) { + var str = p.text(options); + counts[str] = counts[str] || 0; + counts[str] += 1; + }); // sort by freq + + doc.list.sort(function (a, b) { + var left = counts[a.text(options)]; + var right = counts[b.text(options)]; + + if (left < right) { + return 1; + } + + if (left > right) { + return -1; + } + + return 0; + }); + return doc; + }; // order results 'chronologically', or document-order + + + var sortSequential = function sortSequential(doc) { + var order = {}; + doc.json({ + terms: { + offset: true + } + }).forEach(function (o) { + order[o.terms[0].id] = o.terms[0].offset.start; + }); + doc.list = doc.list.sort(function (a, b) { + if (order[a.start] > order[b.start]) { + return 1; + } else if (order[a.start] < order[b.start]) { + return -1; + } + + return 0; + }); + return doc; + }; //aliases + + + methods$2.alphabetical = methods$2.alpha; + methods$2.wordcount = methods$2.wordCount; // aliases for sequential ordering + + var seqNames = { + index: true, + sequence: true, + seq: true, + sequential: true, + chron: true, + chronological: true + }; + /** re-arrange the order of the matches (in place) */ + + var sort = function sort(input) { + input = input || 'alpha'; //do this one up-front + + if (input === 'freq' || input === 'frequency' || input === 'topk') { + return byFreq(this); + } + + if (seqNames.hasOwnProperty(input)) { + return sortSequential(this); + } + + input = methods$2[input] || input; // apply sort method on each phrase + + if (typeof input === 'function') { + this.list = this.list.sort(input); + return this; + } + + return this; + }; + /** reverse the order of the matches, but not the words */ + + + var reverse = function reverse() { + var list = [].concat(this.list); + list = list.reverse(); + return this.buildFrom(list); + }; + /** remove any duplicate matches */ + + + var unique$4 = function unique() { + var list = [].concat(this.list); + var obj = {}; + list = list.filter(function (p) { + var str = p.text('reduced').trim(); + + if (obj.hasOwnProperty(str) === true) { + return false; + } + + obj[str] = true; + return true; + }); + return this.buildFrom(list); + }; + + var _01Sort = { + sort: sort, + reverse: reverse, + unique: unique$4 + }; + + var isPunct = /[\[\]{}⟨⟩:,،、‒–—―…‹›«»‐\-;\/⁄·*\•^†‡°¡¿※№÷׺ª%‰=‱¶§~|‖¦©℗®℠™¤₳฿]/g; + var quotes = /['‘’“”"′″‴]+/g; + var methods$3 = { + // cleanup newlines and extra spaces + whitespace: function whitespace(doc) { + var termArr = doc.list.map(function (ts) { + return ts.terms(); + }); + termArr.forEach(function (terms, o) { + terms.forEach(function (t, i) { + // keep dashes between words + if (t.hasDash() === true) { + t.post = ' - '; + return; + } // remove existing spaces + + + t.pre = t.pre.replace(/\s/g, ''); + t.post = t.post.replace(/\s/g, ''); //last word? ensure there's a next sentence. + + if (terms.length - 1 === i && !termArr[o + 1]) { + return; + } // no extra spaces for contractions + + + if (t.implicit && Boolean(t.text) === true) { + return; + } // no extra spaces for hyphenated words + + + if (t.hasHyphen() === true) { + return; + } + + t.post += ' '; + }); + }); + }, + punctuation: function punctuation(termList) { + termList.forEach(function (t) { + // space between hyphenated words + if (t.hasHyphen() === true) { + t.post = ' '; + } + + t.pre = t.pre.replace(isPunct, ''); + t.post = t.post.replace(isPunct, ''); // elipses + + t.post = t.post.replace(/\.\.\./, ''); // only allow one exclamation + + if (/!/.test(t.post) === true) { + t.post = t.post.replace(/!/g, ''); + t.post = '!' + t.post; + } // only allow one question mark + + + if (/\?/.test(t.post) === true) { + t.post = t.post.replace(/[\?!]*/, ''); + t.post = '?' + t.post; + } + }); + }, + unicode: function unicode(termList) { + termList.forEach(function (t) { + if (t.isImplicit() === true) { + return; + } + + t.text = unicode_1(t.text); + }); + }, + quotations: function quotations(termList) { + termList.forEach(function (t) { + t.post = t.post.replace(quotes, ''); + t.pre = t.pre.replace(quotes, ''); + }); + }, + adverbs: function adverbs(doc) { + doc.match('#Adverb').not('(not|nary|seldom|never|barely|almost|basically|so)').remove(); + }, + // remove the '.' from 'Mrs.' (safely) + abbreviations: function abbreviations(doc) { + doc.list.forEach(function (ts) { + var terms = ts.terms(); + terms.forEach(function (t, i) { + if (t.tags.Abbreviation === true && terms[i + 1]) { + t.post = t.post.replace(/^\./, ''); + } + }); + }); + } + }; + var _methods = methods$3; + + var defaults = { + // light + whitespace: true, + unicode: true, + punctuation: true, + emoji: true, + acronyms: true, + abbreviations: true, + // medium + "case": false, + contractions: false, + parentheses: false, + quotations: false, + adverbs: false, + // heavy (loose legibility) + possessives: false, + verbs: false, + nouns: false, + honorifics: false // pronouns: true, + + }; + var mapping$1 = { + light: {}, + medium: { + "case": true, + contractions: true, + parentheses: true, + quotations: true, + adverbs: true + } + }; + mapping$1.heavy = Object.assign({}, mapping$1.medium, { + possessives: true, + verbs: true, + nouns: true, + honorifics: true + }); + /** common ways to clean-up the document, and reduce noise */ + + var normalize = function normalize(options) { + options = options || {}; // support named forms + + if (typeof options === 'string') { + options = mapping$1[options] || {}; + } // set defaults + + + options = Object.assign({}, defaults, options); // clear the cache + + this.uncache(); + var termList = this.termList(); // lowercase things + + if (options["case"]) { + this.toLowerCase(); + } //whitespace + + + if (options.whitespace) { + _methods.whitespace(this); + } // unicode: é -> e + + + if (options.unicode) { + _methods.unicode(termList); + } //punctuation - keep sentence punctation, quotes, parenths + + + if (options.punctuation) { + _methods.punctuation(termList); + } // remove ':)' + + + if (options.emoji) { + this.remove('(#Emoji|#Emoticon)'); + } // 'f.b.i.' -> 'FBI' + + + if (options.acronyms) { + this.acronyms().strip(); // .toUpperCase() + } // remove period from abbreviations + + + if (options.abbreviations) { + _methods.abbreviations(this); + } // --Medium methods-- + // `isn't` -> 'is not' + + + if (options.contraction || options.contractions) { + this.contractions().expand(); + } // '(word)' -> 'word' + + + if (options.parentheses) { + this.parentheses().unwrap(); + } // remove "" punctuation + + + if (options.quotations || options.quotes) { + _methods.quotations(termList); + } // remove any un-necessary adverbs + + + if (options.adverbs) { + _methods.adverbs(this); + } // --Heavy methods-- + // `cory hart's -> cory hart' + + + if (options.possessive || options.possessives) { + this.possessives().strip(); + } // 'he walked' -> 'he walk' + + + if (options.verbs) { + this.verbs().toInfinitive(); + } // 'three dogs' -> 'three dog' + + + if (options.nouns || options.plurals) { + this.nouns().toSingular(); + } // remove 'Mr.' from 'Mr John Smith' + + + if (options.honorifics) { + this.remove('#Honorific'); + } + + return this; + }; + + var _02Normalize = { + normalize: normalize + }; + + var _03Split = createCommonjsModule(function (module, exports) { + /** return a Document with three parts for every match + * seperate everything before the word, as a new phrase + */ + exports.splitOn = function (reg) { + // if there's no match, split parent, instead + if (!reg) { + var parent = this.parent(); + return parent.splitOn(this); + } //start looking for a match.. + + + var regs = syntax_1(reg); + var matches = []; + this.list.forEach(function (p) { + var foundEm = p.match(regs); //no match here, add full sentence + + if (foundEm.length === 0) { + matches.push(p); + return; + } // we found something here. + + + var carry = p; + foundEm.forEach(function (found) { + var parts = carry.splitOn(found); // add em in + + if (parts.before) { + matches.push(parts.before); + } + + if (parts.match) { + matches.push(parts.match); + } // start matching now on the end + + + carry = parts.after; + }); // add that last part + + if (carry) { + matches.push(carry); + } + }); + return this.buildFrom(matches); + }; + /** return a Document with two parts for every match + * seperate everything after the word, as a new phrase + */ + + + exports.splitAfter = function (reg) { + // if there's no match, split parent, instead + if (!reg) { + var parent = this.parent(); + return parent.splitAfter(this); + } // start looking for our matches + + + var regs = syntax_1(reg); + var matches = []; + this.list.forEach(function (p) { + var foundEm = p.match(regs); //no match here, add full sentence + + if (foundEm.length === 0) { + matches.push(p); + return; + } // we found something here. + + + var carry = p; + foundEm.forEach(function (found) { + var parts = carry.splitOn(found); // add em in + + if (parts.before && parts.match) { + // merge these two together + parts.before.length += parts.match.length; + matches.push(parts.before); + } else if (parts.match) { + matches.push(parts.match); + } // start matching now on the end + + + carry = parts.after; + }); // add that last part + + if (carry) { + matches.push(carry); + } + }); + return this.buildFrom(matches); + }; + + exports.split = exports.splitAfter; //i guess? + + /** return a Document with two parts for every match */ + + exports.splitBefore = function (reg) { + // if there's no match, split parent, instead + if (!reg) { + var parent = this.parent(); + return parent.splitBefore(this); + } //start looking for a match.. + + + var regs = syntax_1(reg); + var matches = []; + this.list.forEach(function (p) { + var foundEm = p.match(regs); //no match here, add full sentence + + if (foundEm.length === 0) { + matches.push(p); + return; + } // we found something here. + + + var carry = p; + foundEm.forEach(function (found) { + var parts = carry.splitOn(found); // add before part in + + if (parts.before) { + matches.push(parts.before); + } // merge match+after + + + if (parts.match && parts.after) { + parts.match.length += parts.after.length; + } // start matching now on the end + + + carry = parts.match; + }); // add that last part + + if (carry) { + matches.push(carry); + } + }); + return this.buildFrom(matches); + }; + /** split a document into labeled sections */ + + + exports.segment = function (regs, options) { + regs = regs || {}; + options = options || { + text: true + }; + var doc = this; + var keys = Object.keys(regs); // split em + + keys.forEach(function (k) { + doc = doc.splitOn(k); + }); //add labels for each section + + doc.list.forEach(function (p) { + for (var i = 0; i < keys.length; i += 1) { + if (p.has(keys[i])) { + p.segment = regs[keys[i]]; + return; + } + } + }); + return doc.list.map(function (p) { + var res = p.json(options); + res.segment = p.segment || null; + return res; + }); + }; + }); + + var eachTerm = function eachTerm(doc, fn) { + var world = doc.world; + doc.list.forEach(function (p) { + p.terms().forEach(function (t) { + return t[fn](world); + }); + }); + return doc; + }; + /** turn every letter of every term to lower-cse */ + + + var toLowerCase = function toLowerCase() { + return eachTerm(this, 'toLowerCase'); + }; + /** turn every letter of every term to upper case */ + + + var toUpperCase = function toUpperCase() { + return eachTerm(this, 'toUpperCase'); + }; + /** upper-case the first letter of each term */ + + + var toTitleCase = function toTitleCase() { + return eachTerm(this, 'toTitleCase'); + }; + /** remove whitespace and title-case each term */ + + + var toCamelCase = function toCamelCase() { + this.list.forEach(function (p) { + //remove whitespace + var terms = p.terms(); + terms.forEach(function (t, i) { + if (i !== 0) { + t.toTitleCase(); + } + + if (i !== terms.length - 1) { + t.post = ''; + } + }); + }); // this.tag('#CamelCase', 'toCamelCase') + + return this; + }; + + var _04Case = { + toLowerCase: toLowerCase, + toUpperCase: toUpperCase, + toTitleCase: toTitleCase, + toCamelCase: toCamelCase + }; + + var _05Whitespace = createCommonjsModule(function (module, exports) { + /** add this punctuation or whitespace before each match: */ + exports.pre = function (str, concat) { + if (str === undefined) { + return this.list[0].terms(0).pre; + } + + this.list.forEach(function (p) { + var term = p.terms(0); + + if (concat === true) { + term.pre += str; + } else { + term.pre = str; + } + }); + return this; + }; + /** add this punctuation or whitespace after each match: */ + + + exports.post = function (str, concat) { + // return array of post strings + if (str === undefined) { + return this.list.map(function (p) { + var terms = p.terms(); + var term = terms[terms.length - 1]; + return term.post; + }); + } // set post string on all ends + + + this.list.forEach(function (p) { + var terms = p.terms(); + var term = terms[terms.length - 1]; + + if (concat === true) { + term.post += str; + } else { + term.post = str; + } + }); + return this; + }; + /** remove start and end whitespace */ + + + exports.trim = function () { + this.list = this.list.map(function (p) { + return p.trim(); + }); + return this; + }; + /** connect words with hyphen, and remove whitespace */ + + + exports.hyphenate = function () { + this.list.forEach(function (p) { + var terms = p.terms(); //remove whitespace + + terms.forEach(function (t, i) { + if (i !== 0) { + t.pre = ''; + } + + if (terms[i + 1]) { + t.post = '-'; + } + }); + }); + return this; + }; + /** remove hyphens between words, and set whitespace */ + + + exports.dehyphenate = function () { + var hasHyphen = /(-|–|—)/; + this.list.forEach(function (p) { + var terms = p.terms(); //remove whitespace + + terms.forEach(function (t) { + if (hasHyphen.test(t.post)) { + t.post = ' '; + } + }); + }); + return this; + }; + + exports.deHyphenate = exports.dehyphenate; + /** add quotations around these matches */ + + exports.toQuotations = function (start, end) { + start = start || "\""; + end = end || "\""; + this.list.forEach(function (p) { + var terms = p.terms(); + terms[0].pre = start + terms[0].pre; + var last = terms[terms.length - 1]; + last.post = end + last.post; + }); + return this; + }; + + exports.toQuotation = exports.toQuotations; + /** add brackets around these matches */ + + exports.toParentheses = function (start, end) { + start = start || "("; + end = end || ")"; + this.list.forEach(function (p) { + var terms = p.terms(); + terms[0].pre = start + terms[0].pre; + var last = terms[terms.length - 1]; + last.post = end + last.post; + }); + return this; + }; + }); + + /** make all phrases into one phrase */ + var join = function join(str) { + // clear the cache + this.uncache(); // make one large phrase - 'main' + + var main = this.list[0]; + var before = main.length; + var removed = {}; + + for (var i = 1; i < this.list.length; i++) { + var p = this.list[i]; + removed[p.start] = true; + var term = main.lastTerm(); // add whitespace between them + + if (str) { + term.post += str; + } // main -> p + + + term.next = p.start; // main <- p + + p.terms(0).prev = term.id; + main.length += p.length; + main.cache = {}; + } // parents are bigger than than their children. + // when we increase a child, we increase their parent too. + + + var increase = main.length - before; + this.parents().forEach(function (doc) { + // increase length on each effected phrase + doc.list.forEach(function (p) { + var terms = p.terms(); + + for (var _i = 0; _i < terms.length; _i++) { + if (terms[_i].id === main.start) { + p.length += increase; + break; + } + } + + p.cache = {}; + }); // remove redundant phrases now + + doc.list = doc.list.filter(function (p) { + return removed[p.start] !== true; + }); + }); // return one major phrase + + return this.buildFrom([main]); + }; + + var _06Join = { + join: join + }; + + var postPunct = /[,\)"';:\-–—\.…]/; // const irregulars = { + // 'will not': `won't`, + // 'i am': `i'm`, + // } + + var setContraction = function setContraction(m, suffix) { + if (!m.found) { + return; + } + + var terms = m.termList(); //avoid any problematic punctuation + + for (var i = 0; i < terms.length - 1; i++) { + var t = terms[i]; + + if (postPunct.test(t.post)) { + return; + } + } // set them as implict + + + terms.forEach(function (t) { + t.implicit = t.clean; + }); // perform the contraction + + terms[0].text += suffix; // clean-up the others + + terms.slice(1).forEach(function (t) { + t.text = ''; + }); + + for (var _i = 0; _i < terms.length - 1; _i++) { + var _t = terms[_i]; + _t.post = _t.post.replace(/ /, ''); + } + }; + /** turn 'i am' into i'm */ + + + var contract = function contract() { + var doc = this.not('@hasContraction'); // we are -> we're + + var m = doc.match('(we|they|you) are'); + setContraction(m, "'re"); // they will -> they'll + + m = doc.match('(he|she|they|it|we|you) will'); + setContraction(m, "'ll"); // she is -> she's + + m = doc.match('(he|she|they|it|we) is'); + setContraction(m, "'s"); // spencer is -> spencer's + + m = doc.match('#Person is'); + setContraction(m, "'s"); // spencer would -> spencer'd + + m = doc.match('#Person would'); + setContraction(m, "'d"); // would not -> wouldn't + + m = doc.match('(is|was|had|would|should|could|do|does|have|has|can) not'); + setContraction(m, "n't"); // i have -> i've + + m = doc.match('(i|we|they) have'); + setContraction(m, "'ve"); // would have -> would've + + m = doc.match('(would|should|could) have'); + setContraction(m, "'ve"); // i am -> i'm + + m = doc.match('i am'); + setContraction(m, "'m"); // going to -> gonna + + m = doc.match('going to'); + return this; + }; + + var _07Contract = { + contract: contract + }; + + var methods$4 = Object.assign({}, _01Utils$1, _02Accessors, _03Match, _04Tag, _05Loops, _06Lookup, _07Cache, _01Replace, _02Insert, _01Text, _02Json, _03Out, _01Sort, _02Normalize, _03Split, _04Case, _05Whitespace, _06Join, _07Contract); + + var methods$5 = {}; // allow helper methods like .adjectives() and .adverbs() + + var arr = [['terms', '.'], ['hyphenated', '@hasHyphen .'], ['adjectives', '#Adjective'], ['hashTags', '#HashTag'], ['emails', '#Email'], ['emoji', '#Emoji'], ['emoticons', '#Emoticon'], ['atMentions', '#AtMention'], ['urls', '#Url'], ['adverbs', '#Adverb'], ['pronouns', '#Pronoun'], ['conjunctions', '#Conjunction'], ['prepositions', '#Preposition']]; + arr.forEach(function (a) { + methods$5[a[0]] = function (n) { + var m = this.match(a[1]); + + if (typeof n === 'number') { + m = m.get(n); + } + + return m; + }; + }); // aliases + + methods$5.emojis = methods$5.emoji; + methods$5.atmentions = methods$5.atMentions; + methods$5.words = methods$5.terms; + /** return anything tagged as a phone number */ + + methods$5.phoneNumbers = function (n) { + var m = this.splitAfter('@hasComma'); + m = m.match('#PhoneNumber+'); + + if (typeof n === 'number') { + m = m.get(n); + } + + return m; + }; + /** Deprecated: please use compromise-numbers plugin */ + + + methods$5.money = function (n) { + var m = this.match('#Money #Currency?'); + + if (typeof n === 'number') { + m = m.get(n); + } + + return m; + }; + /** return all cities, countries, addresses, and regions */ + + + methods$5.places = function (n) { + // don't split 'paris, france' + var keep = this.match('(#City && @hasComma) (#Region|#Country)'); // but split the other commas + + var m = this.not(keep).splitAfter('@hasComma'); // combine them back together + + m = m.concat(keep); + m.sort('index'); + m = m.match('#Place+'); + + if (typeof n === 'number') { + m = m.get(n); + } + + return m; + }; + /** return all schools, businesses and institutions */ + + + methods$5.organizations = function (n) { + var m = this.clauses(); + m = m.match('#Organization+'); + + if (typeof n === 'number') { + m = m.get(n); + } + + return m; + }; //combine them with .topics() method + + + methods$5.entities = function (n) { + var r = this.clauses(); // Find people, places, and organizations + + var yup = r.people(); + yup = yup.concat(r.places()); + yup = yup.concat(r.organizations()); + var ignore = ['someone', 'man', 'woman', 'mother', 'brother', 'sister', 'father']; + yup = yup.not(ignore); //return them to normal ordering + + yup.sort('sequence'); // yup.unique() //? not sure + + if (typeof n === 'number') { + yup = yup.get(n); + } + + return yup; + }; //aliases + + + methods$5.things = methods$5.entities; + methods$5.topics = methods$5.entities; + var _simple = methods$5; + + var underOver = /^(under|over)-?/; + /** match a word-sequence, like 'super bowl' in the lexicon */ + + var tryMultiple = function tryMultiple(terms, t, world) { + var lex = world.words; //try a two-word version + + var txt = terms[t].reduced + ' ' + terms[t + 1].reduced; + + if (lex[txt] !== undefined && lex.hasOwnProperty(txt) === true) { + terms[t].tag(lex[txt], 'lexicon-two', world); + terms[t + 1].tag(lex[txt], 'lexicon-two', world); + return 1; + } //try a three-word version? + + + if (t + 2 < terms.length) { + txt += ' ' + terms[t + 2].reduced; + + if (lex[txt] !== undefined && lex.hasOwnProperty(txt) === true) { + terms[t].tag(lex[txt], 'lexicon-three', world); + terms[t + 1].tag(lex[txt], 'lexicon-three', world); + terms[t + 2].tag(lex[txt], 'lexicon-three', world); + return 2; + } + } //try a four-word version? + + + if (t + 3 < terms.length) { + txt += ' ' + terms[t + 3].reduced; + + if (lex[txt] !== undefined && lex.hasOwnProperty(txt) === true) { + terms[t].tag(lex[txt], 'lexicon-four', world); + terms[t + 1].tag(lex[txt], 'lexicon-four', world); + terms[t + 2].tag(lex[txt], 'lexicon-four', world); + terms[t + 3].tag(lex[txt], 'lexicon-four', world); + return 3; + } + } + + return 0; + }; + /** look at each word in our list of known-words */ + + + var checkLexicon = function checkLexicon(terms, world) { + var lex = world.words; + var hasCompound = world.hasCompound; // use reduced? + //go through each term, and check the lexicon + + for (var t = 0; t < terms.length; t += 1) { + var str = terms[t].clean; //is it the start of a compound word, like 'super bowl'? + + if (hasCompound[str] === true && t + 1 < terms.length) { + var foundWords = tryMultiple(terms, t, world); + + if (foundWords > 0) { + t += foundWords; //skip any already-found words + + continue; + } + } //try one-word lexicon + + + if (lex[str] !== undefined && lex.hasOwnProperty(str) === true) { + terms[t].tag(lex[str], 'lexicon', world); + continue; + } // look at reduced version of term, too + + + if (str !== terms[t].reduced && lex.hasOwnProperty(terms[t].reduced) === true) { + terms[t].tag(lex[terms[t].reduced], 'lexicon', world); + continue; + } // prefix strip: try to match 'take' for 'undertake' + + + if (underOver.test(str) === true) { + var noPrefix = str.replace(underOver, ''); + + if (lex.hasOwnProperty(noPrefix) === true) { + terms[t].tag(lex[noPrefix], 'noprefix-lexicon', world); + } + } + } + + return terms; + }; + + var _01Lexicon = checkLexicon; + + var apostrophes = /[\'‘’‛‵′`´]$/; + var perSec = /^(m|k|cm|km|m)\/(s|h|hr)$/; // '5 k/m' + // + + var checkPunctuation = function checkPunctuation(terms, i, world) { + var term = terms[i]; //check hyphenation + // if (term.post.indexOf('-') !== -1 && terms[i + 1] && terms[i + 1].pre === '') { + // term.tag('Hyphenated', 'has-hyphen', world) + // } + // support 'head-over' + // if (term.hasHyphen() === true) { + // console.log(term.tags) + // } + // console.log(term.hasHyphen(), term.text) + //an end-tick (trailing apostrophe) - flanders', or Carlos' + + if (apostrophes.test(term.text)) { + if (!apostrophes.test(term.pre) && !apostrophes.test(term.post) && term.clean.length > 2) { + var endChar = term.clean[term.clean.length - 2]; //flanders' + + if (endChar === 's') { + term.tag(['Possessive', 'Noun'], 'end-tick', world); + return; + } //chillin' + + + if (endChar === 'n') { + term.tag(['Gerund'], 'chillin', world); + } + } + } // '5 km/s' + + + if (perSec.test(term.text)) { + term.tag('Unit', 'per-sec', world); + } // 'NASA' is, but not 'i REALLY love it.' + // if (term.tags.Noun === true && isAcronym(term, world)) { + // term.tag('Acronym', 'acronym-step', world) + // term.tag('Noun', 'acronym-infer', world) + // } else if (!oneLetterWord.hasOwnProperty(term.text) && oneLetterAcronym.test(term.text)) { + // term.tag('Acronym', 'one-letter-acronym', world) + // term.tag('Noun', 'one-letter-infer', world) + // } + + }; + + var _02Punctuation$1 = checkPunctuation; + + //these are regexes applied to t.text, instead of t.clean + // order matters. + var startsWith = [//web tags + [/^[\w\.]+@[\w\.]+\.[a-z]{2,3}$/, 'Email'], //not fancy + [/^#[a-z0-9_\u00C0-\u00FF]{2,}$/, 'HashTag'], [/^@\w{2,}$/, 'AtMention'], [/^(https?:\/\/|www\.)\w+\.[a-z]{2,3}/, 'Url'], //with http/www + [/^[\w./]+\.(com|net|gov|org|ly|edu|info|biz|ru|jp|de|in|uk|br)/, 'Url'], //http://mostpopularwebsites.net/top-level-domain + //dates/times + [/^[012]?[0-9](:[0-5][0-9])(:[0-5][0-9])$/, 'Time'], //4:32:32 + [/^[012]?[0-9](:[0-5][0-9])?(:[0-5][0-9])? ?(am|pm)$/, 'Time'], //4pm + [/^[012]?[0-9](:[0-5][0-9])(:[0-5][0-9])? ?(am|pm)?$/, 'Time'], //4:00pm + [/^[PMCE]ST$/, 'Time'], //PST, time zone abbrevs + [/^utc ?[+-]?[0-9]+?$/, 'Time'], //UTC 8+ + [/^[a-z0-9]*? o\'?clock$/, 'Time'], //3 oclock + [/^[0-9]{1,4}-[0-9]{1,2}-[0-9]{1,4}$/, 'Date'], // 03-02-89 + [/^[0-9]{1,4}\/[0-9]{1,2}\/[0-9]{1,4}$/, 'Date'], // 03/02/89 + [/^[0-9]{1,4}-[a-z]{2,9}-[0-9]{1,4}$/i, 'Date'], // 03-March-89 + //names + [/^ma?c\'.*/, 'LastName'], //mc'adams + [/^o\'[drlkn].*/, 'LastName'], //o'douggan + [/^ma?cd[aeiou]/, 'LastName'], //macdonell - Last patterns https://en.wikipedia.org/wiki/List_of_family_name_affixes + //slang things + [/^(lol)+[sz]$/, 'Expression'], //lol + [/^woo+a*?h?$/, 'Expression'], //whoaa, wooo + [/^(un|de|re)\\-[a-z\u00C0-\u00FF]{2}/, 'Verb'], // [/^(over|under)[a-z]{2,}/, 'Adjective'], + [/^[0-9]{1,4}\.[0-9]{1,2}\.[0-9]{1,4}$/, 'Date'], // 03-02-89 + //phone numbers + [/^[0-9]{3}-[0-9]{4}$/, 'PhoneNumber'], //589-3809 + [/^(\+?[0-9][ -])?[0-9]{3}[ -]?[0-9]{3}-[0-9]{4}$/, 'PhoneNumber'], //632-589-3809 + //money + // currency regex + // /[\$\xA2-\xA5\u058F\u060B\u09F2\u09F3\u09FB\u0AF1\u0BF9\u0E3F\u17DB\u20A0-\u20BD\uA838\uFDFC\uFE69\uFF04\uFFE0\uFFE1\uFFE5\uFFE6] + //like $5.30 + [/^[-+]?[\$\xA2-\xA5\u058F\u060B\u09F2\u09F3\u09FB\u0AF1\u0BF9\u0E3F\u17DB\u20A0-\u20BD\uA838\uFDFC\uFE69\uFF04\uFFE0\uFFE1\uFFE5\uFFE6][-+]?[0-9]+(,[0-9]{3})*(\.[0-9]+)?(k|m|b|bn)?\+?$/, ['Money', 'Value']], //like 5.30$ + [/^[-+]?[0-9]+(,[0-9]{3})*(\.[0-9]+)?[\$\xA2-\xA5\u058F\u060B\u09F2\u09F3\u09FB\u0AF1\u0BF9\u0E3F\u17DB\u20A0-\u20BD\uA838\uFDFC\uFE69\uFF04\uFFE0\uFFE1\uFFE5\uFFE6]\+?$/, ['Money', 'Value']], //like 400usd + [/^[-+]?[0-9]([0-9,.])+?(usd|eur|jpy|gbp|cad|aud|chf|cny|hkd|nzd|kr|rub)$/i, ['Money', 'Value']], //numbers + // 50 | -50 | 3.23 | 5,999.0 | 10+ + [/^[-+]?[0-9]+(,[0-9]{3})*(\.[0-9]+)?\+?$/, ['Cardinal', 'NumericValue']], [/^[-+]?[0-9]+(,[0-9]{3})*(\.[0-9]+)?(st|nd|rd|th)$/, ['Ordinal', 'NumericValue']], // .73th + [/^\.[0-9]+\+?$/, ['Cardinal', 'NumericValue']], //percent + [/^[-+]?[0-9]+(,[0-9]{3})*(\.[0-9]+)?%\+?$/, ['Percent', 'Cardinal', 'NumericValue']], //7% .. + [/^\.[0-9]+%$/, ['Percent', 'Cardinal', 'NumericValue']], //.7% .. + //fraction + [/^[0-9]{1,4}\/[0-9]{1,4}$/, 'Fraction'], //3/2ths + //range + [/^[0-9.]{1,2}[-–][0-9]{1,2}$/, ['Value', 'NumberRange']], //7-8 + [/^[0-9.]{1,4}(st|nd|rd|th)?[-–][0-9\.]{1,4}(st|nd|rd|th)?$/, 'NumberRange'], //5-7 + //with unit + [/^[0-9.]+([a-z]{1,4})$/, 'Value'] //like 5tbsp + //ordinal + // [/^[0-9][0-9,.]*(st|nd|rd|r?th)$/, ['NumericValue', 'Ordinal']], //like 5th + // [/^[0-9]+(st|nd|rd|th)$/, 'Ordinal'], //like 5th + ]; + + var romanNumeral = /^[IVXLCDM]{2,}$/; + var romanNumValid = /^M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})$/; // https://stackoverflow.com/a/267405/168877 + //try each of the ^regexes in our list + + var checkRegex = function checkRegex(term, world) { + var str = term.text; // do them all! + + for (var r = 0; r < startsWith.length; r += 1) { + if (startsWith[r][0].test(str) === true) { + term.tagSafe(startsWith[r][1], 'prefix #' + r, world); + break; + } + } // do some more! + //roman numberals - XVII + + + if (term.text.length >= 2 && romanNumeral.test(str) && romanNumValid.test(str)) { + term.tag('RomanNumeral', 'xvii', world); + } + }; + + var _03Prefixes = checkRegex; + + //regex suffix patterns and their most common parts of speech, + //built using wordnet, by spencer kelly. + //this mapping shrinks-down the uglified build + var Adj = 'Adjective'; + var Inf = 'Infinitive'; + var Pres = 'PresentTense'; + var Sing = 'Singular'; + var Past = 'PastTense'; + var Adverb = 'Adverb'; + var Exp = 'Expression'; + var Actor = 'Actor'; + var Verb = 'Verb'; + var Noun = 'Noun'; + var Last = 'LastName'; //the order here matters. + //regexes indexed by mandated last-character + + var endsWith$1 = { + a: [[/.[aeiou]na$/, Noun], [/.[oau][wvl]ska$/, Last], //polish (female) + [/.[^aeiou]ica$/, Sing], [/^([hyj]a)+$/, Exp] //hahah + ], + c: [[/.[^aeiou]ic$/, Adj]], + d: [//==-ed== + //double-consonant + [/[aeiou](pp|ll|ss|ff|gg|tt|rr|bb|nn|mm)ed$/, Past], //popped, planned + //double-vowel + [/.[aeo]{2}[bdgmnprvz]ed$/, Past], //beeped, mooned, veered + //-hed + [/.[aeiou][sg]hed$/, Past], //stashed, sighed + //-rd + [/.[aeiou]red$/, Past], //stored + [/.[aeiou]r?ried$/, Past], //buried + //-led + [/.[bcdgtr]led$/, Past], //startled, rumbled + [/.[aoui]f?led$/, Past], //impaled, stifled + //-sed + [/.[iao]sed$/, Past], //franchised + [/[aeiou]n?[cs]ed$/, Past], //laced, lanced + //-med + [/[aeiou][rl]?[mnf]ed$/, Past], //warmed, attained, engulfed + //-ked + [/[aeiou][ns]?c?ked$/, Past], //hooked, masked + //-ged + [/[aeiou][nl]?ged$/, Past], //engaged + //-ted + [/.[tdbwxz]ed$/, Past], //bribed, boxed + [/[^aeiou][aeiou][tvx]ed$/, Past], //boxed + //-ied + [/.[cdlmnprstv]ied$/, Past], //rallied + [/[^aeiou]ard$/, Sing], //card + [/[aeiou][^aeiou]id$/, Adj], [/.[vrl]id$/, Adj]], + e: [[/.[lnr]ize$/, Inf], [/.[^aeiou]ise$/, Inf], [/.[aeiou]te$/, Inf], [/.[^aeiou][ai]ble$/, Adj], [/.[^aeiou]eable$/, Adj], [/.[ts]ive$/, Adj]], + h: [[/.[^aeiouf]ish$/, Adj], [/.v[iy]ch$/, Last], //east-europe + [/^ug?h+$/, Exp], //uhh + [/^uh[ -]?oh$/, Exp] //uhoh + ], + i: [[/.[oau][wvl]ski$/, Last] //polish (male) + ], + k: [[/^(k){2}$/, Exp] //kkkk + ], + l: [[/.[gl]ial$/, Adj], [/.[^aeiou]ful$/, Adj], [/.[nrtumcd]al$/, Adj], [/.[^aeiou][ei]al$/, Adj]], + m: [[/.[^aeiou]ium$/, Sing], [/[^aeiou]ism$/, Sing], [/^h*u*m+$/, Exp], //mmmmmmm / ummmm / huuuuuummmmmm + [/^\d+ ?[ap]m$/, 'Date']], + n: [[/.[lsrnpb]ian$/, Adj], [/[^aeiou]ician$/, Actor], [/[aeiou][ktrp]in$/, 'Gerund'] // 'cookin', 'hootin' + ], + o: [[/^no+$/, Exp], //noooo + [/^(yo)+$/, Exp], //yoyo + [/^woo+[pt]?$/, Exp] //woo + ], + r: [[/.[bdfklmst]ler$/, 'Noun'], [/.[ilk]er$/, 'Comparative'], [/[aeiou][pns]er$/, Sing], [/[^i]fer$/, Inf], [/.[^aeiou][ao]pher$/, Actor]], + t: [[/.[di]est$/, 'Superlative'], [/.[icldtgrv]ent$/, Adj], [/[aeiou].*ist$/, Adj], [/^[a-z]et$/, Verb]], + s: [[/.[rln]ates$/, Pres], [/.[^z]ens$/, Verb], [/.[lstrn]us$/, Sing], [/.[aeiou]sks$/, Pres], //masks + [/.[aeiou]kes$/, Pres], //bakes + [/[aeiou][^aeiou]is$/, Sing], [/[a-z]\'s$/, Noun], [/^yes+$/, Exp] //yessss + ], + v: [[/.[^aeiou][ai][kln]ov$/, Last] //east-europe + ], + y: [[/.[cts]hy$/, Adj], [/.[st]ty$/, Adj], [/.[gk]y$/, Adj], [/.[tnl]ary$/, Adj], [/.[oe]ry$/, Sing], [/[rdntkbhs]ly$/, Adverb], [/...lly$/, Adverb], [/[bszmp]{2}y$/, Adj], [/.(gg|bb|zz)ly$/, Adj], [/.[aeiou]my$/, Adj], [/[ea]{2}zy$/, Adj], [/.[^aeiou]ity$/, Sing]] + }; + + //just a foolish lookup of known suffixes + var Adj$1 = 'Adjective'; + var Inf$1 = 'Infinitive'; + var Pres$1 = 'PresentTense'; + var Sing$1 = 'Singular'; + var Past$1 = 'PastTense'; + var Avb = 'Adverb'; + var Plrl = 'Plural'; + var Actor$1 = 'Actor'; + var Vb = 'Verb'; + var Noun$1 = 'Noun'; + var Last$1 = 'LastName'; + var Modal = 'Modal'; + var Place = 'Place'; // find any issues - https://observablehq.com/@spencermountain/suffix-word-lookup + + var suffixMap = [null, //0 + null, //1 + { + //2-letter + ea: Sing$1, + ia: Noun$1, + ic: Adj$1, + ly: Avb, + "'n": Vb, + "'t": Vb + }, { + //3-letter + oed: Past$1, + ued: Past$1, + xed: Past$1, + ' so': Avb, + "'ll": Modal, + "'re": 'Copula', + azy: Adj$1, + end: Vb, + ped: Past$1, + ffy: Adj$1, + ify: Inf$1, + ing: 'Gerund', + //likely to be converted to Adj after lexicon pass + ize: Inf$1, + lar: Adj$1, + mum: Adj$1, + nes: Pres$1, + nny: Adj$1, + oid: Adj$1, + ous: Adj$1, + que: Adj$1, + rmy: Adj$1, + rol: Sing$1, + sis: Sing$1, + zes: Pres$1 + }, { + //4-letter + amed: Past$1, + aped: Past$1, + ched: Past$1, + lked: Past$1, + nded: Past$1, + cted: Past$1, + dged: Past$1, + akis: Last$1, + //greek + cede: Inf$1, + chuk: Last$1, + //east-europe + czyk: Last$1, + //polish (male) + ects: Pres$1, + ends: Vb, + enko: Last$1, + //east-europe + ette: Sing$1, + fies: Pres$1, + fore: Avb, + gate: Inf$1, + gone: Adj$1, + ices: Plrl, + ints: Plrl, + ines: Plrl, + ions: Plrl, + less: Avb, + llen: Adj$1, + made: Adj$1, + nsen: Last$1, + //norway + oses: Pres$1, + ould: Modal, + some: Adj$1, + sson: Last$1, + //swedish male + tage: Inf$1, + teen: 'Value', + tion: Sing$1, + tive: Adj$1, + tors: Noun$1, + vice: Sing$1 + }, { + //5-letter + tized: Past$1, + urned: Past$1, + eased: Past$1, + ances: Plrl, + bound: Adj$1, + ettes: Plrl, + fully: Avb, + ishes: Pres$1, + ities: Plrl, + marek: Last$1, + //polish (male) + nssen: Last$1, + //norway + ology: Noun$1, + ports: Plrl, + rough: Adj$1, + tches: Pres$1, + tieth: 'Ordinal', + tures: Plrl, + wards: Avb, + where: Avb + }, { + //6-letter + auskas: Last$1, + //lithuania + keeper: Actor$1, + logist: Actor$1, + teenth: 'Value' + }, { + //7-letter + opoulos: Last$1, + //greek + borough: Place, + //Hillsborough + sdottir: Last$1 //swedish female + + }]; + + var endRegexs = function endRegexs(term, world) { + var str = term.clean; + var _char = str[str.length - 1]; + + if (endsWith$1.hasOwnProperty(_char) === true) { + var regs = endsWith$1[_char]; + + for (var r = 0; r < regs.length; r += 1) { + if (regs[r][0].test(str) === true) { + term.tagSafe(regs[r][1], "endReg ".concat(_char, " #").concat(r), world); + break; + } + } + } + }; //sweep-through all suffixes + + + var knownSuffixes = function knownSuffixes(term, world) { + var len = term.clean.length; + var max = 7; + + if (len <= max) { + max = len - 1; + } + + for (var i = max; i > 1; i -= 1) { + var str = term.clean.substr(len - i, len); + + if (suffixMap[str.length].hasOwnProperty(str) === true) { + var tag = suffixMap[str.length][str]; + term.tagSafe(tag, 'suffix -' + str, world); + break; + } + } + }; //all-the-way-down! + + + var checkRegex$1 = function checkRegex(term, world) { + knownSuffixes(term, world); + endRegexs(term, world); + }; + + var _04Suffixes = checkRegex$1; + + //just some of the most common emoticons + //faster than + //http://stackoverflow.com/questions/28077049/regex-matching-emoticons + var emoticons = { + ':(': true, + ':)': true, + ':P': true, + ':p': true, + ':O': true, + ':3': true, + ':|': true, + ':/': true, + ':\\': true, + ':$': true, + ':*': true, + ':@': true, + ':-(': true, + ':-)': true, + ':-P': true, + ':-p': true, + ':-O': true, + ':-3': true, + ':-|': true, + ':-/': true, + ':-\\': true, + ':-$': true, + ':-*': true, + ':-@': true, + ':^(': true, + ':^)': true, + ':^P': true, + ':^p': true, + ':^O': true, + ':^3': true, + ':^|': true, + ':^/': true, + ':^\\': true, + ':^$': true, + ':^*': true, + ':^@': true, + '):': true, + '(:': true, + '$:': true, + '*:': true, + ')-:': true, + '(-:': true, + '$-:': true, + '*-:': true, + ')^:': true, + '(^:': true, + '$^:': true, + '*^:': true, + '<3': true, + ' 35) { + return false; + } + + return true; + } + + return false; + }; //check against emoticon whitelist + + + var isEmoticon = function isEmoticon(str) { + str = str.replace(/^[:;]/, ':'); //normalize the 'eyes' + + return emoticons.hasOwnProperty(str); + }; + + var tagEmoji = function tagEmoji(term, world) { + var raw = term.pre + term.text + term.post; + raw = raw.trim(); //dont double-up on ending periods + + raw = raw.replace(/[.!?,]$/, ''); //test for :keyword: emojis + + if (isCommaEmoji(raw) === true) { + term.tag('Emoji', 'comma-emoji', world); + term.text = raw; + term.pre = term.pre.replace(':', ''); + term.post = term.post.replace(':', ''); + } //test for unicode emojis + + + if (term.text.match(emojiReg)) { + term.tag('Emoji', 'unicode-emoji', world); + term.text = raw; + } //test for emoticon ':)' emojis + + + if (isEmoticon(raw) === true) { + term.tag('Emoticon', 'emoticon-emoji', world); + term.text = raw; + } + }; + + var _05Emoji = tagEmoji; + + var steps = { + lexicon: _01Lexicon, + punctuation: _02Punctuation$1, + regex: _03Prefixes, + suffix: _04Suffixes, + emoji: _05Emoji + }; //'lookups' look at a term by itself + + var lookups = function lookups(doc, terms) { + var world = doc.world; //our list of known-words + + steps.lexicon(terms, world); //try these other methods + + for (var i = 0; i < terms.length; i += 1) { + var term = terms[i]; //or maybe some helpful punctuation + + steps.punctuation(terms, i, world); //mostly prefix checks + + steps.regex(term, world); //maybe we can guess + + steps.suffix(term, world); //emoji and emoticons + + steps.emoji(term, world); + } + + return doc; + }; + + var _01Init = lookups; + + //markov-like stats about co-occurance, for hints about unknown terms + //basically, a little-bit better than the noun-fallback + //just top n-grams from nlp tags, generated from nlp-corpus + //after this word, here's what happens usually + var afterThisWord = { + i: 'Verb', + //44% //i walk.. + first: 'Noun', + //50% //first principles.. + it: 'Verb', + //33% + there: 'Verb', + //35% + not: 'Verb', + //33% + because: 'Noun', + //31% + "if": 'Noun', + //32% + but: 'Noun', + //26% + who: 'Verb', + //40% + "this": 'Noun', + //37% + his: 'Noun', + //48% + when: 'Noun', + //33% + you: 'Verb', + //35% + very: 'Adjective', + // 39% + old: 'Noun', + //51% + never: 'Verb', + //42% + before: 'Noun' //28% + + }; //in advance of this word, this is what happens usually + + var beforeThisWord = { + there: 'Verb', + //23% // be there + me: 'Verb', + //31% //see me + man: 'Adjective', + // 80% //quiet man + only: 'Verb', + //27% //sees only + him: 'Verb', + //32% //show him + were: 'Noun', + //48% //we were + took: 'Noun', + //38% //he took + himself: 'Verb', + //31% //see himself + went: 'Noun', + //43% //he went + who: 'Noun', + //47% //person who + jr: 'Person' + }; //following this POS, this is likely + + var afterThisPOS = { + Adjective: 'Noun', + //36% //blue dress + Possessive: 'Noun', + //41% //his song + Determiner: 'Noun', + //47% + Adverb: 'Verb', + //20% + Pronoun: 'Verb', + //40% + Value: 'Noun', + //47% + Ordinal: 'Noun', + //53% + Modal: 'Verb', + //35% + Superlative: 'Noun', + //43% + Demonym: 'Noun', + //38% + Honorific: 'Person' // + + }; //in advance of this POS, this is likely + + var beforeThisPOS = { + Copula: 'Noun', + //44% //spencer is + PastTense: 'Noun', + //33% //spencer walked + Conjunction: 'Noun', + //36% + Modal: 'Noun', + //38% + Pluperfect: 'Noun', + //40% + PerfectTense: 'Verb' //32% + + }; + var markov = { + beforeThisWord: beforeThisWord, + afterThisWord: afterThisWord, + beforeThisPos: beforeThisPOS, + afterThisPos: afterThisPOS + }; + + var afterKeys = Object.keys(markov.afterThisPos); + var beforeKeys = Object.keys(markov.beforeThisPos); + + var checkNeighbours = function checkNeighbours(terms, world) { + var _loop = function _loop(i) { + var term = terms[i]; //do we still need a tag? + + if (term.isKnown() === true) { + return "continue"; + } //ok, this term needs a tag. + //look at previous word for clues.. + + + var lastTerm = terms[i - 1]; + + if (lastTerm) { + // 'foobar term' + if (markov.afterThisWord.hasOwnProperty(lastTerm.clean) === true) { + var tag = markov.afterThisWord[lastTerm.clean]; + term.tag(tag, 'after-' + lastTerm.clean, world); + return "continue"; + } // 'Tag term' + // (look at previous POS tags for clues..) + + + var foundTag = afterKeys.find(function (tag) { + return lastTerm.tags[tag]; + }); + + if (foundTag !== undefined) { + var _tag = markov.afterThisPos[foundTag]; + term.tag(_tag, 'after-' + foundTag, world); + return "continue"; + } + } //look at next word for clues.. + + + var nextTerm = terms[i + 1]; + + if (nextTerm) { + // 'term foobar' + if (markov.beforeThisWord.hasOwnProperty(nextTerm.clean) === true) { + var _tag2 = markov.beforeThisWord[nextTerm.clean]; + term.tag(_tag2, 'before-' + nextTerm.clean, world); + return "continue"; + } // 'term Tag' + // (look at next POS tags for clues..) + + + var _foundTag = beforeKeys.find(function (tag) { + return nextTerm.tags[tag]; + }); + + if (_foundTag !== undefined) { + var _tag3 = markov.beforeThisPos[_foundTag]; + term.tag(_tag3, 'before-' + _foundTag, world); + return "continue"; + } + } + }; + + for (var i = 0; i < terms.length; i += 1) { + var _ret = _loop(i); + + if (_ret === "continue") continue; + } + }; + + var _01Neighbours = checkNeighbours; + + var titleCase$4 = /^[A-Z][a-z'\u00C0-\u00FF]/; + var hasNumber = /[0-9]/; + /** look for any grammar signals based on capital/lowercase */ + + var checkCase = function checkCase(doc) { + var world = doc.world; + doc.list.forEach(function (p) { + var terms = p.terms(); + + for (var i = 1; i < terms.length; i++) { + var term = terms[i]; + + if (titleCase$4.test(term.text) === true && hasNumber.test(term.text) === false) { + term.tag('ProperNoun', 'titlecase-noun', world); + } + } + }); + }; + + var _02Case = checkCase; + + var hasPrefix = /^(re|un)-?[a-z\u00C0-\u00FF]/; + var prefix = /^(re|un)-?/; + /** check 'rewatch' in lexicon as 'watch' */ + + var checkPrefix = function checkPrefix(terms, world) { + var lex = world.words; + terms.forEach(function (term) { + // skip if we have a good tag already + if (term.isKnown() === true) { + return; + } //does it start with 'un|re' + + + if (hasPrefix.test(term.clean) === true) { + // look for the root word in the lexicon: + var stem = term.clean.replace(prefix, ''); + + if (stem && stem.length > 3 && lex[stem] !== undefined && lex.hasOwnProperty(stem) === true) { + term.tag(lex[stem], 'stem-' + stem, world); + } + } + }); + }; + + var _03Stem = checkPrefix; + + //similar to plural/singularize rules, but not the same + var isPlural = [/(^v)ies$/i, /ises$/i, /ives$/i, /(antenn|formul|nebul|vertebr|vit)ae$/i, /(octop|vir|radi|nucle|fung|cact|stimul)i$/i, /(buffal|tomat|tornad)oes$/i, /(analy|ba|diagno|parenthe|progno|synop|the)ses$/i, /(vert|ind|cort)ices$/i, /(matr|append)ices$/i, /(x|ch|ss|sh|s|z|o)es$/i, /is$/i, /men$/i, /news$/i, /.tia$/i, /(^f)ves$/i, /(lr)ves$/i, /(^aeiouy|qu)ies$/i, /(m|l)ice$/i, /(cris|ax|test)es$/i, /(alias|status)es$/i, /ics$/i]; //similar to plural/singularize rules, but not the same + + var isSingular = [/(ax|test)is$/i, /(octop|vir|radi|nucle|fung|cact|stimul)us$/i, /(octop|vir)i$/i, /(rl)f$/i, /(alias|status)$/i, /(bu)s$/i, /(al|ad|at|er|et|ed|ad)o$/i, /(ti)um$/i, /(ti)a$/i, /sis$/i, /(?:(^f)fe|(lr)f)$/i, /hive$/i, /s[aeiou]+ns$/i, // sans, siens + /(^aeiouy|qu)y$/i, /(x|ch|ss|sh|z)$/i, /(matr|vert|ind|cort)(ix|ex)$/i, /(m|l)ouse$/i, /(m|l)ice$/i, /(antenn|formul|nebul|vertebr|vit)a$/i, /.sis$/i, /^(?!talis|.*hu)(.*)man$/i]; + var isPlural_1 = { + isSingular: isSingular, + isPlural: isPlural + }; + + var noPlurals = ['Uncountable', 'Pronoun', 'Place', 'Value', 'Person', 'Month', 'WeekDay', 'Holiday']; + var notPlural = [/ss$/, /sis$/, /[^aeiou][uo]s$/, /'s$/]; + var notSingular = [/i$/, /ae$/]; + /** turn nouns into singular/plural */ + + var checkPlural = function checkPlural(t, world) { + if (t.tags.Noun && !t.tags.Acronym) { + var str = t.clean; //skip existing tags, fast + + if (t.tags.Singular || t.tags.Plural) { + return; + } //too short + + + if (str.length <= 3) { + t.tag('Singular', 'short-singular', world); + return; + } //is it impossible to be plural? + + + if (noPlurals.find(function (tag) { + return t.tags[tag]; + })) { + return; + } // isPlural suffix rules + + + if (isPlural_1.isPlural.find(function (reg) { + return reg.test(str); + })) { + t.tag('Plural', 'plural-rules', world); + return; + } // isSingular suffix rules + + + if (isPlural_1.isSingular.find(function (reg) { + return reg.test(str); + })) { + t.tag('Singular', 'singular-rules', world); + return; + } // finally, fallback 'looks plural' rules.. + + + if (/s$/.test(str) === true) { + //avoid anything too sketchy to be plural + if (notPlural.find(function (reg) { + return reg.test(str); + })) { + return; + } + + t.tag('Plural', 'plural-fallback', world); + return; + } //avoid anything too sketchy to be singular + + + if (notSingular.find(function (reg) { + return reg.test(str); + })) { + return; + } + + t.tag('Singular', 'singular-fallback', world); + } + }; + + var _04Plurals = checkPlural; + + //nouns that also signal the title of an unknown organization + //todo remove/normalize plural forms + var orgWords = ['academy', 'administration', 'agence', 'agences', 'agencies', 'agency', 'airlines', 'airways', 'army', 'assoc', 'associates', 'association', 'assurance', 'authority', 'autorite', 'aviation', 'bank', 'banque', 'board', 'boys', 'brands', 'brewery', 'brotherhood', 'brothers', 'building society', 'bureau', 'cafe', 'caisse', 'capital', 'care', 'cathedral', 'center', 'central bank', 'centre', 'chemicals', 'choir', 'chronicle', 'church', 'circus', 'clinic', 'clinique', 'club', 'co', 'coalition', 'coffee', 'collective', 'college', 'commission', 'committee', 'communications', 'community', 'company', 'comprehensive', 'computers', 'confederation', 'conference', 'conseil', 'consulting', 'containers', 'corporation', 'corps', 'corp', 'council', 'crew', 'daily news', 'data', 'departement', 'department', 'department store', 'departments', 'design', 'development', 'directorate', 'division', 'drilling', 'education', 'eglise', 'electric', 'electricity', 'energy', 'ensemble', 'enterprise', 'enterprises', 'entertainment', 'estate', 'etat', 'evening news', 'faculty', 'federation', 'financial', 'fm', 'foundation', 'fund', 'gas', 'gazette', 'girls', 'government', 'group', 'guild', 'health authority', 'herald', 'holdings', 'hospital', 'hotel', 'hotels', 'inc', 'industries', 'institut', 'institute', 'institute of technology', 'institutes', 'insurance', 'international', 'interstate', 'investment', 'investments', 'investors', 'journal', 'laboratory', 'labs', // 'law', + 'liberation army', 'limited', 'local authority', 'local health authority', 'machines', 'magazine', 'management', 'marine', 'marketing', 'markets', 'media', 'memorial', 'mercantile exchange', 'ministere', 'ministry', 'military', 'mobile', 'motor', 'motors', 'musee', 'museum', // 'network', + 'news', 'news service', 'observatory', 'office', 'oil', 'optical', 'orchestra', 'organization', 'partners', 'partnership', // 'party', + "people's party", 'petrol', 'petroleum', 'pharmacare', 'pharmaceutical', 'pharmaceuticals', 'pizza', 'plc', 'police', 'polytechnic', 'post', 'power', 'press', 'productions', 'quartet', 'radio', 'regional authority', 'regional health authority', 'reserve', 'resources', 'restaurant', 'restaurants', 'savings', 'school', 'securities', 'service', 'services', 'social club', 'societe', 'society', 'sons', 'standard', 'state police', 'state university', 'stock exchange', 'subcommittee', 'syndicat', 'systems', 'telecommunications', 'telegraph', 'television', 'times', 'tribunal', 'tv', 'union', 'university', 'utilities', 'workers']; + var organizations = orgWords.reduce(function (h, str) { + h[str] = 'Noun'; + return h; + }, {}); + + var maybeOrg = function maybeOrg(t) { + //must be a noun + if (!t.tags.Noun) { + return false; + } //can't be these things + + + if (t.tags.Pronoun || t.tags.Comma || t.tags.Possessive) { + return false; + } //must be one of these + + + if (t.tags.Organization || t.tags.Acronym || t.tags.Place || t.titleCase()) { + return true; + } + + return false; + }; + + var tagOrgs = function tagOrgs(terms, world) { + for (var i = 0; i < terms.length; i += 1) { + var t = terms[i]; + + if (organizations[t.clean] !== undefined && organizations.hasOwnProperty(t.clean) === true) { + // look-backward - eg. 'Toronto University' + var lastTerm = terms[i - 1]; + + if (lastTerm !== undefined && maybeOrg(lastTerm) === true) { + lastTerm.tagSafe('Organization', 'org-word-1', world); + t.tagSafe('Organization', 'org-word-2', world); + continue; + } //look-forward - eg. University of Toronto + + + var nextTerm = terms[i + 1]; + + if (nextTerm !== undefined && nextTerm.clean === 'of') { + if (terms[i + 2] && maybeOrg(terms[i + 2])) { + t.tagSafe('Organization', 'org-of-word-1', world); + nextTerm.tagSafe('Organization', 'org-of-word-2', world); + terms[i + 2].tagSafe('Organization', 'org-of-word-3', world); + continue; + } + } + } + } + }; + + var _05Organizations = tagOrgs; + + var oneLetterAcronym$1 = /^[A-Z]('s|,)?$/; + var periodSeperated = /([A-Z]\.){2}[A-Z]?/i; + var oneLetterWord = { + I: true, + A: true + }; + + var isAcronym$2 = function isAcronym(term, world) { + var str = term.reduced; // a known acronym like fbi + + if (term.tags.Acronym) { + return true; + } // if (term.tags.Adverb || term.tags.Verb || term.tags.Value || term.tags.Plural) { + // return false + // } + // known-words, like 'PIZZA' is not an acronym. + + + if (world.words[str]) { + return false; + } + + return term.isAcronym(); + }; // F.B.I., NBC, - but not 'NO COLLUSION' + + + var checkAcronym = function checkAcronym(terms, world) { + terms.forEach(function (term) { + //these are not acronyms + if (term.tags.RomanNumeral === true) { + return; + } //period-ones F.D.B. + + + if (periodSeperated.test(term.text) === true) { + term.tag('Acronym', 'period-acronym', world); + } //non-period ones are harder + + + if (term.isUpperCase() && isAcronym$2(term, world)) { + term.tag('Acronym', 'acronym-step', world); + term.tag('Noun', 'acronym-infer', world); + } else if (!oneLetterWord.hasOwnProperty(term.text) && oneLetterAcronym$1.test(term.text)) { + term.tag('Acronym', 'one-letter-acronym', world); + term.tag('Noun', 'one-letter-infer', world); + } //if it's a organization, + + + if (term.tags.Organization && term.text.length <= 3) { + term.tag('Acronym', 'acronym-org', world); + } + + if (term.tags.Organization && term.isUpperCase() && term.text.length <= 6) { + term.tag('Acronym', 'acronym-org-case', world); + } + }); + }; + + var _06Acronyms = checkAcronym; + + var step = { + neighbours: _01Neighbours, + "case": _02Case, + stem: _03Stem, + plural: _04Plurals, + organizations: _05Organizations, + acronyms: _06Acronyms + }; // + + var fallbacks = function fallbacks(doc, terms) { + var world = doc.world; // if it's empty, consult it's neighbours, first + + step.neighbours(terms, world); // is there a case-sensitive clue? + + step["case"](doc); // check 'rewatch' as 'watch' + + step.stem(terms, world); // ... fallback to a noun! + + terms.forEach(function (t) { + if (t.isKnown() === false) { + t.tag('Noun', 'noun-fallback', doc.world); + } + }); // turn 'Foo University' into an Org + + step.organizations(terms, world); //turn 'FBD' into an acronym + + step.acronyms(terms, world); //are the nouns singular or plural? + + terms.forEach(function (t) { + step.plural(t, doc.world); + }); + return doc; + }; + + var _02Fallbacks = fallbacks; + + var hasNegative = /n't$/; + var irregulars$3 = { + "won't": ['will', 'not'], + wont: ['will', 'not'], + "can't": ['can', 'not'], + cant: ['can', 'not'], + cannot: ['can', 'not'], + "shan't": ['should', 'not'], + dont: ['do', 'not'], + dun: ['do', 'not'] // "ain't" is ambiguous for is/was + + }; // either 'is not' or 'are not' + + var doAint = function doAint(term, phrase) { + var terms = phrase.terms(); + var index = terms.indexOf(term); + var before = terms.slice(0, index); //look for the preceding noun + + var noun = before.find(function (t) { + return t.tags.Noun; + }); + + if (noun && noun.tags.Plural) { + return ['are', 'not']; + } + + return ['is', 'not']; + }; + + var checkNegative = function checkNegative(term, phrase) { + //check named-ones + if (irregulars$3.hasOwnProperty(term.clean) === true) { + return irregulars$3[term.clean]; + } //this word needs it's own logic: + + + if (term.clean === "ain't" || term.clean === 'aint') { + return doAint(term, phrase); + } //try it normally + + + if (hasNegative.test(term.clean) === true) { + var main = term.clean.replace(hasNegative, ''); + return [main, 'not']; + } + + return null; + }; + + var _01Negative = checkNegative; + + var contraction = /([a-z\u00C0-\u00FF]+)[\u0027\u0060\u00B4\u2018\u2019\u201A\u201B\u2032\u2035\u2039\u203A]([a-z]{1,2})$/i; //these ones don't seem to be ambiguous + + var easy = { + ll: 'will', + ve: 'have', + re: 'are', + m: 'am', + "n't": 'not' + }; // + + var checkApostrophe = function checkApostrophe(term) { + var parts = term.text.match(contraction); + + if (parts === null) { + return null; + } + + if (easy.hasOwnProperty(parts[2])) { + return [parts[1], easy[parts[2]]]; + } + + return null; + }; + + var _02Simple = checkApostrophe; + + var irregulars$4 = { + wanna: ['want', 'to'], + gonna: ['going', 'to'], + im: ['i', 'am'], + alot: ['a', 'lot'], + ive: ['i', 'have'], + imma: ['I', 'will'], + "where'd": ['where', 'did'], + whered: ['where', 'did'], + "when'd": ['when', 'did'], + whend: ['when', 'did'], + // "how'd": ['how', 'did'], //'how would?' + // "what'd": ['what', 'did'], //'what would?' + howd: ['how', 'did'], + whatd: ['what', 'did'], + // "let's": ['let', 'us'], //too weird + //multiple word contractions + dunno: ['do', 'not', 'know'], + brb: ['be', 'right', 'back'], + gtg: ['got', 'to', 'go'], + irl: ['in', 'real', 'life'], + tbh: ['to', 'be', 'honest'], + imo: ['in', 'my', 'opinion'], + til: ['today', 'i', 'learned'], + rn: ['right', 'now'], + twas: ['it', 'was'], + '@': ['at'] + }; // + + var checkIrregulars = function checkIrregulars(term) { + //check white-list + if (irregulars$4.hasOwnProperty(term.clean)) { + return irregulars$4[term.clean]; + } + + return null; + }; + + var _03Irregulars = checkIrregulars; + + var hasApostropheS = /([a-z\u00C0-\u00FF]+)[\u0027\u0060\u00B4\u2018\u2019\u201A\u201B\u2032\u2035\u2039\u203A]s$/i; + var banList = { + that: true, + there: true + }; + + var isPossessive = function isPossessive(term, pool) { + // if we already know it + if (term.tags.Possessive) { + return true; + } //a pronoun can't be possessive - "he's house" + + + if (term.tags.Pronoun || term.tags.QuestionWord) { + return false; + } + + if (banList.hasOwnProperty(term.reduced)) { + return false; + } //if end of sentence, it is possessive - "was spencer's" + + + var nextTerm = pool.get(term.next); + + if (!nextTerm) { + return true; + } //a gerund suggests 'is walking' + + + if (nextTerm.tags.Verb) { + //fix 'jamie's bite' + if (nextTerm.tags.Infinitive) { + return true; + } //fix 'spencer's runs' + + + if (nextTerm.tags.PresentTense) { + return true; + } + + return false; + } //spencer's house + + + if (nextTerm.tags.Noun) { + return true; + } //rocket's red glare + + + var twoTerm = pool.get(nextTerm.next); + + if (twoTerm && twoTerm.tags.Noun && !twoTerm.tags.Pronoun) { + return true; + } //othwerwise, an adjective suggests 'is good' + + + if (nextTerm.tags.Adjective || nextTerm.tags.Adverb || nextTerm.tags.Verb) { + return false; + } + + return false; + }; + + var isHas = function isHas(term, phrase) { + var terms = phrase.terms(); + var index = terms.indexOf(term); + var after = terms.slice(index + 1, index + 3); //look for a past-tense verb + + return after.find(function (t) { + return t.tags.PastTense; + }); + }; + + var checkPossessive = function checkPossessive(term, phrase, world) { + //the rest of 's + var found = term.text.match(hasApostropheS); + + if (found !== null) { + //spencer's thing vs spencer-is + if (isPossessive(term, phrase.pool) === true) { + term.tag('#Possessive', 'isPossessive', world); + return null; + } //'spencer is' + + + if (found !== null) { + if (isHas(term, phrase)) { + return [found[1], 'has']; + } + + return [found[1], 'is']; + } + } + + return null; + }; + + var _04Possessive = checkPossessive; + + var hasPerfect = /[a-z\u00C0-\u00FF]'d$/; + var useDid = { + how: true, + what: true + }; + /** split `i'd` into 'i had', or 'i would' */ + + var checkPerfect = function checkPerfect(term, phrase) { + if (hasPerfect.test(term.clean)) { + var root = term.clean.replace(/'d$/, ''); //look at the next few words + + var terms = phrase.terms(); + var index = terms.indexOf(term); + var after = terms.slice(index + 1, index + 4); //is it before a past-tense verb? - 'i'd walked' + + for (var i = 0; i < after.length; i++) { + var t = after[i]; + + if (t.tags.Verb) { + if (t.tags.PastTense) { + return [root, 'had']; + } //what'd you see + + + if (useDid[root] === true) { + return [root, 'did']; + } + + return [root, 'would']; + } + } //otherwise, 'i'd walk' + + + return [root, 'would']; + } + + return null; + }; + + var _05PerfectTense = checkPerfect; + + var isRange = /^([0-9]+)[-–—]([0-9]+)$/i; //split '2-4' into '2 to 4' + + var checkRange = function checkRange(term) { + if (term.tags.PhoneNumber === true) { + return null; + } + + var parts = term.text.match(isRange); + + if (parts !== null) { + return [parts[1], 'to', parts[2]]; + } + + return null; + }; + + var _06Ranges = checkRange; + + var contraction$1 = /^(l|c|d|j|m|n|qu|s|t)[\u0027\u0060\u00B4\u2018\u2019\u201A\u201B\u2032\u2035\u2039\u203A]([a-z\u00C0-\u00FF]+)$/i; // basic support for ungendered french contractions + // not perfect, but better than nothing, to support matching on french text. + + var french = { + l: 'le', + // l'amour + c: 'ce', + // c'est + d: 'de', + // d'amerique + j: 'je', + // j'aime + m: 'me', + // m'appelle + n: 'ne', + // n'est + qu: 'que', + // qu'il + s: 'se', + // s'appelle + t: 'tu' // t'aime + + }; + + var checkFrench = function checkFrench(term) { + var parts = term.text.match(contraction$1); + + if (parts === null || french.hasOwnProperty(parts[1]) === false) { + return null; + } + + var arr = [french[parts[1]], parts[2]]; + + if (arr[0] && arr[1]) { + return arr; + } + + return null; + }; + + var _07French = checkFrench; + + var isNumber = /^[0-9]+$/; + + var createPhrase = function createPhrase(found, doc) { + //create phrase from ['would', 'not'] + var phrase = _01Tokenizer(found.join(' '), doc.world, doc.pool())[0]; //tag it + + var terms = phrase.terms(); + _01Lexicon(terms, doc.world); //make these terms implicit + + terms.forEach(function (t) { + t.implicit = t.text; + t.text = ''; + t.clean = ''; // remove whitespace for implicit terms + + t.pre = ''; + t.post = ''; // tag number-ranges + + if (isNumber.test(t.implicit)) { + t.tags.Number = true; + t.tags.Cardinal = true; + } + }); + return phrase; + }; + + var contractions = function contractions(doc) { + var world = doc.world; + doc.list.forEach(function (p) { + var terms = p.terms(); + + for (var i = 0; i < terms.length; i += 1) { + var term = terms[i]; + var found = _01Negative(term, p); + found = found || _02Simple(term); + found = found || _03Irregulars(term); + found = found || _04Possessive(term, p, world); + found = found || _05PerfectTense(term, p); + found = found || _06Ranges(term); + found = found || _07French(term); //add them in + + if (found !== null) { + var newPhrase = createPhrase(found, doc); // keep tag NumberRange, if we had it + + if (p.has('#NumberRange') === true) { + doc.buildFrom([newPhrase]).tag('NumberRange'); + } //set text as contraction + + + var firstTerm = newPhrase.terms(0); + firstTerm.text = term.text; //grab sub-phrase to remove + + var match = p.buildFrom(term.id, 1, doc.pool()); + match.replace(newPhrase, doc, true); + } + } + }); + return doc; + }; + + var _03Contractions = contractions; + + var hasWord = function hasWord(doc, word) { + var arr = doc._cache.words[word] || []; + arr = arr.map(function (i) { + return doc.list[i]; + }); + return doc.buildFrom(arr); + }; + + var hasTag = function hasTag(doc, tag) { + var arr = doc._cache.tags[tag] || []; + arr = arr.map(function (i) { + return doc.list[i]; + }); + return doc.buildFrom(arr); + }; //mostly pos-corections here + + + var miscCorrection = function miscCorrection(doc) { + //exactly like + var m = hasWord(doc, 'like'); + m.match('#Adverb like').notIf('(really|generally|typically|usually|sometimes|often) [like]').tag('Adverb', 'adverb-like'); //the orange. + + m = hasTag(doc, 'Adjective'); + m.match('#Determiner #Adjective$').notIf('(#Comparative|#Superlative)').terms(1).tag('Noun', 'the-adj-1'); // Firstname x (dangerous) + + m = hasTag(doc, 'FirstName'); + m.match('#FirstName (#Noun|@titleCase)').ifNo('^#Possessive').ifNo('#Pronoun').ifNo('@hasComma .').lastTerm().tag('#LastName', 'firstname-noun'); //three trains / one train + + m = hasTag(doc, 'Value'); + m = m.match('#Value #PresentTense'); + + if (m.found) { + if (m.has('(one|1)') === true) { + m.terms(1).tag('Singular', 'one-presentTense'); + } else { + m.terms(1).tag('Plural', 'value-presentTense'); + } + } // well i've been... + + + doc.match('^(well|so|okay)').tag('Expression', 'well-'); //been walking + + m = hasTag(doc, 'Gerund'); + m.match("(be|been) (#Adverb|not)+? #Gerund").not('#Verb$').tag('Auxiliary', 'be-walking'); // directive verb - 'use reverse' + + doc.match('(try|use|attempt|build|make) #Verb').ifNo('(@hasComma|#Negative|#Copula|will|be)').lastTerm().tag('#Noun', 'do-verb'); //possessives + //'her match' vs 'let her match' + + m = hasTag(doc, 'Possessive'); + m = m.match('#Possessive [#Infinitive]', 0); + + if (!m.lookBehind('(let|made|make|force|ask)').found) { + m.tag('Noun', 'her-match'); + } + + return doc; + }; + + var fixMisc = miscCorrection; + + var unique$5 = function unique(arr) { + var obj = {}; + + for (var i = 0; i < arr.length; i++) { + obj[arr[i]] = true; + } + + return Object.keys(obj); + }; + + var _unique = unique$5; + + // order matters + var list = [// ==== Mutliple tags ==== + { + match: 'too much', + tag: 'Adverb Adjective', + reason: 'bit-4' + }, // u r cool + { + match: 'u r', + tag: 'Pronoun Copula', + reason: 'u r' + }, //sometimes adverbs - 'pretty good','well above' + { + match: '#Copula (pretty|dead|full|well) (#Adjective|#Noun)', + tag: '#Copula #Adverb #Adjective', + reason: 'sometimes-adverb' + }, //walking is cool + { + match: '[#Gerund] #Adverb? not? #Copula', + group: 0, + tag: 'Activity', + reason: 'gerund-copula' + }, //walking should be fun + { + match: '[#Gerund] #Modal', + group: 0, + tag: 'Activity', + reason: 'gerund-modal' + }, //swear-words as non-expression POS + { + match: 'holy (shit|fuck|hell)', + tag: 'Expression', + reason: 'swears-expression' + }, //Aircraft designer + { + match: '#Noun #Actor', + tag: 'Actor', + reason: 'thing-doer' + }, { + match: '#Conjunction [u]', + group: 0, + tag: 'Pronoun', + reason: 'u-pronoun-2' + }, //'u' as pronoun + { + match: '[u] #Verb', + group: 0, + tag: 'Pronoun', + reason: 'u-pronoun-1' + }, // ==== Determiners ==== + { + match: '#Noun [(who|whom)]', + group: 0, + tag: 'Determiner', + reason: 'captain-who' + }, //that car goes + { + match: 'that #Noun [#Verb]', + group: 0, + tag: 'Determiner', + reason: 'that-determiner' + }, { + match: 'a bit much', + tag: 'Determiner Adverb Adjective', + reason: 'bit-3' + }, // ==== Propositions ==== + //all students + { + match: '#Verb #Adverb? #Noun [(that|which)]', + group: 0, + tag: 'Preposition', + reason: 'that-prep' + }, //work, which has been done. + { + match: '@hasComma [which] (#Pronoun|#Verb)', + group: 0, + tag: 'Preposition', + reason: 'which-copula' + }, + { + match: 'just [like]', + group: 0, + tag: 'Preposition', + reason: 'like-preposition' + }, //folks like her + { + match: '#Noun [like] #Noun', + group: 0, + tag: 'Preposition', + reason: 'noun-like' + }, //fix for busted-up phrasalVerbs + { + match: '#Noun [#Particle]', + group: 0, + tag: 'Preposition', + reason: 'repair-noPhrasal' + }, // ==== Conditions ==== + // had he survived, + { + match: '[had] #Noun+ #PastTense', + group: 0, + tag: 'Condition', + reason: 'had-he' + }, // were he to survive + { + match: '[were] #Noun+ to #Infinitive', + group: 0, + tag: 'Condition', + reason: 'were-he' + }, // ==== Questions ==== + //the word 'how' + { + match: '^how', + tag: 'QuestionWord', + reason: 'how-question' + }, { + match: '[how] (#Determiner|#Copula|#Modal|#PastTense)', + group: 0, + tag: 'QuestionWord', + reason: 'how-is' + }, // //the word 'which' + { + match: '^which', + tag: 'QuestionWord', + reason: 'which-question' + }, { + match: '[which] . (#Noun)+ #Pronoun', + group: 0, + tag: 'QuestionWord', + reason: 'which-question2' + }, // { match: 'which', tag: 'QuestionWord', reason: 'which-question3' }, + // ==== Conjunctions ==== + { + match: '[so] #Noun', + group: 0, + tag: 'Conjunction', + reason: 'so-conj' + }, //how he is driving + { + match: '[(who|what|where|why|how|when)] #Noun #Copula #Adverb? (#Verb|#Adjective)', + group: 0, + tag: 'Conjunction', + reason: 'how-he-is-x' + }, + { + match: '[(who|what|where|why|how|when)] #Noun #Adverb? #Infinitive not? #Gerund', + group: 0, + tag: 'Conjunction', + reason: 'when i go fishing' + }, + { /*@blab+*/ + match: '^[(who|what|where|why|how|when)] #Adjective? #Verb #Pronoun', + group: 0, + tag: 'Adverb', + reason: 'where-question' + }, + { /*@blab+*/ + match: 'the way', + tag: 'Noun', + reason: 'fix-1' + }, + ]; + var _01Misc = list; + + //Dates: 'june' or 'may' + var dates = '(april|june|may|jan|august|eve)'; + var list$1 = [// ==== Holiday ==== + { + match: '#Holiday (day|eve)', + tag: 'Holiday', + reason: 'holiday-day' + }, // the captain who + // ==== WeekDay ==== + // sun the 5th + { + match: '[sun] the #Ordinal', + tag: 'WeekDay', + reason: 'sun-the-5th' + }, //sun feb 2 + { + match: '[sun] #Date', + group: 0, + tag: 'WeekDay', + reason: 'sun-feb' + }, //1pm next sun + { + match: '#Date (on|this|next|last|during)? [sun]', + group: 0, + tag: 'WeekDay', + reason: '1pm-sun' + }, //this sat + { + match: "(in|by|before|during|on|until|after|of|within|all) [sat]", + group: 0, + tag: 'WeekDay', + reason: 'sat' + }, //sat november + { + match: '[sat] #Date', + group: 0, + tag: 'WeekDay', + reason: 'sat-feb' + }, // ==== Month ==== + //all march + { + match: "#Preposition [(march|may)]", + group: 0, + tag: 'Month', + reason: 'in-month' + }, //this march + { + match: "this [(march|may)]", + group: 0, + tag: 'Month', + reason: 'this-month' + }, { + match: "next [(march|may)]", + group: 0, + tag: 'Month', + reason: 'this-month' + }, { + match: "last [(march|may)]", + group: 0, + tag: 'Month', + reason: 'this-month' + }, // march 5th + { + match: "[(march|may)] the? #Value", + group: 0, + tag: 'Month', + reason: 'march-5th' + }, // 5th of march + { + match: "#Value of? [(march|may)]", + group: 0, + tag: 'Month', + reason: '5th-of-march' + }, // march and feb + { + match: "[(march|may)] .? #Date", + group: 0, + tag: 'Month', + reason: 'march-and-feb' + }, // feb to march + { + match: "#Date .? [(march|may)]", + group: 0, + tag: 'Month', + reason: 'feb-and-march' + }, //quickly march + { + match: "#Adverb [(march|may)]", + group: 0, + tag: 'Verb', + reason: 'quickly-march' + }, //march quickly + { + match: "[(march|may)] #Adverb", + group: 0, + tag: 'Verb', + reason: 'march-quickly' + }, //5th of March + { + match: '#Value of #Month', + tag: 'Date', + reason: 'value-of-month' + }, //5 March + { + match: '#Cardinal #Month', + tag: 'Date', + reason: 'cardinal-month' + }, //march 5 to 7 + { + match: '#Month #Value to #Value', + tag: 'Date', + reason: 'value-to-value' + }, //march the 12th + { + match: '#Month the #Value', + tag: 'Date', + reason: 'month-the-value' + }, //june 7 + { + match: '(#WeekDay|#Month) #Value', + tag: 'Date', + reason: 'date-value' + }, //7 june + { + match: '#Value (#WeekDay|#Month)', + tag: 'Date', + reason: 'value-date' + }, //may twenty five + { + match: '(#TextValue && #Date) #TextValue', + tag: 'Date', + reason: 'textvalue-date' + }, // in june + { + match: "in [".concat(dates, "]"), + group: 0, + tag: 'Date', + reason: 'in-june' + }, { + match: "during [".concat(dates, "]"), + group: 0, + tag: 'Date', + reason: 'in-june' + }, { + match: "on [".concat(dates, "]"), + group: 0, + tag: 'Date', + reason: 'in-june' + }, { + match: "by [".concat(dates, "]"), + group: 0, + tag: 'Date', + reason: 'in-june' + }, { + match: "before [".concat(dates, "]"), + group: 0, + tag: 'Date', + reason: 'in-june' + }, { + match: "#Date [".concat(dates, "]"), + group: 0, + tag: 'Date', + reason: 'in-june' + }, // june 1992 + { + match: "".concat(dates, " #Value"), + tag: 'Date', + reason: 'june-5th' + }, { + match: "".concat(dates, " #Date"), + tag: 'Date', + reason: 'june-5th' + }, // June Smith + { + match: "".concat(dates, " #ProperNoun"), + tag: 'Person', + reason: 'june-smith', + safe: true + }, // june m. Cooper + { + match: "".concat(dates, " #Acronym? (#ProperNoun && !#Month)"), + tag: 'Person', + reason: 'june-smith-jr' + }, // 'second' + { + match: "#Cardinal [second]", + tag: 'Unit', + reason: 'one-second' + }]; + var _02Dates = list$1; + + var _03Noun = [// ==== Plural ==== + //there are reasons + { + match: 'there (are|were) #Adjective? [#PresentTense]', + group: 0, + tag: 'Plural', + reason: 'there-are' + }, // ==== Singular ==== + //the sun + { + match: '#Determiner [sun]', + group: 0, + tag: 'Singular', + reason: 'the-sun' + }, //did a 900, paid a 20 + { + match: '#Verb (a|an) [#Value]', + group: 0, + tag: 'Singular', + reason: 'did-a-value' + }, //'the can' + { + match: '#Determiner [(can|will|may)]', + group: 0, + tag: 'Singular', + reason: 'the can' + }, // ==== Possessive ==== + //spencer kelly's + { + match: '#FirstName #Acronym? (#Possessive && #LastName)', + tag: 'Possessive', + reason: 'name-poss' + }, //Super Corp's fundraiser + { + match: '#Organization+ #Possessive', + tag: 'Possessive', + reason: 'org-possessive' + }, //Los Angeles's fundraiser + { + match: '#Place+ #Possessive', + tag: 'Possessive', + reason: 'place-possessive' + }, // assign all tasks + { + match: '#Verb (all|every|each|most|some|no) [#PresentTense]', + group: 0, + tag: 'Noun', + reason: 'all-presentTense' + }, //big dreams, critical thinking + { + match: '(#Adjective && !all) [#PresentTense]', + group: 0, + tag: 'Noun', + reason: 'adj-presentTense' + }, //his fine + { + match: '(his|her|its) [#Adjective]', + group: 0, + tag: 'Noun', + reason: 'his-fine' + }, //some pressing issues + { + match: 'some [#Verb] #Plural', + group: 0, + tag: 'Noun', + reason: 'determiner6' + }, //'more' is not always an adverb + { + match: 'more #Noun', + tag: 'Noun', + reason: 'more-noun' + }, { + match: '(#Noun && @hasComma) #Noun (and|or) [#PresentTense]', + group: 0, + tag: 'Noun', + reason: 'noun-list' + }, //3 feet + { + match: '(right|rights) of .', + tag: 'Noun', + reason: 'right-of' + }, // a bit + { + match: 'a [bit]', + group: 0, + tag: 'Noun', + reason: 'bit-2' + }, //running-a-show + { + match: '#Gerund #Determiner [#Infinitive]', + group: 0, + tag: 'Noun', + reason: 'running-a-show' + }, //the-only-reason + { + match: '#Determiner #Adverb [#Infinitive]', + group: 0, + tag: 'Noun', + reason: 'the-reason' + }, //the nice swim + { + match: '(the|this|those|these) #Adjective [#Verb]', + group: 0, + tag: 'Noun', + reason: 'the-adj-verb' + }, // the truly nice swim + { + match: '(the|this|those|these) #Adverb #Adjective [#Verb]', + group: 0, + tag: 'Noun', + reason: 'determiner4' + }, //the orange is + { + match: '#Determiner [#Adjective] (#Copula|#PastTense|#Auxiliary)', + group: 0, + tag: 'Noun', + reason: 'the-adj-2' + }, // a stream runs + { + match: '(the|this|a|an) [#Infinitive] #Adverb? #Verb', + group: 0, + tag: 'Noun', + reason: 'determiner5' + }, //the test string + { + match: '#Determiner [#Infinitive] #Noun', + group: 0, + tag: 'Noun', + reason: 'determiner7' + }, //by a bear. + { + match: '#Determiner #Adjective [#Infinitive]$', + group: 0, + tag: 'Noun', + reason: 'a-inf' + }, //the wait to vote + { + match: '(the|this) [#Verb] #Preposition .', + group: 0, + tag: 'Noun', + reason: 'determiner1' + }, //a sense of + { + match: '#Determiner [#Verb] of', + group: 0, + tag: 'Noun', + reason: 'the-verb-of' + }, //the threat of force + { + match: '#Determiner #Noun of [#Verb]', + group: 0, + tag: 'Noun', + reason: 'noun-of-noun' + }, //the western line + { + match: '#Determiner [(western|eastern|northern|southern|central)] #Noun', + group: 0, + tag: 'Noun', + reason: 'western-line' + }, //her polling + { + match: '#Possessive [#Gerund]', + group: 0, + tag: 'Noun', + reason: 'her-polling' + }, //her fines + { + match: '(his|her|its) [#PresentTense]', + group: 0, + tag: 'Noun', + reason: 'its-polling' + }, //linear algebra + { + match: '(#Determiner|#Value) [(linear|binary|mobile|lexical|technical|computer|scientific|formal)] #Noun', + group: 0, + tag: 'Noun', + reason: 'technical-noun' + }, // walk the walk + { + match: '(the|those|these) #Adjective? [#Infinitive]', + group: 0, + tag: 'Noun', + reason: 'det-inf' + }, { + match: '(the|those|these) #Adjective? [#PresentTense]', + group: 0, + tag: 'Noun', + reason: 'det-pres' + }, { + match: '(the|those|these) #Adjective? [#PastTense]', + group: 0, + tag: 'Noun', + reason: 'det-past' + }, //air-flow + { + match: '(#Noun && @hasHyphen) #Verb', + tag: 'Noun', + reason: 'hyphen-verb' + }, //is no walk + { + match: 'is no [#Verb]', + group: 0, + tag: 'Noun', + reason: 'is-no-verb' + }, //different views than + { + match: '[#Verb] than', + group: 0, + tag: 'Noun', + reason: 'correction' + }, // goes to sleep + { + match: '(go|goes|went) to [#Infinitive]', + group: 0, + tag: 'Noun', + reason: 'goes-to-verb' + }, //a great run + { + match: '(a|an) #Adjective [(#Infinitive|#PresentTense)]', + tag: 'Noun', + reason: 'a|an2' + }, //a tv show + { + match: '(a|an) #Noun [#Infinitive]', + group: 0, + tag: 'Noun', + reason: 'a-noun-inf' + }, //do so + { + match: 'do [so]', + group: 0, + tag: 'Noun', + reason: 'so-noun' + }, //is mark hughes + { + match: '#Copula [#Infinitive] #Noun', + group: 0, + tag: 'Noun', + reason: 'is-pres-noun' + }, // + // { match: '[#Infinitive] #Copula', group: 0, tag: 'Noun', reason: 'inf-copula' }, + //a close + { + match: '#Determiner #Adverb? [close]', + group: 0, + tag: 'Adjective', + reason: 'a-close' + }, // what the hell + { + match: '#Determiner [(shit|damn|hell)]', + group: 0, + tag: 'Noun', + reason: 'swears-noun' + }]; + + var adjectives$1 = '(misty|rusty|dusty|rich|randy)'; + var list$2 = [// all fell apart + { + match: '[all] #Determiner? #Noun', + group: 0, + tag: 'Adjective', + reason: 'all-noun' + }, // very rusty + { + match: "#Adverb [".concat(adjectives$1, "]"), + group: 0, + tag: 'Adjective', + reason: 'really-rich' + }, // rusty smith + { + match: "".concat(adjectives$1, " #Person"), + tag: 'Person', + reason: 'randy-smith' + }, // rusty a. smith + { + match: "".concat(adjectives$1, " #Acronym? #ProperNoun"), + tag: 'Person', + reason: 'rusty-smith' + }, //sometimes not-adverbs + { + match: '#Copula [(just|alone)]$', + group: 0, + tag: 'Adjective', + reason: 'not-adverb' + }, //jack is guarded + { + match: '#Singular is #Adverb? [#PastTense$]', + group: 0, + tag: 'Adjective', + reason: 'is-filled' + }, // smoked poutine is + { + match: '[#PastTense] #Singular is', + group: 0, + tag: 'Adjective', + reason: 'smoked-poutine' + }, // baked onions are + { + match: '[#PastTense] #Plural are', + group: 0, + tag: 'Adjective', + reason: 'baked-onions' + }, //a staggering cost + { + match: '(a|an) [#Gerund]', + group: 0, + tag: 'Adjective', + reason: 'a|an' + }, // is f*ed up + { + match: '#Copula [fucked up?]', + tag: 'Adjective', + reason: 'swears-adjective' + }, //jack seems guarded + { + match: '#Singular (seems|appears) #Adverb? [#PastTense$]', + group: 0, + tag: 'Adjective', + reason: 'seems-filled' + }]; + var _04Adjective = list$2; + + var _05Adverb = [//still good + { + match: '[still] #Adjective', + group: 0, + tag: 'Adverb', + reason: 'still-advb' + }, //still make + { + match: '[still] #Verb', + group: 0, + tag: 'Adverb', + reason: 'still-verb' + }, // so hot + { + match: '[so] #Adjective', + group: 0, + tag: 'Adverb', + reason: 'so-adv' + }, // all singing + { + match: '[all] #Verb', + group: 0, + tag: 'Adverb', + reason: 'all-verb' + }, // sing like an angel + { + match: '#Verb [like]', + group: 0, + tag: 'Adverb', + reason: 'verb-like' + }, //barely even walk + { + match: '(barely|hardly) even', + tag: 'Adverb', + reason: 'barely-even' + }, //cheering hard - dropped -ly's + { + match: '#PresentTense [(hard|quick|long|bright|slow)]', + group: 0, + tag: 'Adverb', + reason: 'lazy-ly' + }, // much appreciated + { + match: '[much] #Adjective', + group: 0, + tag: 'Adverb', + reason: 'bit-1' + }]; + + var _06Value = [// ==== PhoneNumber ==== + //1 800 ... + { + match: '1 #Value #PhoneNumber', + tag: 'PhoneNumber', + reason: '1-800-Value' + }, //(454) 232-9873 + { + match: '#NumericValue #PhoneNumber', + tag: 'PhoneNumber', + reason: '(800) PhoneNumber' + }, // ==== Currency ==== + // chinese yuan + { + match: '#Demonym #Currency', + tag: 'Currency', + reason: 'demonym-currency' + }, // ==== Ordinal ==== + { + match: '[second] #Noun', + group: 0, + tag: 'Ordinal', + reason: 'second-noun' + }, // ==== Unit ==== + //5 yan + { + match: '#Value+ [#Currency]', + group: 0, + tag: 'Unit', + reason: '5-yan' + }, { + match: '#Value [(foot|feet)]', + group: 0, + tag: 'Unit', + reason: 'foot-unit' + }, //minus 7 + { + match: '(minus|negative) #Value', + tag: 'Value', + reason: 'minus-value' + }, //5 kg. + { + match: '#Value [#Abbreviation]', + group: 0, + tag: 'Unit', + reason: 'value-abbr' + }, { + match: '#Value [k]', + group: 0, + tag: 'Unit', + reason: 'value-k' + }, { + match: '#Unit an hour', + tag: 'Unit', + reason: 'unit-an-hour' + }, //seven point five + { + match: '#Value (point|decimal) #Value', + tag: 'Value', + reason: 'value-point-value' + }, // ten bucks + { + match: '(#Value|a) [(buck|bucks|grand)]', + group: 0, + tag: 'Currency', + reason: 'value-bucks' + }, //quarter million + { + match: '#Determiner [(half|quarter)] #Ordinal', + group: 0, + tag: 'Value', + reason: 'half-ordinal' + }, { + match: 'a #Value', + tag: 'Value', + reason: 'a-value' + }, // ==== Money ==== + { + match: '[#Value+] #Currency', + group: 0, + tag: 'Money', + reason: '15 usd' + }, // thousand and two + { + match: "(hundred|thousand|million|billion|trillion|quadrillion)+ and #Value", + tag: 'Value', + reason: 'magnitude-and-value' + }, //'a/an' can mean 1 - "a hour" + { + match: '!once [(a|an)] (#Duration|hundred|thousand|million|billion|trillion)', + group: 0, + tag: 'Value', + reason: 'a-is-one' + }]; + + var verbs$1 = '(pat|wade|ollie|will|rob|buck|bob|mark|jack)'; + var list$3 = [// ==== Tense ==== + //he left + { + match: '#Noun #Adverb? [left]', + group: 0, + tag: 'PastTense', + reason: 'left-verb' + }, //this rocks + { + match: '(this|that) [#Plural]', + group: 0, + tag: 'PresentTense', + reason: 'this-verbs' + }, // ==== Auxiliary ==== + //was walking + { + match: "[#Copula (#Adverb|not)+?] (#Gerund|#PastTense)", + group: 0, + tag: 'Auxiliary', + reason: 'copula-walking' + }, //support a splattering of auxillaries before a verb + { + match: "[(has|had) (#Adverb|not)+?] #PastTense", + group: 0, + tag: 'Auxiliary', + reason: 'had-walked' + }, //would walk + { + match: "[#Adverb+? (#Modal|did)+ (#Adverb|not)+?] #Verb", + group: 0, + tag: 'Auxiliary', + reason: 'modal-verb' + }, //would have had + { + match: "[#Modal (#Adverb|not)+? have (#Adverb|not)+? had (#Adverb|not)+?] #Verb", + group: 0, + tag: 'Auxiliary', + reason: 'would-have' + }, //would be walking + { + match: "#Modal (#Adverb|not)+? be (#Adverb|not)+? #Verb", + group: 0, + tag: 'Auxiliary', + reason: 'would-be' + }, //had been walking + { + match: "(#Modal|had|has) (#Adverb|not)+? been (#Adverb|not)+? #Verb", + group: 0, + tag: 'Auxiliary', + reason: 'had-been' + }, //was walking + { + match: "[#Copula (#Adverb|not)+?] (#Gerund|#PastTense)", + group: 0, + tag: 'Auxiliary', + reason: 'copula-walking' + }, //support a splattering of auxillaries before a verb + { + match: "[(has|had) (#Adverb|not)+?] #PastTense", + group: 0, + tag: 'Auxiliary', + reason: 'had-walked' + }, // will walk + { + match: '[(do|does|will|have|had)] (not|#Adverb)? #Verb', + group: 0, + tag: 'Auxiliary', + reason: 'have-had' + }, // about to go + { + match: '[about to] #Adverb? #Verb', + group: 0, + tag: ['Auxiliary', 'Verb'], + reason: 'about-to' + }, //would be walking + { + match: "#Modal (#Adverb|not)+? be (#Adverb|not)+? #Verb", + group: 0, + tag: 'Auxiliary', + reason: 'would-be' + }, //would have had + { + match: "[#Modal (#Adverb|not)+? have (#Adverb|not)+? had (#Adverb|not)+?] #Verb", + group: 0, + tag: 'Auxiliary', + reason: 'would-have' + }, //had been walking + { + match: "(#Modal|had|has) (#Adverb|not)+? been (#Adverb|not)+? #Verb", + group: 0, + tag: 'Auxiliary', + reason: 'had-been' + }, // was being driven + { + match: '[(be|being|been)] #Participle', + group: 0, + tag: 'Auxiliary', + reason: 'being-foo' + }, // ==== Phrasal ==== + //'foo-up' + { + match: '(#Verb && @hasHyphen) up', + group: 0, + tag: 'PhrasalVerb', + reason: 'foo-up' + }, { + match: '(#Verb && @hasHyphen) off', + group: 0, + tag: 'PhrasalVerb', + reason: 'foo-off' + }, { + match: '(#Verb && @hasHyphen) over', + group: 0, + tag: 'PhrasalVerb', + reason: 'foo-over' + }, { + match: '(#Verb && @hasHyphen) out', + group: 0, + tag: 'PhrasalVerb', + reason: 'foo-out' + }, //fall over + { + match: '#PhrasalVerb [#PhrasalVerb]', + group: 0, + tag: 'Particle', + reason: 'phrasal-particle' + }, // ==== Copula ==== + //will be running (not copula) + { + match: '[will #Adverb? not? #Adverb? be] #Gerund', + group: 0, + tag: 'Copula', + reason: 'will-be-copula' + }, //for more complex forms, just tag 'be' + { + match: 'will #Adverb? not? #Adverb? [be] #Adjective', + group: 0, + tag: 'Copula', + reason: 'be-copula' + }, // ==== Infinitive ==== + //march to + { + match: '[march] (up|down|back|to|toward)', + group: 0, + tag: 'Infinitive', + reason: 'march-to' + }, //must march + { + match: '#Modal [march]', + group: 0, + tag: 'Infinitive', + reason: 'must-march' + }, //let him glue + { + match: '(let|make|made) (him|her|it|#Person|#Place|#Organization)+ [#Singular] (a|an|the|it)', + group: 0, + tag: 'Infinitive', + reason: 'let-him-glue' + }, //he quickly foo + { + match: '#Noun #Adverb [#Noun]', + group: 0, + tag: 'Verb', + reason: 'quickly-foo' + }, //will secure our + { + match: 'will [#Adjective]', + group: 0, + tag: 'Verb', + reason: 'will-adj' + }, //he disguised the thing + { + match: '#Pronoun [#Adjective] #Determiner #Adjective? #Noun', + group: 0, + tag: 'Verb', + reason: 'he-adj-the' + }, //is eager to go + { + match: '#Copula [#Adjective to] #Verb', + group: 0, + tag: 'Verb', + reason: 'adj-to' + }, // open the door + { + match: '[open] #Determiner', + group: 0, + tag: 'Infinitive', + reason: 'open-the' + }, // would wade + { + match: "#Modal [".concat(verbs$1, "]"), + group: 0, + tag: 'Verb', + reason: 'would-mark' + }, { + match: "#Adverb [".concat(verbs$1, "]"), + group: 0, + tag: 'Verb', + reason: 'really-mark' + }, // wade smith + { + match: "".concat(verbs$1, " #Person"), + tag: 'Person', + reason: 'rob-smith' + }, // wade m. Cooper + { + match: "".concat(verbs$1, " #Acronym? #ProperNoun"), + tag: 'Person', + reason: 'rob-a-smith' + }, // damn them + { + match: '[shit] (#Determiner|#Possessive|them)', + group: 0, + tag: 'Verb', + reason: 'swear1-verb' + }, { + match: '[damn] (#Determiner|#Possessive|them)', + group: 0, + tag: 'Verb', + reason: 'swear2-verb' + }, { + match: '[fuck] (#Determiner|#Possessive|them)', + group: 0, + tag: 'Verb', + reason: 'swear3-verb' + }]; + var _07Verbs = list$3; + + var places = '(paris|alexandria|houston|kobe|salvador|sydney)'; + var list$4 = [// ==== Region ==== + //West Norforlk + { + match: '(west|north|south|east|western|northern|southern|eastern)+ #Place', + tag: 'Region', + reason: 'west-norfolk' + }, //some us-state acronyms (exlude: al, in, la, mo, hi, me, md, ok..) + { + match: '#City [(al|ak|az|ar|ca|ct|dc|fl|ga|id|il|nv|nh|nj|ny|oh|or|pa|sc|tn|tx|ut|vt|pr)]', + group: 0, + tag: 'Region', + reason: 'us-state' + }, //Foo District + { + match: '#ProperNoun+ (district|region|province|county|prefecture|municipality|territory|burough|reservation)', + tag: 'Region', + reason: 'foo-district' + }, //District of Foo + { + match: '(district|region|province|municipality|territory|burough|state) of #ProperNoun', + tag: 'Region', + reason: 'district-of-Foo' + }, // in Foo California + { + match: 'in [#ProperNoun] #Place', + group: 0, + tag: 'Place', + reason: 'propernoun-place' + }, // ==== Address ==== + { + match: '#Value #Noun (st|street|rd|road|crescent|cr|way|tr|terrace|avenue|ave)', + tag: 'Address', + reason: 'address-st' + }, // in houston + { + match: "in [".concat(places, "]"), + group: 0, + tag: 'Place', + reason: 'in-paris' + }, { + match: "near [".concat(places, "]"), + group: 0, + tag: 'Place', + reason: 'near-paris' + }, { + match: "at [".concat(places, "]"), + group: 0, + tag: 'Place', + reason: 'at-paris' + }, { + match: "from [".concat(places, "]"), + group: 0, + tag: 'Place', + reason: 'from-paris' + }, { + match: "to [".concat(places, "]"), + group: 0, + tag: 'Place', + reason: 'to-paris' + }, { + match: "#Place [".concat(places, "]"), + group: 0, + tag: 'Place', + reason: 'tokyo-paris' + }, // houston texas + { + match: "[".concat(places, "] #Place"), + group: 0, + tag: 'Place', + reason: 'paris-france' + }]; + var _08Place = list$4; + + var _09Org = [//John & Joe's + { + match: '#Noun (&|n) #Noun', + tag: 'Organization', + reason: 'Noun-&-Noun' + }, // teachers union of Ontario + { + match: '#Organization of the? #ProperNoun', + tag: 'Organization', + reason: 'org-of-place', + safe: true + }, //walmart USA + { + match: '#Organization #Country', + tag: 'Organization', + reason: 'org-country' + }, //organization + { + match: '#ProperNoun #Organization', + tag: 'Organization', + reason: 'titlecase-org' + }, //FitBit Inc + { + match: '#ProperNoun (ltd|co|inc|dept|assn|bros)', + tag: 'Organization', + reason: 'org-abbrv' + }, // the OCED + { + match: 'the [#Acronym]', + group: 0, + tag: 'Organization', + reason: 'the-acronym', + safe: true + }, // global trade union + { + match: '(world|global|international|national|#Demonym) #Organization', + tag: 'Organization', + reason: 'global-org' + }, // schools + { + match: '#Noun+ (public|private) school', + tag: 'School', + reason: 'noun-public-school' + }]; + + var nouns$1 = '(rose|robin|dawn|ray|holly|bill|joy|viola|penny|sky|violet|daisy|melody|kelvin|hope|mercedes|olive|jewel|faith|van|charity|miles|lily|summer|dolly|rod|dick|cliff|lane|reed|kitty|art|jean|trinity)'; + var months = '(january|april|may|june|jan|sep)'; //summer|autumn + + var list$5 = [// ==== Honorific ==== + { + match: '[(1st|2nd|first|second)] #Honorific', + group: 0, + tag: 'Honorific', + reason: 'ordinal-honorific' + }, { + match: '[(private|general|major|corporal|lord|lady|secretary|premier)] #Honorific? #Person', + group: 0, + tag: 'Honorific', + reason: 'ambg-honorifics' + }, // ==== FirstNames ==== + //is foo Smith + { + match: '#Copula [(#Noun|#PresentTense)] #LastName', + group: 0, + tag: 'FirstName', + reason: 'copula-noun-lastname' + }, //pope francis + { + match: '(lady|queen|sister) #ProperNoun', + tag: 'FemaleName', + reason: 'lady-titlecase', + safe: true + }, { + match: '(king|pope|father) #ProperNoun', + tag: 'MaleName', + reason: 'pope-titlecase', + safe: true + }, //ambiguous-but-common firstnames + { + match: '[(will|may|april|june|said|rob|wade|ray|rusty|drew|miles|jack|chuck|randy|jan|pat|cliff|bill)] #LastName', + group: 0, + tag: 'FirstName', + reason: 'maybe-lastname' + }, // ==== Nickname ==== + // Dwayne 'the rock' Johnson + { + match: '#FirstName [#Determiner #Noun] #LastName', + group: 0, + tag: 'NickName', + reason: 'first-noun-last' + }, //my buddy + { + match: '#Possessive [#FirstName]', + group: 0, + tag: 'Person', + reason: 'possessive-name' + }, { + match: '#Acronym #ProperNoun', + tag: 'Person', + reason: 'acronym-titlecase', + safe: true + }, //ludwig van beethovan + { + match: '#Person (jr|sr|md)', + tag: 'Person', + reason: 'person-honorific' + }, //peter II + { + match: '#Person #Person the? #RomanNumeral', + tag: 'Person', + reason: 'roman-numeral' + }, //'Professor Fink', 'General McCarthy' + { + match: '#FirstName [/^[^aiurck]$/]', + group: 0, + tag: ['Acronym', 'Person'], + reason: 'john-e' + }, //Doctor john smith jr + //general pearson + { + match: '#Honorific #Person', + tag: 'Person', + reason: 'honorific-person' + }, //remove single 'mr' + { + match: '#Honorific #Acronym', + tag: 'Person', + reason: 'Honorific-TitleCase' + }, //j.k Rowling + { + match: '#Noun van der? #Noun', + tag: 'Person', + reason: 'von der noun', + safe: true + }, //king of spain + { + match: '(king|queen|prince|saint|lady) of? #Noun', + tag: 'Person', + reason: 'king-of-noun', + safe: true + }, //Foo U Ford + { + match: '[#ProperNoun] #Person', + group: 0, + tag: 'Person', + reason: 'proper-person', + safe: true + }, // al sharpton + { + match: 'al (#Person|#ProperNoun)', + tag: 'Person', + reason: 'al-borlen', + safe: true + }, //ferdinand de almar + { + match: '#FirstName de #Noun', + tag: 'Person', + reason: 'bill-de-noun' + }, //Osama bin Laden + { + match: '#FirstName (bin|al) #Noun', + tag: 'Person', + reason: 'bill-al-noun' + }, //John L. Foo + { + match: '#FirstName #Acronym #ProperNoun', + tag: 'Person', + reason: 'bill-acronym-title' + }, //Andrew Lloyd Webber + { + match: '#FirstName #FirstName #ProperNoun', + tag: 'Person', + reason: 'bill-firstname-title' + }, //Mr Foo + { + match: '#Honorific #FirstName? #ProperNoun', + tag: 'Person', + reason: 'dr-john-Title' + }, //peter the great + { + match: '#FirstName the #Adjective', + tag: 'Person', + reason: 'name-the-great' + }, //very common-but-ambiguous lastnames + { + match: '#FirstName (green|white|brown|hall|young|king|hill|cook|gray|price)', + tag: 'Person', + reason: 'bill-green' + }, // faith smith + { + match: "".concat(nouns$1, " #Person"), + tag: 'Person', + reason: 'ray-smith', + safe: true + }, // faith m. Smith + { + match: "".concat(nouns$1, " #Acronym? #ProperNoun"), + tag: 'Person', + reason: 'ray-a-smith', + safe: true + }, //give to april + { + match: "#Infinitive #Determiner? #Adjective? #Noun? (to|for) [".concat(months, "]"), + group: 0, + tag: 'Person', + reason: 'ambig-person' + }, // remind june + { + match: "#Infinitive [".concat(months, "]"), + group: 0, + tag: 'Person', + reason: 'infinitive-person' + }, // may waits for + { + match: "[".concat(months, "] #PresentTense for"), + group: 0, + tag: 'Person', + reason: 'ambig-active-for' + }, // may waits to + { + match: "[".concat(months, "] #PresentTense to"), + group: 0, + tag: 'Person', + reason: 'ambig-active-to' + }, // april will + { + match: "[".concat(months, "] #Modal"), + group: 0, + tag: 'Person', + reason: 'ambig-modal' + }, // would april + { + match: "#Modal [".concat(months, "]"), + group: 0, + tag: 'Person', + reason: 'modal-ambig' + }, // it is may + { + match: "#Copula [".concat(months, "]"), + group: 0, + tag: 'Person', + reason: 'is-may' + }, // may is + { + match: "[".concat(months, "] #Copula"), + group: 0, + tag: 'Person', + reason: 'may-is' + }, // with april + { + match: "that [".concat(months, "]"), + group: 0, + tag: 'Person', + reason: 'that-month' + }, // with april + { + match: "with [".concat(months, "]"), + group: 0, + tag: 'Person', + reason: 'with-month' + }, // for april + { + match: "for [".concat(months, "]"), + group: 0, + tag: 'Person', + reason: 'for-month' + }, // this april + { + match: "this [".concat(months, "]"), + group: 0, + tag: 'Month', + reason: 'this-may' + }, //maybe not 'this' + // next april + { + match: "next [".concat(months, "]"), + group: 0, + tag: 'Month', + reason: 'next-may' + }, // last april + { + match: "last [".concat(months, "]"), + group: 0, + tag: 'Month', + reason: 'last-may' + }, // wednesday april + { + match: "#Date [".concat(months, "]"), + group: 0, + tag: 'Month', + reason: 'date-may' + }, // may 5th + { + match: "[".concat(months, "] the? #Value"), + group: 0, + tag: 'Month', + reason: 'may-5th' + }, // 5th of may + { + match: "#Value of [".concat(months, "]"), + group: 0, + tag: 'Month', + reason: '5th-of-may' + }, // dick van dyke + { + match: '#ProperNoun (van|al|bin) #ProperNoun', + tag: 'Person', + reason: 'title-van-title', + safe: true + }, //jose de Sucre + { + match: '#ProperNoun (de|du) la? #ProperNoun', + tag: 'Person', + reason: 'title-de-title', + safe: true + }, //Jani K. Smith + { + match: '#Singular #Acronym #LastName', + tag: '#Person', + reason: 'title-acro-noun', + safe: true + }, //John Foo + { + match: '#FirstName (#Noun && #ProperNoun) #ProperNoun?', + tag: 'Person', + reason: 'firstname-titlecase' + }, //Joe K. Sombrero + { + match: '#FirstName #Acronym #Noun', + tag: 'Person', + reason: 'n-acro-noun', + safe: true + }]; + var _10People = list$5; + + var matches = []; + matches = matches.concat(_01Misc); + matches = matches.concat(_02Dates); + matches = matches.concat(_03Noun); + matches = matches.concat(_04Adjective); + matches = matches.concat(_05Adverb); + matches = matches.concat(_06Value); + matches = matches.concat(_07Verbs); + matches = matches.concat(_08Place); + matches = matches.concat(_09Org); + matches = matches.concat(_10People); // cache the easier conditions up-front + + var cacheRequired$1 = function cacheRequired(reg) { + var needTags = []; + var needWords = []; + reg.forEach(function (obj) { + if (obj.optional === true || obj.negative === true) { + return; + } + + if (obj.tag !== undefined) { + needTags.push(obj.tag); + } + + if (obj.word !== undefined) { + needWords.push(obj.word); + } + }); + return { + tags: _unique(needTags), + words: _unique(needWords) + }; + }; + + var allLists = function allLists(m) { + var more = []; + var lists = m.reg.filter(function (r) { + return r.oneOf !== undefined; + }); + + if (lists.length === 1) { + var i = m.reg.findIndex(function (r) { + return r.oneOf !== undefined; + }); + Object.keys(m.reg[i].oneOf).forEach(function (w) { + var newM = Object.assign({}, m); + newM.reg = newM.reg.slice(0); + newM.reg[i] = Object.assign({}, newM.reg[i]); + newM.reg[i].word = w; + delete newM.reg[i].operator; + delete newM.reg[i].oneOf; + newM.reason += '-' + w; + more.push(newM); + }); + } + + return more; + }; // parse them + + + var all = []; + matches.forEach(function (m) { + m.reg = syntax_1(m.match); + var enumerated = allLists(m); + + if (enumerated.length > 0) { + all = all.concat(enumerated); + } else { + all.push(m); + } + }); + all.forEach(function (m) { + m.required = cacheRequired$1(m.reg); + return m; + }); + var matches_1 = all; + + var hasEvery = function hasEvery(chances) { + if (chances.length === 0) { + return []; + } + + var obj = {}; + chances.forEach(function (arr) { + arr = _unique(arr); + + for (var i = 0; i < arr.length; i++) { + obj[arr[i]] = obj[arr[i]] || 0; + obj[arr[i]] += 1; + } + }); + var res = Object.keys(obj); + res = res.filter(function (k) { + return obj[k] === chances.length; + }); + res = res.map(function (num) { + return Number(num); + }); + return res; + }; + + var runner = function runner(doc) { + //find phrases to try for each match + matches_1.forEach(function (m) { + var allChances = []; + m.required.words.forEach(function (w) { + allChances.push(doc._cache.words[w] || []); + }); + m.required.tags.forEach(function (tag) { + allChances.push(doc._cache.tags[tag] || []); + }); + var worthIt = hasEvery(allChances); + + if (worthIt.length === 0) { + return; + } + + var phrases = worthIt.map(function (index) { + return doc.list[index]; + }); + var tryDoc = doc.buildFrom(phrases); // phrases getting tagged + + var match = tryDoc.match(m.reg, m.group); + + if (match.found) { + if (m.safe === true) { + match.tagSafe(m.tag, m.reason); + } else { + match.tag(m.tag, m.reason); + } + } + }); + }; + + var runner_1 = runner; // console.log(hasEvery([[1, 2, 2, 3], [2, 3], []])) + + // misc: 40ms + //sequence of match-tag statements to correct mis-tags + + var corrections = function corrections(doc) { + runner_1(doc); + fixMisc(doc); + return doc; + }; + + var _04Correction = corrections; + + /** POS-tag all terms in this document */ + + var tagger = function tagger(doc) { + var terms = doc.termList(); // check against any known-words + + doc = _01Init(doc, terms); // everything has gotta be something. ¯\_(:/)_/¯ + + doc = _02Fallbacks(doc, terms); // support "didn't" & "spencer's" + + doc = _03Contractions(doc); //set our cache, to speed things up + + doc.cache(); // wiggle-around the results, so they make more sense + + doc = _04Correction(doc); // remove our cache, as it's invalidated now + + doc.uncache(); // run any user-given tagger functions + + doc.world.taggers.forEach(function (fn) { + fn(doc); + }); + return doc; + }; + + var _02Tagger = tagger; + + var addMethod = function addMethod(Doc) { + /** */ + var Abbreviations = /*#__PURE__*/function (_Doc) { + _inherits(Abbreviations, _Doc); + + var _super = _createSuper(Abbreviations); + + function Abbreviations() { + _classCallCheck(this, Abbreviations); + + return _super.apply(this, arguments); + } + + _createClass(Abbreviations, [{ + key: "stripPeriods", + value: function stripPeriods() { + this.termList().forEach(function (t) { + if (t.tags.Abbreviation === true && t.next) { + t.post = t.post.replace(/^\./, ''); + } + + var str = t.text.replace(/\./, ''); + t.set(str); + }); + return this; + } + }, { + key: "addPeriods", + value: function addPeriods() { + this.termList().forEach(function (t) { + t.post = t.post.replace(/^\./, ''); + t.post = '.' + t.post; + }); + return this; + } + }]); + + return Abbreviations; + }(Doc); + + Abbreviations.prototype.unwrap = Abbreviations.prototype.stripPeriods; + + Doc.prototype.abbreviations = function (n) { + var match = this.match('#Abbreviation'); + + if (typeof n === 'number') { + match = match.get(n); + } + + return new Abbreviations(match.list, this, this.world); + }; + + return Doc; + }; + + var Abbreviations = addMethod; + + var hasPeriod = /\./; + + var addMethod$1 = function addMethod(Doc) { + /** */ + var Acronyms = /*#__PURE__*/function (_Doc) { + _inherits(Acronyms, _Doc); + + var _super = _createSuper(Acronyms); + + function Acronyms() { + _classCallCheck(this, Acronyms); + + return _super.apply(this, arguments); + } + + _createClass(Acronyms, [{ + key: "stripPeriods", + value: function stripPeriods() { + this.termList().forEach(function (t) { + var str = t.text.replace(/\./g, ''); + t.set(str); + }); + return this; + } + }, { + key: "addPeriods", + value: function addPeriods() { + this.termList().forEach(function (t) { + var str = t.text.replace(/\./g, ''); + str = str.split('').join('.'); // don't add a end-period if there's a sentence-end one + + if (hasPeriod.test(t.post) === false) { + str += '.'; + } + + t.set(str); + }); + return this; + } + }]); + + return Acronyms; + }(Doc); + + Acronyms.prototype.unwrap = Acronyms.prototype.stripPeriods; + Acronyms.prototype.strip = Acronyms.prototype.stripPeriods; + + Doc.prototype.acronyms = function (n) { + var match = this.match('#Acronym'); + + if (typeof n === 'number') { + match = match.get(n); + } + + return new Acronyms(match.list, this, this.world); + }; + + return Doc; + }; + + var Acronyms = addMethod$1; + + var addMethod$2 = function addMethod(Doc) { + /** split into approximate sub-sentence phrases */ + Doc.prototype.clauses = function (n) { + // an awkward way to disambiguate a comma use + var commas = this["if"]('@hasComma').notIf('@hasComma @hasComma') //fun, cool... + .notIf('@hasComma . .? (and|or) .') //cool, and fun + .notIf('(#City && @hasComma) #Country') //'toronto, canada' + .notIf('(#Date && @hasComma) #Year') //'july 6, 1992' + .notIf('@hasComma (too|also)$') //at end of sentence + .match('@hasComma'); + var found = this.splitAfter(commas); + var quotes = found.quotations(); + found = found.splitOn(quotes); + var parentheses = found.parentheses(); + found = found.splitOn(parentheses); // it is cool and it is .. + + var conjunctions = found["if"]('#Copula #Adjective #Conjunction (#Pronoun|#Determiner) #Verb').match('#Conjunction'); + found = found.splitBefore(conjunctions); // if it is this then that + + var condition = found["if"]('if .{2,9} then .').match('then'); + found = found.splitBefore(condition); // misc clause partitions + + found = found.splitBefore('as well as .'); + found = found.splitBefore('such as .'); + found = found.splitBefore('in addition to .'); // semicolons, dashes + + found = found.splitAfter('@hasSemicolon'); + found = found.splitAfter('@hasDash'); // passive voice verb - '.. which was robbed is empty' + // let passive = found.match('#Noun (which|that) (was|is) #Adverb? #PastTense #Adverb?') + // if (passive.found) { + // found = found.splitAfter(passive) + // } + // //which the boy robbed + // passive = found.match('#Noun (which|that) the? #Noun+ #Adverb? #PastTense #Adverb?') + // if (passive.found) { + // found = found.splitAfter(passive) + // } + // does there appear to have relative/subordinate clause still? + + var tooLong = found.filter(function (d) { + return d.wordCount() > 5 && d.match('#Verb+').length >= 2; + }); + + if (tooLong.found) { + var m = tooLong.splitAfter('#Noun .* #Verb .* #Noun+'); + found = found.splitOn(m.eq(0)); + } + + if (typeof n === 'number') { + found = found.get(n); + } + + return new Doc(found.list, this, this.world); + }; + + return Doc; + }; + + var Clauses = addMethod$2; + + var addMethod$3 = function addMethod(Doc) { + /** */ + var Contractions = /*#__PURE__*/function (_Doc) { + _inherits(Contractions, _Doc); + + var _super = _createSuper(Contractions); + + function Contractions(list, from, world) { + var _this; + + _classCallCheck(this, Contractions); + + _this = _super.call(this, list, from, world); + _this.contracted = null; + return _this; + } + /** turn didn't into 'did not' */ + + + _createClass(Contractions, [{ + key: "expand", + value: function expand() { + this.list.forEach(function (p) { + var terms = p.terms(); //change the case? + + var isTitlecase = terms[0].isTitleCase(); + terms.forEach(function (t, i) { + //use the implicit text + t.set(t.implicit || t.text); + t.implicit = undefined; //add whitespace + + if (i < terms.length - 1 && t.post === '') { + t.post += ' '; + } + }); //set titlecase + + if (isTitlecase) { + terms[0].toTitleCase(); + } + }); + return this; + } + }]); + + return Contractions; + }(Doc); //find contractable, expanded-contractions + // const findExpanded = r => { + // let remain = r.not('#Contraction') + // let m = remain.match('(#Noun|#QuestionWord) (#Copula|did|do|have|had|could|would|will)') + // m.concat(remain.match('(they|we|you|i) have')) + // m.concat(remain.match('i am')) + // m.concat(remain.match('(#Copula|#Modal|do|does|have|has|can|will) not')) + // return m + // } + + + Doc.prototype.contractions = function (n) { + //find currently-contracted + var found = this.match('@hasContraction+'); //(may want to split these up) + //todo: split consecutive contractions + + if (typeof n === 'number') { + found = found.get(n); + } + + return new Contractions(found.list, this, this.world); + }; //aliases + + + Doc.prototype.expanded = Doc.prototype.isExpanded; + Doc.prototype.contracted = Doc.prototype.isContracted; + return Doc; + }; + + var Contractions = addMethod$3; + + var addMethod$4 = function addMethod(Doc) { + //pull it apart.. + var parse = function parse(doc) { + var things = doc.splitAfter('@hasComma').splitOn('(and|or) not?').not('(and|or) not?'); + var beforeLast = doc.match('[.] (and|or)', 0); + return { + things: things, + conjunction: doc.match('(and|or) not?'), + beforeLast: beforeLast, + hasOxford: beforeLast.has('@hasComma') + }; + }; + /** cool, fun, and nice */ + + + var Lists = /*#__PURE__*/function (_Doc) { + _inherits(Lists, _Doc); + + var _super = _createSuper(Lists); + + function Lists() { + _classCallCheck(this, Lists); + + return _super.apply(this, arguments); + } + + _createClass(Lists, [{ + key: "conjunctions", + + /** coordinating conjunction */ + value: function conjunctions() { + return this.match('(and|or)'); + } + /** split-up by list object */ + + }, { + key: "parts", + value: function parts() { + return this.splitAfter('@hasComma').splitOn('(and|or) not?'); + } + /** remove the conjunction */ + + }, { + key: "items", + value: function items() { + return parse(this).things; + } + /** add a new unit to the list */ + + }, { + key: "add", + value: function add(str) { + this.forEach(function (p) { + var beforeLast = parse(p).beforeLast; + beforeLast.append(str); //add a comma to it + + beforeLast.termList(0).addPunctuation(','); + }); + return this; + } + /** remove any matching unit from the list */ + + }, { + key: "remove", + value: function remove(match) { + return this.items()["if"](match).remove(); + } + /** return only lists that use a serial comma */ + + }, { + key: "hasOxfordComma", + value: function hasOxfordComma() { + return this.filter(function (doc) { + return parse(doc).hasOxford; + }); + } + }, { + key: "addOxfordComma", + value: function addOxfordComma() { + var items = this.items(); + var needsComma = items.eq(items.length - 2); + + if (needsComma.found && needsComma.has('@hasComma') === false) { + needsComma.post(', '); + } + + return this; + } + }, { + key: "removeOxfordComma", + value: function removeOxfordComma() { + var items = this.items(); + var needsComma = items.eq(items.length - 2); + + if (needsComma.found && needsComma.has('@hasComma') === true) { + needsComma.post(' '); + } + + return this; + } + }]); + + return Lists; + }(Doc); // aliases + + + Lists.prototype.things = Lists.prototype.items; + + Doc.prototype.lists = function (n) { + var m = this["if"]('@hasComma+ .? (and|or) not? .'); // person-list + + var nounList = m.match('(#Noun|#Adjective|#Determiner|#Article)+ #Conjunction not? (#Article|#Determiner)? #Adjective? #Noun+')["if"]('#Noun'); + var adjList = m.match('(#Adjective|#Adverb)+ #Conjunction not? #Adverb? #Adjective+'); + var verbList = m.match('(#Verb|#Adverb)+ #Conjunction not? #Adverb? #Verb+'); + var result = nounList.concat(adjList); + result = result.concat(verbList); + result = result["if"]('@hasComma'); + + if (typeof n === 'number') { + result = m.get(n); + } + + return new Lists(result.list, this, this.world); + }; + + return Doc; + }; + + var Lists = addMethod$4; + + var noPlural = '(#Pronoun|#Place|#Value|#Person|#Uncountable|#Month|#WeekDay|#Holiday|#Possessive)'; //certain words can't be plural, like 'peace' + + var hasPlural = function hasPlural(doc) { + if (doc.has('#Plural') === true) { + return true; + } // these can't be plural + + + if (doc.has(noPlural) === true) { + return false; + } + + return true; + }; + + var hasPlural_1 = hasPlural; + + var irregulars$5 = { + hour: 'an', + heir: 'an', + heirloom: 'an', + honest: 'an', + honour: 'an', + honor: 'an', + uber: 'an' //german u + + }; //pronounced letters of acronyms that get a 'an' + + var an_acronyms = { + a: true, + e: true, + f: true, + h: true, + i: true, + l: true, + m: true, + n: true, + o: true, + r: true, + s: true, + x: true + }; //'a' regexes + + var a_regexs = [/^onc?e/i, //'wu' sound of 'o' + /^u[bcfhjkqrstn][aeiou]/i, // 'yu' sound for hard 'u' + /^eul/i]; + + var makeArticle = function makeArticle(doc) { + //no 'the john smith', but 'a london hotel' + if (doc.has('#Person') || doc.has('#Place')) { + return ''; + } //no a/an if it's plural + + + if (doc.has('#Plural')) { + return 'the'; + } + + var str = doc.text('normal').trim(); //explicit irregular forms + + if (irregulars$5.hasOwnProperty(str)) { + return irregulars$5[str]; + } //spelled-out acronyms + + + var firstLetter = str.substr(0, 1); + + if (doc.has('^@isAcronym') && an_acronyms.hasOwnProperty(firstLetter)) { + return 'an'; + } //'a' regexes + + + for (var i = 0; i < a_regexs.length; i++) { + if (a_regexs[i].test(str)) { + return 'a'; + } + } //basic vowel-startings + + + if (/^[aeiou]/i.test(str)) { + return 'an'; + } + + return 'a'; + }; + + var getArticle = makeArticle; + + //similar to plural/singularize rules, but not the same + var isPlural$1 = [/(antenn|formul|nebul|vertebr|vit)ae$/i, /(octop|vir|radi|nucle|fung|cact|stimul)i$/i, /men$/i, /.tia$/i, /(m|l)ice$/i]; //similar to plural/singularize rules, but not the same + + var isSingular$1 = [/(ax|test)is$/i, /(octop|vir|radi|nucle|fung|cact|stimul)us$/i, /(octop|vir)i$/i, /(rl)f$/i, /(alias|status)$/i, /(bu)s$/i, /(al|ad|at|er|et|ed|ad)o$/i, /(ti)um$/i, /(ti)a$/i, /sis$/i, /(?:(^f)fe|(lr)f)$/i, /hive$/i, /(^aeiouy|qu)y$/i, /(x|ch|ss|sh|z)$/i, /(matr|vert|ind|cort)(ix|ex)$/i, /(m|l)ouse$/i, /(m|l)ice$/i, /(antenn|formul|nebul|vertebr|vit)a$/i, /.sis$/i, /^(?!talis|.*hu)(.*)man$/i]; + var _rules$2 = { + isSingular: isSingular$1, + isPlural: isPlural$1 + }; + + var endS = /s$/; // double-check this term, if it is not plural, or singular. + // (this is a partial copy of ./tagger/fallbacks/plural) + // fallback plural if it ends in an 's'. + + var isPlural$2 = function isPlural(str) { + // isSingular suffix rules + if (_rules$2.isSingular.find(function (reg) { + return reg.test(str); + })) { + return false; + } // does it end in an s? + + + if (endS.test(str) === true) { + return true; + } // is it a plural like 'fungi'? + + + if (_rules$2.isPlural.find(function (reg) { + return reg.test(str); + })) { + return true; + } + + return null; + }; + + var isPlural_1$1 = isPlural$2; + + var exceptions = { + he: 'his', + she: 'hers', + they: 'theirs', + we: 'ours', + i: 'mine', + you: 'yours', + her: 'hers', + their: 'theirs', + our: 'ours', + my: 'mine', + your: 'yours' + }; // turn "David" to "David's" + + var toPossessive = function toPossessive(doc) { + var str = doc.text('text').trim(); // exceptions + + if (exceptions.hasOwnProperty(str)) { + doc.replaceWith(exceptions[str], true); + doc.tag('Possessive', 'toPossessive'); + return; + } // flanders' + + + if (/s$/.test(str)) { + str += "'"; + doc.replaceWith(str, true); + doc.tag('Possessive', 'toPossessive'); + return; + } //normal form: + + + str += "'s"; + doc.replaceWith(str, true); + doc.tag('Possessive', 'toPossessive'); + return; + }; + + var toPossessive_1 = toPossessive; + + // .nouns() supports some noun-phrase-ish groupings + // pull these apart, if necessary + var parse$1 = function parse(doc) { + var res = { + main: doc + }; //support 'mayor of chicago' as one noun-phrase + + if (doc.has('#Noun (of|by|for) .')) { + var m = doc.splitAfter('[#Noun+]', 0); + res.main = m.eq(0); + res.post = m.eq(1); + } + + return res; + }; + + var parse_1 = parse$1; + + var methods$6 = { + /** overload the original json with noun information */ + json: function json(options) { + var n = null; + + if (typeof options === 'number') { + n = options; + options = null; + } + + options = options || { + text: true, + normal: true, + trim: true, + terms: true + }; + var res = []; + this.forEach(function (doc) { + var json = doc.json(options)[0]; + json.article = getArticle(doc); + res.push(json); + }); + + if (n !== null) { + return res[n]; + } + + return res; + }, + + /** get all adjectives describing this noun*/ + adjectives: function adjectives() { + var list = this.lookAhead('^(that|who|which)? (was|is|will)? be? #Adverb? #Adjective+'); + list = list.concat(this.lookBehind('#Adjective+ #Adverb?$')); + list = list.match('#Adjective'); + return list.sort('index'); + }, + isPlural: function isPlural() { + return this["if"]('#Plural'); //assume tagger has run? + }, + hasPlural: function hasPlural() { + return this.filter(function (d) { + return hasPlural_1(d); + }); + }, + toPlural: function toPlural(agree) { + var _this = this; + + var toPlural = this.world.transforms.toPlural; + this.forEach(function (doc) { + if (doc.has('#Plural') || hasPlural_1(doc) === false) { + return; + } // double-check it isn't an un-tagged plural + + + var main = parse_1(doc).main; + var str = main.text('reduced'); + + if (!main.has('#Singular') && isPlural_1$1(str) === true) { + return; + } + + str = toPlural(str, _this.world); + main.replace(str).tag('#Plural'); // 'an apple' -> 'apples' + + if (agree) { + var an = main.lookBefore('(an|a) #Adjective?$').not('#Adjective'); + + if (an.found === true) { + an.remove(); + } + } + }); + return this; + }, + toSingular: function toSingular(agree) { + var _this2 = this; + + var toSingular = this.world.transforms.toSingular; + this.forEach(function (doc) { + if (doc.has('^#Singular+$') || hasPlural_1(doc) === false) { + return; + } // double-check it isn't an un-tagged plural + + + var main = parse_1(doc).main; + var str = main.text('reduced'); + + if (!main.has('#Plural') && isPlural_1$1(str) !== true) { + return; + } + + str = toSingular(str, _this2.world); + main.replace(str).tag('#Singular'); // add an article + + if (agree) { + // 'apples' -> 'an apple' + var start = doc; + var adj = doc.lookBefore('#Adjective'); + + if (adj.found) { + start = adj; + } + + var article = getArticle(start); + start.insertBefore(article); + } + }); + return this; + }, + toPossessive: function toPossessive() { + this.forEach(function (d) { + toPossessive_1(d); + }); + return this; + } + }; + var methods_1 = methods$6; + + var addMethod$5 = function addMethod(Doc) { + /** */ + var Nouns = /*#__PURE__*/function (_Doc) { + _inherits(Nouns, _Doc); + + var _super = _createSuper(Nouns); + + function Nouns() { + _classCallCheck(this, Nouns); + + return _super.apply(this, arguments); + } + + return Nouns; + }(Doc); // add-in our methods + + + Object.assign(Nouns.prototype, methods_1); + + Doc.prototype.nouns = function (n) { + // don't split 'paris, france' + var keep = this.match('(#City && @hasComma) (#Region|#Country)'); // but split the other commas + + var m = this.not(keep).splitAfter('@hasComma'); // combine them back together + + m = m.concat(keep); + m = m.match('#Noun+ (of|by)? the? #Noun+?'); //nouns that we don't want in these results, for weird reasons + + m = m.not('#Pronoun'); + m = m.not('(there|these)'); + m = m.not('(#Month|#WeekDay)'); //allow Durations, Holidays + // //allow possessives like "spencer's", but not generic ones like, + + m = m.not('(my|our|your|their|her|his)'); + m = m.not('(of|for|by|the)$'); + + if (typeof n === 'number') { + m = m.get(n); + } + + return new Nouns(m.list, this, this.world); + }; + + return Doc; + }; + + var Nouns = addMethod$5; + + var open = /\(/; + var close = /\)/; + + var addMethod$6 = function addMethod(Doc) { + /** anything between (these things) */ + var Parentheses = /*#__PURE__*/function (_Doc) { + _inherits(Parentheses, _Doc); + + var _super = _createSuper(Parentheses); + + function Parentheses() { + _classCallCheck(this, Parentheses); + + return _super.apply(this, arguments); + } + + _createClass(Parentheses, [{ + key: "unwrap", + + /** remove the parentheses characters */ + value: function unwrap() { + this.list.forEach(function (p) { + var first = p.terms(0); + first.pre = first.pre.replace(open, ''); + var last = p.lastTerm(); + last.post = last.post.replace(close, ''); + }); + return this; + } + }]); + + return Parentheses; + }(Doc); + + Doc.prototype.parentheses = function (n) { + var list = []; + this.list.forEach(function (p) { + var terms = p.terms(); //look for opening brackets + + for (var i = 0; i < terms.length; i += 1) { + var t = terms[i]; + + if (open.test(t.pre)) { + //look for the closing bracket.. + for (var o = i; o < terms.length; o += 1) { + if (close.test(terms[o].post)) { + var len = o - i + 1; + list.push(p.buildFrom(t.id, len)); + i = o; + break; + } + } + } + } + }); //support nth result + + if (typeof n === 'number') { + if (list[n]) { + list = [list[n]]; + } else { + list = []; + } + + return new Parentheses(list, this, this.world); + } + + return new Parentheses(list, this, this.world); + }; + + return Doc; + }; + + var Parentheses = addMethod$6; + + var addMethod$7 = function addMethod(Doc) { + /** */ + var Possessives = /*#__PURE__*/function (_Doc) { + _inherits(Possessives, _Doc); + + var _super = _createSuper(Possessives); + + function Possessives(list, from, world) { + var _this; + + _classCallCheck(this, Possessives); + + _this = _super.call(this, list, from, world); + _this.contracted = null; + return _this; + } + /** turn didn't into 'did not' */ + + + _createClass(Possessives, [{ + key: "strip", + value: function strip() { + this.list.forEach(function (p) { + var terms = p.terms(); + terms.forEach(function (t) { + var str = t.text.replace(/'s$/, ''); + t.set(str || t.text); + }); + }); + return this; + } + }]); + + return Possessives; + }(Doc); //find contractable, expanded-contractions + // const findExpanded = r => { + // let remain = r.not('#Contraction') + // let m = remain.match('(#Noun|#QuestionWord) (#Copula|did|do|have|had|could|would|will)') + // m.concat(remain.match('(they|we|you|i) have')) + // m.concat(remain.match('i am')) + // m.concat(remain.match('(#Copula|#Modal|do|does|have|has|can|will) not')) + // return m + // } + + + Doc.prototype.possessives = function (n) { + //find currently-contracted + var found = this.match('#Noun+? #Possessive'); //todo: split consecutive contractions + + if (typeof n === 'number') { + found = found.get(n); + } + + return new Possessives(found.list, this, this.world); + }; + + return Doc; + }; + + var Possessives = addMethod$7; + + var pairs = { + "\"": "\"", + // 'StraightDoubleQuotes' + "\uFF02": "\uFF02", + // 'StraightDoubleQuotesWide' + "'": "'", + // 'StraightSingleQuotes' + "\u201C": "\u201D", + // 'CommaDoubleQuotes' + "\u2018": "\u2019", + // 'CommaSingleQuotes' + "\u201F": "\u201D", + // 'CurlyDoubleQuotesReversed' + "\u201B": "\u2019", + // 'CurlySingleQuotesReversed' + "\u201E": "\u201D", + // 'LowCurlyDoubleQuotes' + "\u2E42": "\u201D", + // 'LowCurlyDoubleQuotesReversed' + "\u201A": "\u2019", + // 'LowCurlySingleQuotes' + "\xAB": "\xBB", + // 'AngleDoubleQuotes' + "\u2039": "\u203A", + // 'AngleSingleQuotes' + // Prime 'non quotation' + "\u2035": "\u2032", + // 'PrimeSingleQuotes' + "\u2036": "\u2033", + // 'PrimeDoubleQuotes' + "\u2037": "\u2034", + // 'PrimeTripleQuotes' + // Prime 'quotation' variation + "\u301D": "\u301E", + // 'PrimeDoubleQuotes' + "`": "\xB4", + // 'PrimeSingleQuotes' + "\u301F": "\u301E" // 'LowPrimeDoubleQuotesReversed' + + }; + var hasOpen = RegExp('(' + Object.keys(pairs).join('|') + ')'); + + var addMethod$8 = function addMethod(Doc) { + /** "these things" */ + var Quotations = /*#__PURE__*/function (_Doc) { + _inherits(Quotations, _Doc); + + var _super = _createSuper(Quotations); + + function Quotations() { + _classCallCheck(this, Quotations); + + return _super.apply(this, arguments); + } + + _createClass(Quotations, [{ + key: "unwrap", + + /** remove the quote characters */ + value: function unwrap() { + return this; + } + }]); + + return Quotations; + }(Doc); + + Doc.prototype.quotations = function (n) { + var list = []; + this.list.forEach(function (p) { + var terms = p.terms(); //look for opening quotes + + for (var i = 0; i < terms.length; i += 1) { + var t = terms[i]; + + if (hasOpen.test(t.pre)) { + var _char = (t.pre.match(hasOpen) || [])[0]; + var want = pairs[_char]; // if (!want) { + // console.warn('missing quote char ' + char) + // } + //look for the closing bracket.. + + for (var o = i; o < terms.length; o += 1) { + if (terms[o].post.indexOf(want) !== -1) { + var len = o - i + 1; + list.push(p.buildFrom(t.id, len)); + i = o; + break; + } + } + } + } + }); //support nth result + + if (typeof n === 'number') { + if (list[n]) { + list = [list[n]]; + } else { + list = []; + } + + return new Quotations(list, this, this.world); + } + + return new Quotations(list, this, this.world); + }; // alias + + + Doc.prototype.quotes = Doc.prototype.quotations; + return Doc; + }; + + var Quotations = addMethod$8; + + // walked => walk - turn a verb into it's root form + var toInfinitive$1 = function toInfinitive(parsed, world) { + var verb = parsed.verb; // console.log(parsed) + // verb.debug() + //1. if it's already infinitive + + var str = verb.text('normal'); + + if (verb.has('#Infinitive')) { + return str; + } // 2. world transform does the heavy-lifting + + + var tense = null; + + if (verb.has('#PastTense')) { + tense = 'PastTense'; + } else if (verb.has('#Gerund')) { + tense = 'Gerund'; + } else if (verb.has('#PresentTense')) { + tense = 'PresentTense'; + } else if (verb.has('#Participle')) { + tense = 'Participle'; + } else if (verb.has('#Actor')) { + tense = 'Actor'; + } + + return world.transforms.toInfinitive(str, world, tense); + }; + + var toInfinitive_1$1 = toInfinitive$1; + + // spencer walks -> singular + // we walk -> plural + // the most-recent noun-phrase, before this verb. + var findNoun = function findNoun(vb) { + var noun = vb.lookBehind('#Noun+').last(); + return noun; + }; //sometimes you can tell if a verb is plural/singular, just by the verb + // i am / we were + // othertimes you need its subject 'we walk' vs 'i walk' + + + var isPlural$3 = function isPlural(parsed) { + var vb = parsed.verb; + + if (vb.has('(are|were|does)') || parsed.auxiliary.has('(are|were|does)')) { + return true; + } + + if (vb.has('(is|am|do|was)') || parsed.auxiliary.has('(is|am|do|was)')) { + return false; + } //consider its prior noun + + + var noun = findNoun(vb); + + if (noun.has('(we|they|you)')) { + return true; + } + + if (noun.has('#Plural')) { + return true; + } + + if (noun.has('#Singular')) { + return false; + } + + return null; + }; + + var isPlural_1$2 = isPlural$3; + + // #Copula : is -> 'is not' + // #PastTense : walked -> did not walk + // #PresentTense : walks -> does not walk + // #Gerund : walking: -> not walking + // #Infinitive : walk -> do not walk + + var toNegative = function toNegative(parsed, world) { + var vb = parsed.verb; // if it's already negative... + + if (parsed.negative.found) { + return; + } // would walk -> would not walk + + + if (parsed.auxiliary.found) { + parsed.auxiliary.eq(0).append('not'); // 'would not have' ➔ 'would not have' + + if (parsed.auxiliary.has('#Modal have not')) { + parsed.auxiliary.replace('have not', 'not have'); + } + + return; + } // is walking -> is not walking + + + if (vb.has('(#Copula|will|has|had|do)')) { + vb.append('not'); + return; + } // walked -> did not walk + + + if (vb.has('#PastTense')) { + var inf = toInfinitive_1$1(parsed, world); + vb.replaceWith(inf, true); + vb.prepend('did not'); + return; + } // walks -> does not walk + + + if (vb.has('#PresentTense')) { + var _inf = toInfinitive_1$1(parsed, world); + + vb.replaceWith(_inf, true); + + if (isPlural_1$2(parsed)) { + vb.prepend('do not'); + } else { + vb.prepend('does not'); + } + + return; + } //walking -> not walking + + + if (vb.has('#Gerund')) { + var _inf2 = toInfinitive_1$1(parsed, world); + + vb.replaceWith(_inf2, true); + vb.prepend('not'); + return; + } //fallback 1: walk -> does not walk + + + if (isPlural_1$2(parsed)) { + vb.prepend('does not'); + return; + } //fallback 2: walk -> do not walk + + + vb.prepend('do not'); + return; + }; + + var toNegative_1 = toNegative; + + // turn 'would not really walk up' into parts + var parseVerb = function parseVerb(vb) { + var parsed = { + adverb: vb.match('#Adverb+'), + // 'really' + negative: vb.match('#Negative'), + // 'not' + auxiliary: vb.match('#Auxiliary+').not('(#Negative|#Adverb)'), + // 'will' of 'will go' + particle: vb.match('#Particle'), + // 'up' of 'pull up' + verb: vb.match('#Verb+').not('(#Adverb|#Negative|#Auxiliary|#Particle)') + }; // fallback, if no verb found + + if (!parsed.verb.found) { + // blank-everything + Object.keys(parsed).forEach(function (k) { + parsed[k] = parsed[k].not('.'); + }); // it's all the verb + + parsed.verb = vb; + return parsed; + } // + + + if (parsed.adverb && parsed.adverb.found) { + var match = parsed.adverb.text('reduced') + '$'; + + if (vb.has(match)) { + parsed.adverbAfter = true; + } + } + + return parsed; + }; + + var parse$2 = parseVerb; + + /** too many special cases for is/was/will be*/ + + var toBe = function toBe(parsed) { + var isI = false; + var plural = isPlural_1$2(parsed); + var isNegative = parsed.negative.found; //account for 'i is' -> 'i am' irregular + // if (vb.parent && vb.parent.has('i #Adverb? #Copula')) { + // isI = true; + // } + // 'i look', not 'i looks' + + if (parsed.verb.lookBehind('(i|we) (#Adverb|#Verb)?$').found) { + isI = true; + } + + var obj = { + PastTense: 'was', + PresentTense: 'is', + FutureTense: 'will be', + Infinitive: 'is', + Gerund: 'being', + Actor: '', + PerfectTense: 'been', + Pluperfect: 'been' + }; //"i is" -> "i am" + + if (isI === true) { + obj.PresentTense = 'am'; + obj.Infinitive = 'am'; + } + + if (plural) { + obj.PastTense = 'were'; + obj.PresentTense = 'are'; + obj.Infinitive = 'are'; + } + + if (isNegative) { + obj.PastTense += ' not'; + obj.PresentTense += ' not'; + obj.FutureTense = 'will not be'; + obj.Infinitive += ' not'; + obj.PerfectTense = 'not ' + obj.PerfectTense; + obj.Pluperfect = 'not ' + obj.Pluperfect; + obj.Gerund = 'not ' + obj.Gerund; + } + + return obj; + }; + + var toBe_1 = toBe; + + // 'may/could/should' -> 'may/could/should have' + var doModal = function doModal(parsed) { + var str = parsed.verb.text(); + var res = { + PastTense: str + ' have', + PresentTense: str, + FutureTense: str, + Infinitive: str // Gerund: , + // Actor: '', + // PerfectTense: '', + // Pluperfect: '', + + }; + return res; + }; + + var doModal_1 = doModal; + + var conjugate$2 = function conjugate(parsed, world) { + var verb = parsed.verb; //special handling of 'is', 'will be', etc. + + if (verb.has('#Copula') || verb.out('normal') === 'be' && parsed.auxiliary.has('will')) { + return toBe_1(parsed); + } // special handling of 'he could.' + + + if (verb.has('#Modal')) { + return doModal_1(parsed); + } + + var hasHyphen = parsed.verb.termList(0).hasHyphen(); + var infinitive = toInfinitive_1$1(parsed, world); + + if (!infinitive) { + return {}; + } + + var forms = world.transforms.conjugate(infinitive, world); + forms.Infinitive = infinitive; // add particle to phrasal verbs ('fall over') + + if (parsed.particle.found) { + var particle = parsed.particle.text(); + var space = hasHyphen === true ? '-' : ' '; + Object.keys(forms).forEach(function (k) { + return forms[k] += space + particle; + }); + } //put the adverb at the end? + // if (parsed.adverb.found) { + // let adverb = parsed.adverb.text() + // let space = hasHyphen === true ? '-' : ' ' + // if (parsed.adverbAfter === true) { + // Object.keys(forms).forEach(k => (forms[k] += space + adverb)) + // } else { + // Object.keys(forms).forEach(k => (forms[k] = adverb + space + forms[k])) + // } + // } + //apply negative + + + var isNegative = parsed.negative.found; + + if (isNegative) { + forms.PastTense = 'did not ' + forms.Infinitive; + forms.PresentTense = 'does not ' + forms.Infinitive; + forms.Gerund = 'not ' + forms.Gerund; + } //future Tense is pretty straightforward + + + if (!forms.FutureTense) { + if (isNegative) { + forms.FutureTense = 'will not ' + forms.Infinitive; + } else { + forms.FutureTense = 'will ' + forms.Infinitive; + } + } + + if (isNegative) { + forms.Infinitive = 'not ' + forms.Infinitive; + } + + return forms; + }; + + var conjugate_1$1 = conjugate$2; + + // if something is 'modal-ish' we are forced to use past-participle + // ('i could drove' is wrong) + + var useParticiple = function useParticiple(parsed) { + if (parsed.auxiliary.has('(could|should|would|may|can|must)')) { + return true; + } + + if (parsed.auxiliary.has('am .+? being')) { + return true; + } + + if (parsed.auxiliary.has('had .+? been')) { + return true; + } + + return false; + }; // conjugate 'drive' ➔ 'have driven' + + + var toParticiple = function toParticiple(parsed, world) { + //is it already a participle? + if (parsed.auxiliary.has('(have|had)') && parsed.verb.has('#Participle')) { + return; + } // try to swap the main verb to its participle form + + + var obj = conjugate_1$1(parsed, world); + var str = obj.Participle || obj.PastTense; + + if (str) { + parsed.verb.replaceWith(str, false); + } // 'am being driven' ➔ 'have been driven' + + + if (parsed.auxiliary.has('am .+? being')) { + parsed.auxiliary.remove('am'); + parsed.auxiliary.replace('being', 'have been'); + } // add a 'have' + + + if (!parsed.auxiliary.has('have')) { + parsed.auxiliary.append('have'); + } // tag it as a participle + + + parsed.verb.tag('Participle', 'toParticiple'); // turn 'i can swim' to -> 'i could swim' + + parsed.auxiliary.replace('can', 'could'); //'must be' ➔ 'must have been' + + parsed.auxiliary.replace('be have', 'have been'); //'not have' ➔ 'have not' + + parsed.auxiliary.replace('not have', 'have not'); // ensure all new words are tagged right + + parsed.auxiliary.tag('Auxiliary'); + }; + + var participle = { + useParticiple: useParticiple, + toParticiple: toParticiple + }; + + var _toParticiple = participle.toParticiple, + useParticiple$1 = participle.useParticiple; // remove any tense-information in auxiliary verbs + + var makeNeutral = function makeNeutral(parsed) { + //remove tense-info from auxiliaries + parsed.auxiliary.remove('(will|are|am|being)'); + parsed.auxiliary.remove('(did|does)'); + parsed.auxiliary.remove('(had|has|have)'); //our conjugation includes the 'not' and the phrasal-verb particle + + parsed.particle.remove(); + parsed.negative.remove(); + return parsed; + }; + + var methods$7 = { + /** overload the original json with verb information */ + json: function json(options) { + var _this = this; + + var n = null; + + if (typeof options === 'number') { + n = options; + options = null; + } + + options = options || { + text: true, + normal: true, + trim: true, + terms: true + }; + var res = []; + this.forEach(function (p) { + var json = p.json(options)[0]; + var parsed = parse$2(p); + json.parts = {}; + Object.keys(parsed).forEach(function (k) { + if (parsed[k] && parsed[k].isA === 'Doc') { + json.parts[k] = parsed[k].text('normal'); + } else { + json.parts[k] = parsed[k]; + } + }); + json.isNegative = p.has('#Negative'); + json.conjugations = conjugate_1$1(parsed, _this.world); + res.push(json); + }); + + if (n !== null) { + return res[n]; + } + + return res; + }, + + /** grab the adverbs describing these verbs */ + adverbs: function adverbs() { + var list = []; // look at internal adverbs + + this.forEach(function (vb) { + var advb = parse$2(vb).adverb; + + if (advb.found) { + list = list.concat(advb.list); + } + }); // look for leading adverbs + + var m = this.lookBehind('#Adverb+$'); + + if (m.found) { + list = m.list.concat(list); + } // look for trailing adverbs + + + m = this.lookAhead('^#Adverb+'); + + if (m.found) { + list = list.concat(m.list); + } + + return this.buildFrom(list); + }, + /// Verb Inflection + + /**return verbs like 'we walk' and not 'spencer walks' */ + isPlural: function isPlural() { + var _this2 = this; + + var list = []; + this.forEach(function (vb) { + var parsed = parse$2(vb); + + if (isPlural_1$2(parsed, _this2.world) === true) { + list.push(vb.list[0]); + } + }); + return this.buildFrom(list); + }, + + /** return verbs like 'spencer walks' and not 'we walk' */ + isSingular: function isSingular() { + var _this3 = this; + + var list = []; + this.forEach(function (vb) { + var parsed = parse$2(vb); + + if (isPlural_1$2(parsed, _this3.world) === false) { + list.push(vb.list[0]); + } + }); + return this.buildFrom(list); + }, + /// Conjugation + + /** return all forms of this verb */ + conjugate: function conjugate() { + var _this4 = this; + + var result = []; + this.forEach(function (vb) { + var parsed = parse$2(vb); + + var forms = conjugate_1$1(parsed, _this4.world); + + result.push(forms); + }); + return result; + }, + + /** walk ➔ walked*/ + toPastTense: function toPastTense() { + var _this5 = this; + + this.forEach(function (vb) { + var parsed = parse$2(vb); // should we support 'would swim' ➔ 'would have swam' + + if (useParticiple$1(parsed)) { + _toParticiple(parsed, _this5.world); + + return; + } + + var str = conjugate_1$1(parsed, _this5.world).PastTense; + + if (str) { + parsed = makeNeutral(parsed); + parsed.verb.replaceWith(str, false); // vb.tag('PastTense') + } + }); + return this; + }, + + /** walk ➔ walks */ + toPresentTense: function toPresentTense() { + var _this6 = this; + + this.forEach(function (vb) { + var parsed = parse$2(vb); + + var obj = conjugate_1$1(parsed, _this6.world); + + var str = obj.PresentTense; // 'i look', not 'i looks' + + if (vb.lookBehind('(i|we) (#Adverb|#Verb)?$').found) { + str = obj.Infinitive; + } + + if (str) { + //awkward support for present-participle form + // -- should we support 'have been swimming' ➔ 'am swimming' + if (parsed.auxiliary.has('(have|had) been')) { + parsed.auxiliary.replace('(have|had) been', 'am being'); + + if (obj.Particle) { + str = obj.Particle || obj.PastTense; + } + + return; + } + + parsed.verb.replaceWith(str, false); + parsed.verb.tag('PresentTense'); + parsed = makeNeutral(parsed); // avoid 'he would walks' + + parsed.auxiliary.remove('#Modal'); + } + }); + return this; + }, + + /** walk ➔ will walk*/ + toFutureTense: function toFutureTense() { + var _this7 = this; + + this.forEach(function (vb) { + var parsed = parse$2(vb); // 'i should drive' is already future-enough + + if (useParticiple$1(parsed)) { + return; + } + + var str = conjugate_1$1(parsed, _this7.world).FutureTense; + + if (str) { + parsed = makeNeutral(parsed); // avoid 'he would will go' + + parsed.auxiliary.remove('#Modal'); + parsed.verb.replaceWith(str, false); + parsed.verb.tag('FutureTense'); + } + }); + return this; + }, + + /** walks ➔ walk */ + toInfinitive: function toInfinitive() { + var _this8 = this; + + this.forEach(function (vb) { + var parsed = parse$2(vb); + + var str = conjugate_1$1(parsed, _this8.world).Infinitive; + + if (str) { + vb.replaceWith(str, false); + vb.tag('Infinitive'); + } + }); + return this; + }, + + /** walk ➔ walking */ + toGerund: function toGerund() { + var _this9 = this; + + this.forEach(function (vb) { + var parsed = parse$2(vb); + + var str = conjugate_1$1(parsed, _this9.world).Gerund; + + if (str) { + vb.replaceWith(str, false); + vb.tag('Gerund'); + } + }); + return this; + }, + + /** drive ➔ driven - naked past-participle if it exists, otherwise past-tense */ + toParticiple: function toParticiple() { + var _this10 = this; + + this.forEach(function (vb) { + var parsed = parse$2(vb); + var noAux = !parsed.auxiliary.found; + + _toParticiple(parsed, _this10.world); // dirty trick to ensure our new auxiliary is found + + + if (noAux) { + parsed.verb.prepend(parsed.auxiliary.text()); + parsed.auxiliary.remove(); + } + }); + return this; + }, + /// Negation + + /** return only verbs with 'not'*/ + isNegative: function isNegative() { + return this["if"]('#Negative'); + }, + + /** return only verbs without 'not'*/ + isPositive: function isPositive() { + return this.ifNo('#Negative'); + }, + + /** add a 'not' to these verbs */ + toNegative: function toNegative() { + var _this11 = this; + + this.list.forEach(function (p) { + var doc = _this11.buildFrom([p]); + + var parsed = parse$2(doc); + + toNegative_1(parsed, doc.world); + }); + return this; + }, + + /** remove 'not' from these verbs */ + toPositive: function toPositive() { + var m = this.match('do not #Verb'); + + if (m.found) { + m.remove('do not'); + } + + return this.remove('#Negative'); + } + }; + + var addMethod$9 = function addMethod(Doc) { + /** */ + var Verbs = /*#__PURE__*/function (_Doc) { + _inherits(Verbs, _Doc); + + var _super = _createSuper(Verbs); + + function Verbs() { + _classCallCheck(this, Verbs); + + return _super.apply(this, arguments); + } + + return Verbs; + }(Doc); // add-in our methods + + + Object.assign(Verbs.prototype, methods$7); // aliases + + Verbs.prototype.negate = Verbs.prototype.toNegative; + + Doc.prototype.verbs = function (n) { + var match = this.match('(#Adverb|#Auxiliary|#Verb|#Negative|#Particle)+'); // try to ignore leading and trailing adverbs + + match = match.not('^#Adverb+'); + match = match.not('#Adverb+$'); // handle commas: + // don't split 'really, really' + + var keep = match.match('(#Adverb && @hasComma) #Adverb'); // // but split the other commas + + var m = match.not(keep).splitAfter('@hasComma'); // // combine them back together + + m = m.concat(keep); + m.sort('index'); //handle slashes? + //ensure there's actually a verb + + m = m["if"]('#Verb'); // the reason he will is ... + + if (m.has('(is|was)$')) { + m = m.splitBefore('(is|was)$'); + } //grab (n)th result + + + if (typeof n === 'number') { + m = m.get(n); + } + + var vb = new Verbs(m.list, this, this.world); + return vb; + }; + + return Doc; + }; + + var Verbs = addMethod$9; + + var addMethod$a = function addMethod(Doc) { + /** */ + var People = /*#__PURE__*/function (_Doc) { + _inherits(People, _Doc); + + var _super = _createSuper(People); + + function People() { + _classCallCheck(this, People); + + return _super.apply(this, arguments); + } + + return People; + }(Doc); + + Doc.prototype.people = function (n) { + var match = this.splitAfter('@hasComma'); + match = match.match('#Person+'); //grab (n)th result + + if (typeof n === 'number') { + match = match.get(n); + } + + return new People(match.list, this, this.world); + }; + + return Doc; + }; + + var People = addMethod$a; + + var subclass = [Abbreviations, Acronyms, Clauses, Contractions, Lists, Nouns, Parentheses, Possessives, Quotations, Verbs, People]; + + var extend = function extend(Doc) { + // add basic methods + Object.keys(_simple).forEach(function (k) { + return Doc.prototype[k] = _simple[k]; + }); // add subclassed methods + + subclass.forEach(function (addFn) { + return addFn(Doc); + }); + return Doc; + }; + + var Subset = extend; + + var methods$8 = { + misc: methods$4, + selections: _simple + }; + /** a parsed text object */ + + var Doc = /*#__PURE__*/function () { + function Doc(list, from, world) { + var _this = this; + + _classCallCheck(this, Doc); + + this.list = list; //quiet these properties in console.logs + + Object.defineProperty(this, 'from', { + enumerable: false, + value: from, + writable: true + }); //borrow some missing data from parent + + if (world === undefined && from !== undefined) { + world = from.world; + } //'world' getter + + + Object.defineProperty(this, 'world', { + enumerable: false, + value: world, + writable: true + }); //fast-scans for our data + + Object.defineProperty(this, '_cache', { + enumerable: false, + writable: true, + value: {} + }); //'found' getter + + Object.defineProperty(this, 'found', { + get: function get() { + return _this.list.length > 0; + } + }); //'length' getter + + Object.defineProperty(this, 'length', { + get: function get() { + return _this.list.length; + } + }); // this is way easier than .constructor.name... + + Object.defineProperty(this, 'isA', { + get: function get() { + return 'Doc'; + } + }); + } + /** run part-of-speech tagger on all results*/ + + + _createClass(Doc, [{ + key: "tagger", + value: function tagger() { + return _02Tagger(this); + } + /** pool is stored on phrase objects */ + + }, { + key: "pool", + value: function pool() { + if (this.list.length > 0) { + return this.list[0].pool; + } + + return this.all().list[0].pool; + } + }]); + + return Doc; + }(); + /** create a new Document object */ + + + Doc.prototype.buildFrom = function (list) { + list = list.map(function (p) { + return p.clone(true); + }); // new this.constructor() + + var doc = new Doc(list, this, this.world); + return doc; + }; + /** create a new Document from plaintext. */ + + + Doc.prototype.fromText = function (str) { + var list = _01Tokenizer(str, this.world, this.pool()); + return this.buildFrom(list); + }; + + Object.assign(Doc.prototype, methods$8.misc); + Object.assign(Doc.prototype, methods$8.selections); //add sub-classes + + Subset(Doc); //aliases + + var aliases$1 = { + untag: 'unTag', + and: 'match', + notIf: 'ifNo', + only: 'if', + onlyIf: 'if' + }; + Object.keys(aliases$1).forEach(function (k) { + return Doc.prototype[k] = Doc.prototype[aliases$1[k]]; + }); + var Doc_1 = Doc; + + var smallTagger = function smallTagger(doc) { + var terms = doc.termList(); + _01Lexicon(terms, doc.world); + return doc; + }; + + var tiny = smallTagger; + + function instance(worldInstance) { + //blast-out our word-lists, just once + var world = worldInstance; + /** parse and tag text into a compromise object */ + + var nlp = function nlp() { + var text = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : ''; + var lexicon = arguments.length > 1 ? arguments[1] : undefined; + + if (lexicon) { + world.addWords(lexicon); + } + + var list = _01Tokenizer(text, world); + var doc = new Doc_1(list, null, world); + doc.tagger(); + return doc; + }; + + nlp.similar = similar_text; + /** parse text into a compromise object, without running POS-tagging */ + + + nlp.tokenize = function () { + var text = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : ''; + var lexicon = arguments.length > 1 ? arguments[1] : undefined; + var w = world; + + if (lexicon) { + w = w.clone(); + w.words = {}; + w.addWords(lexicon); + } + + var list = _01Tokenizer(text, w); + var doc = new Doc_1(list, null, w); + + if (lexicon) { + tiny(doc); + } + + return doc; + }; + /** mix in a compromise-plugin */ + + + nlp.extend = function (fn) { + fn(Doc_1, world, this, Phrase_1, Term_1, Pool_1); + return this; + }; + /** create a compromise Doc object from .json() results */ + + + nlp.fromJSON = function (json) { + var list = fromJSON_1(json, world); + return new Doc_1(list, null, world); + }; + /** make a deep-copy of the library state */ + + + nlp.clone = function () { + return instance(world.clone()); + }; + /** log our decision-making for debugging */ + + + nlp.verbose = function () { + var bool = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : true; + world.verbose(bool); + return this; + }; + /** grab currently-used World object */ + + + nlp.world = function () { + return world; + }; + /** pre-parse any match statements */ + + + nlp.parseMatch = function (str) { + return syntax_1(str); + }; + /** current version of the library */ + + + nlp.version = _version; // alias + + nlp["import"] = nlp.load; + return nlp; + } + + var src = instance(new World_1()); + + return src; + +})));