// A note on ordering and naming: function declarations are hoisted, so the
// helpers in this file may be defined in any order -- none of them runs until
// `reportOnText()` calls it. The parameter name `tokens` is local to each
// function; it is unrelated to the `tokens` variable declared inside
// `reportOnText()` and could be called anything.

/**
 * Computes the average length of the given tokens.
 *
 * @param {string[]} tokens - The words to measure.
 * @returns {string} The average length formatted with two decimals
 *   (e.g. "2.50"); "0.00" when there are no tokens.
 */
function getAverageWordLength(tokens) {
  // Without this guard an empty list would compute 0 / 0, i.e. NaN,
  // and the function would return the string "NaN".
  if (!tokens || tokens.length === 0) {
    return (0).toFixed(2);
  }

  // Joining all tokens into one big string gives the total character count;
  // dividing by the number of tokens gives the average.
  var totalLength = tokens.join('').length;
  return (totalLength / tokens.length).toFixed(2);
}

/**
 * Counts how many different words appear in the token list. A word that
 * occurs several times is counted once.
 *
 * A Set (https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Set)
 * would also solve this; a plain lookup table is used here instead so the
 * code stays in the same ES5 style as the rest of the file.
 *
 * @param {string[]} tokens - The words to inspect (expected to be strings).
 * @returns {number} The number of distinct tokens; 0 for an empty list.
 */
function countDistinctWords(tokens) {
  if (!tokens) {
    return 0;
  }

  // Prototype-less object: words such as "constructor" or "toString" cannot
  // collide with inherited properties.
  var seen = Object.create(null);
  var distinctCount = 0;

  for (var i = 0; i < tokens.length; i++) {
    var token = tokens[i];
    // One constant-time lookup per token instead of rescanning an array of
    // previously seen words on every iteration.
    if (!seen[token]) {
      seen[token] = true;
      distinctCount++;
    }
  }

  return distinctCount;
}

// this is a naive implementation of text tokenization
// https://en.wikipedia.org/wiki/Tokenization_(lexical_analysis).
// the goal is to standardize some of the differences between
// words in a text by converting all to lowercase, removing punctuation
// etc., so that, for instance, the "there" in 'it is there.' or "it is ThErE " or 'its There'
// would all be converted into the same value ("there")

/**
 * Splits text into normalized word tokens.
 *
 * The text is lowercased so that differently capitalized spellings of a word
 * become the same token. The pattern /\b[^\s]+\b/g then picks out every run
 * of non-whitespace characters that starts and ends on a word boundary, which
 * drops leading/trailing punctuation ("there." -> "there").
 *
 * @param {string} text - The raw text to tokenize.
 * @returns {string[]} The tokens, sorted with the default string ordering;
 *   an empty array when the text contains no words.
 */
function tokenizeText(text) {
  // String.prototype.match returns null (not an empty array) when nothing
  // matches, so fall back to [] before sorting.
  var words = text.toLowerCase().match(/\b[^\s]+\b/g) || [];
  return words.sort();
}

/**
 * Replaces every line break (\r\n, \n or \r) with a single space.
 *
 * A space is used rather than an empty string so that the last word of one
 * line and the first word of the next are not fused into a single token.
 *
 * @param {string} text - Text that may contain line breaks.
 * @returns {string} The text with each line break replaced by a space.
 */
function removeReturns(text) {
  return text.replace(/\r?\n|\r/g, ' ');
}

/**
 * Generates analytics on the text and displays them in the DOM.
 *
 * Fills the `.js-word-count`, `.js-unique-word-count` and
 * `.js-average-word-length` elements inside `.js-text-report`, then reveals
 * the report by removing its `hidden` class.
 *
 * @param {string} text - The text to analyze.
 */
function reportOnText(text) {
  // tokenize our text then compute our data points
  var tokens = tokenizeText(text);
  var numTotalWords = tokens.length;
  var numDistinctWords = countDistinctWords(tokens);
  var averageWordLength = getAverageWordLength(tokens);

  // take our data and display it in the dom
  var textReport = $('.js-text-report');
  textReport.find('.js-word-count').text(numTotalWords);
  textReport.find('.js-unique-word-count').text(numDistinctWords);
  textReport
    .find('.js-average-word-length')
    .text(averageWordLength + ' characters');
  textReport.removeClass('hidden');
}

/**
 * Watches for and handles submissions of the `.js-text-form` form.
 *
 * The default submit action is prevented; the value of the `#user-text`
 * field is read, stripped of line breaks and passed to `reportOnText()`.
 */
function watchFormSubmission() {
  $('.js-text-form').submit(function (event) {
    event.preventDefault();
    // get the text the user submitted
    var userText = $(this).find('#user-text').val();
    reportOnText(removeReturns(userText));
  });
}

// equivalent to `$(document).ready(function() {...})`
// Passing a function to `$` registers it to run once the DOM is ready;
// at that point the form submission handler is wired up.
$(function onDocumentReady() {
  watchFormSubmission();
});