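// WHY DO WE FIRST GET THE AVERAGE, WHEN WE HAVEN'T DEFINED TOKENS?
// IN THIS FUNCTION, DOES IT MATTER IF WE CALL THIS ARGUMENT 'TOKENS' OR SOMETHING ELSE, SINCE WE DEFINE THAT VARIABLE
// LATER IN THE FUNCTION 'reportOnText()'?
// Answer: `tokens` here is only a parameter name, local to this function. The function body does not run until
// it is called, and reportOnText() passes in its own array at that point, so the parameter could have any name.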

function getAverageWordLength(tokens) {
  // returns the average token length as a string with two decimal places
  // (the output of .toFixed(2)), e.g. '3.50'
  //
  // an empty list has no meaningful average: without this guard the division
  // below would be 0 / 0 and the caller would receive the string 'NaN'
  if (tokens.length === 0) {
    return (0).toFixed(2);
  }

  // join all tokens together to create one big string, whose length is the
  // total number of characters, then divide by the number of tokens
  var totalLength = tokens.join('').length;
  return (totalLength / tokens.length).toFixed(2);
}

// WHY THEN COUNT THE DISTINCT WORDS?
// Note: the distinct-word count counts each different word once, no matter how many times it's repeated,
// so that's why it's called "unique". It is NOT the number of words that are mentioned only once.

function countDistinctWords(tokens) {
  // returns how many different tokens appear in `tokens`; a token that is
  // repeated is only counted the first time it is seen
  //
  // each token is recorded as a property name on a lookup object, so checking
  // "have we seen this one already?" is a single property read instead of a
  // search through an ever-growing array. tokens are expected to be strings
  // (property names are always strings).
  //
  // Object.create(null) makes an object with no prototype, so tokens such as
  // 'constructor' or 'toString' are not mistaken for already-seen words
  var seen = Object.create(null);
  var distinctCount = 0;
  for (var i = 0; i < tokens.length; i++) {
    if (!seen[tokens[i]]) {
      seen[tokens[i]] = true;
      distinctCount++;
    }
  }
  return distinctCount;
}

// this is a naive implementation of text tokenization
// https://en.wikipedia.org/wiki/Tokenization_(lexical_analysis).
// the goal is to standardize some of the differences between
// words in a text by converting all to lowercase, removing punctuation
// etc., so that, for instance, the "there" in 'it is there.' or "it is ThErE " or 'its There'
// would all be converted into the same value ("there")

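// THE TWO FUNCTIONS BELOW CLEAN UP THE TEXT
// WHY DEFINE TOKENIZING AFTER THE PREVIOUS FUNCTIONS?
// Answer: the order of the definitions doesn't matter. Nothing runs until reportOnText() is called, and it
// tokenizes the text first and only then passes the tokens to the counting functions.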

function tokenizeText(text) {
  // returns a sorted array of the lowercased words found in `text`, or an
  // empty array when the text contains no words at all
  //
  // the pattern matches a run of non-whitespace characters that begins and
  // ends on a word boundary, so leading and trailing punctuation is left out
  var words = text.toLowerCase().match(/\b[^\s]+\b/g);

  // with the g flag, .match() returns null (not an empty array) when nothing
  // matches, e.g. for '' or '...'; calling .sort() on null would throw
  return words === null ? [] : words.sort();
}
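// .toLowerCase() makes every letter lower case, so a word is counted once even if it's capitalized somewhere
// .match() with /\b[^\s]+\b/g returns an array of every match: \b is a word boundary, [^\s]+ is one or more
// non-whitespace characters, and the g flag means "find all matches", so leading and trailing punctuation is left out
// .sort() will sort the words alphabetically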

function removeReturns(text) {
  // returns `text` with every line break ('\r\n', '\n' or a lone '\r')
  // replaced by a single space
  //
  // a space is used rather than an empty string so that the last word on one
  // line and the first word on the next stay separate words: 'one\ntwo'
  // becomes 'one two', not 'onetwo'
  return text.replace(/\r?\n|\r/g, ' ');
}
// the .replace() method replaces every match of the first argument, /\r?\n|\r/g, with the second argument.
// the pattern matches line breaks: '\r\n', '\n', or a lone '\r' (which one you get depends on the system the
// text came from). line breaks don't mean anything useful for counting words, so they are taken out here

// generate and display analytics on text

function reportOnText(text) {
  // tokenize once, then derive every statistic from that one list of words
  var words = tokenizeText(text);
  var stats = {
    total: words.length,
    distinct: countDistinctWords(words),
    averageLength: getAverageWordLength(words)
  };

  // write each statistic into its element inside the report section, then
  // reveal the section by removing its 'hidden' class
  var $report = $('.js-text-report');
  var averageLabel = stats.averageLength + ' characters';
  $report.find('.js-word-count').text(stats.total);
  $report.find('.js-unique-word-count').text(stats.distinct);
  $report.find('.js-average-word-length').text(averageLabel);
  $report.removeClass('hidden');
}

// Watch for and handle form submissions
function watchFormSubmission() {
  // attaches a submit handler to the text form. .on('submit', ...) is used
  // instead of the .submit(handler) shorthand, which jQuery has deprecated;
  // the two register the handler in the same way
  $('.js-text-form').on('submit', function (event) {
    // stop the browser's default form submission so the handler below can
    // process the text in the page
    event.preventDefault();

    // `this` is the form being submitted; read the text the user entered,
    // strip its line breaks, and show the report for it
    var userText = $(this).find('#user-text').val();
    reportOnText(removeReturns(userText));
  });
}

// equivalent to `$(document).ready(function() {...})`
// passing a function to $() schedules it to run once the document is ready;
// at that point the form exists and its submit handler can be attached
$(function onDocumentReady() {
  watchFormSubmission();
});