misspell method

int misspell(
  1. int sym,
  2. int tok
)

Implementation

int misspell(int sym, int tok) {
  //
  // Set up the two strings in question. Note that there is a "0"
  // gate added at the end of each string. This is important as
  // the algorithm assumes that it can "peek" at the symbol immediately
  // following the one that is being analysed.
  //
  var s1 = name(terminalIndex(sym)).toLowerCase();
  var n = s1.length;
  s1 += '\u0000';

  var s2 = (tokStream.getName(tok)).toLowerCase();
  var m = (s2.length < MAX_NAME_LENGTH ? s2.length : MAX_NAME_LENGTH);
  s2 = s2.substring(0, m) + '\u0000';

  //
  //  Singleton misspellings:
  //
  //  ;      <---->     ,
  //
  //  ;      <---->     :
  //
  //  .      <---->     ,
  //
  //  '      <---->     "
  //
  //
  if (n == 1 && m == 1) {
    if ((s1[0] == ';' && s2[0] == ',') ||
        (s1[0] == ',' && s2[0] == ';') ||
        (s1[0] == ';' && s2[0] == ':') ||
        (s1[0] == ':' && s2[0] == ';') ||
        (s1[0] == '.' && s2[0] == ',') ||
        (s1[0] == ',' && s2[0] == '.') ||
        (s1[0] == '\'' && s2[0] == '\"') ||
        (s1[0] == '\"' && s2[0] == '\'')) {
      return 3;
    }
  }

  //
  // Scan the two strings. Increment "match" count for each match.
  // When a transposition is encountered, increase "match" count
  // by two but count it as one error. When a typo is found, skip
  // it and count it as one error. Otherwise we have a mismatch; if
  // one of the strings is longer, increment its index, otherwise,
  // increment both indices and continue.
  //
  // This algorithm is an adaptation of a bool misspelling
  // algorithm proposed by Juergen Uhl.
  //
  var count = 0, prefix_length = 0, num_errors = 0;

  var i = 0, j = 0;
  while ((i < n) && (j < m)) {
    if (s1[i] == s2[j]) {
      count++;
      i++;
      j++;
      if (num_errors == 0) prefix_length++;
    } else if (s1[i + 1] == s2[j] && s1[i] == s2[j + 1]) // transposition
    {
      count += 2;
      i += 2;
      j += 2;
      num_errors++;
    } else if (s1[i + 1] == s2[j + 1]) // mismatch
    {
      i += 2;
      j += 2;
      num_errors++;
    } else {
      if ((n - i) > (m - j)) {
        i++;
      } else if ((m - j) > (n - i)) {
        j++;
      } else {
        i++;
        j++;
      }

      num_errors++;
    }
  }

  if (i < n || j < m) num_errors++;

  if (num_errors > ((n < m ? n : m) / 6 + 1)) count = prefix_length;

  return (count * 10 / ((n < s1.length ? s1.length : n) + num_errors))
      .floor();
}