trimNoise function

Offsets trimNoise(
  1. String text,
  2. Offsets offsets
)

Detects noise surrounding the source code and adjusts initial and ending offsets to ignore the noise.

The following things are considered noise:

  • Hash bang: e.g. "#!/usr/bin/env node"
  • HTML comment: a leading "<!--" and a trailing "-->"
  • HTML cdata tag: a leading "<![CDATA[" and a trailing "]]>"

Implementation

/// Narrows [offsets] so that it no longer covers "noise" wrapped around the
/// source code in [text].
///
/// Starting from `offsets.start`, the following are skipped, in this order:
///
///  1. A `#!` line, up to (not including) its line terminator.
///  2. Whitespace and line terminators. Each LF, LS, PS, CR or CR LF pair
///     increments the line counter by one.
///  3. A single `<!--` or `<![CDATA[` marker.
///
/// Working backwards from `offsets.end`, trailing whitespace and line
/// terminators are dropped, followed by a single `-->` or `]]>` marker.
///
/// Returns a new [Offsets] built from the adjusted start, the adjusted end and
/// the updated line number. The adjusted end is never smaller than the
/// adjusted start, and no character outside `[offsets.start, offsets.end)` is
/// inspected.
///
/// `isWhitespace` and `isEOL` are helpers defined outside this block; whether
/// `isWhitespace` also accepts line terminators is not visible from here.
Offsets trimNoise(String text, Offsets offsets) {
  int index = offsets.start;
  int end = offsets.end;
  int currentLine = offsets.line;

  // True when `str` occurs at `index` and fits entirely before `end`.
  bool lookahead(String str) {
    if (index + str.length > end) return false;
    for (int i = 0; i < str.length; i++) {
      if (text.codeUnitAt(index + i) != str.codeUnitAt(i)) return false;
    }
    return true;
  }

  // True when `str` occurs immediately before `end` without reaching back
  // past `index`. Bounding by `index` (rather than 0) prevents re-matching
  // characters that were already consumed as a prefix marker, e.g. the dashes
  // of "<!--" in the input "<!-->".
  bool lookback(String str) {
    if (end - str.length < index) return false;
    for (int i = 0; i < str.length; i++) {
      if (text.codeUnitAt(end - str.length + i) != str.codeUnitAt(i)) {
        return false;
      }
    }
    return true;
  }

  // Skip line with #!. The line terminator itself is left in place so the
  // whitespace loop below counts it.
  if (lookahead('#!')) {
    index += 2;
    while (index < end && !isEOL(text.codeUnitAt(index))) {
      ++index;
    }
  }

  // Skip whitespace until potential HTML comment marker. The loop is bounded
  // by `end` so that empty or all-whitespace input (or a hash-bang line with
  // no trailing newline) terminates instead of reading past the range.
  loop:
  while (index < end) {
    final int x = text.codeUnitAt(index);
    switch (x) {
      case char.LF:
      case char.LS:
      case char.PS:
        currentLine += 1;
        ++index;
        break;
      case char.CR:
        currentLine += 1;
        ++index;
        // CR LF counts as a single line terminator.
        if (index < end && text.codeUnitAt(index) == char.LF) {
          ++index;
        }
        break;
      default:
        if (!isWhitespace(x)) break loop;
        ++index;
    }
  }

  // Skip <!-- and <![CDATA[
  if (lookahead('<!--')) {
    index += '<!--'.length;
  } else if (lookahead('<![CDATA[')) {
    index += '<![CDATA['.length;
  }

  // Skip suffix whitespace (this is simpler than above since we do not need
  // to update the line counter). Never move `end` before `index`.
  while (end > index) {
    final int x = text.codeUnitAt(end - 1);
    if (!isWhitespace(x) && !isEOL(x)) break;
    --end;
  }

  // Check for trailing --> or ]]>
  if (lookback('-->')) {
    end -= '-->'.length;
  } else if (lookback(']]>')) {
    end -= ']]>'.length;
  }

  return Offsets(index, end, currentLine);
}