tokenize method

List<Token> tokenize(
  String source
)

Tokenizes source and returns the resulting token list, terminated by a TokenKind.eof sentinel.

Implementation

/// Tokenizes [source] and returns the resulting token list, terminated by a
/// [TokenKind.eof] sentinel.
///
/// Emitted line and column numbers are 1-based and measured in UTF-16 code
/// units. Malformed input is tokenized best-effort (an unterminated string
/// still produces a [TokenKind.string] token; unknown characters are skipped
/// silently) so that error reporting can happen in the parser.
List<Token> tokenize(String source) {
  final tokens = <Token>[];
  var pos = 0; // Current index into source.
  var line = 1; // 1-based line number of `pos`.
  var lineStart = 0; // Index of the first code unit of the current line.

  while (pos < source.length) {
    final startLine = line;
    final startCol = pos - lineStart + 1;
    final char = source[pos];

    // ── Whitespace (horizontal) ─────────────────────────────────────────
    // '\r' is treated as plain whitespace, so "\r\n" collapses to one
    // newline token when the '\n' is seen below.
    if (char == ' ' || char == '\t' || char == '\r') {
      pos++;
      continue;
    }

    // ── Newline ─────────────────────────────────────────────────────────
    if (char == '\n') {
      tokens.add(Token(TokenKind.newline, '\n', startLine, startCol));
      pos++;
      line++;
      lineStart = pos;
      continue;
    }

    // ── Line comment ────────────────────────────────────────────────────
    // Comments are dropped entirely; the terminating '\n' is left in place
    // and emitted as a newline token on the next iteration.
    if (char == '/' && pos + 1 < source.length && source[pos + 1] == '/') {
      while (pos < source.length && source[pos] != '\n') {
        pos++;
      }
      continue;
    }

    // ── String literal ──────────────────────────────────────────────────
    // The lexeme keeps its surrounding quotes and raw escape sequences;
    // escape interpretation is left to later phases.
    if (char == '"' || char == "'") {
      final quote = char;
      final buf = StringBuffer()..write(char);
      pos++;
      while (pos < source.length && source[pos] != quote) {
        if (source[pos] == '\\' && pos + 1 < source.length) {
          // A backslash may escape a raw newline; keep line info accurate.
          if (source[pos + 1] == '\n') {
            line++;
            lineStart = pos + 2;
          }
          buf
            ..write(source[pos])
            ..write(source[pos + 1]);
          pos += 2;
        } else {
          // Fix: a raw newline inside a string previously left `line` and
          // `lineStart` stale, corrupting the positions of every token
          // emitted after a multi-line string (and the eof column).
          if (source[pos] == '\n') {
            line++;
            lineStart = pos + 1;
          }
          buf.write(source[pos]);
          pos++;
        }
      }
      if (pos < source.length) {
        buf.write(source[pos]); // Closing quote.
        pos++;
      }
      // An unterminated string reaches here with no closing quote in `buf`;
      // the token is still emitted so the parser can diagnose it.
      tokens.add(
        Token(TokenKind.string, buf.toString(), startLine, startCol),
      );
      continue;
    }

    // ── Integer literal ─────────────────────────────────────────────────
    if (_isDigit(char)) {
      final start = pos;
      while (pos < source.length && _isDigit(source[pos])) {
        pos++;
      }
      tokens.add(
        Token(
          TokenKind.integer,
          source.substring(start, pos),
          startLine,
          startCol,
        ),
      );
      continue;
    }

    // ── Double @@ ───────────────────────────────────────────────────────
    // Checked before the single-character table so '@@' is not split into
    // two single-'@' tokens.
    if (char == '@' && pos + 1 < source.length && source[pos + 1] == '@') {
      tokens.add(Token(TokenKind.doubleAt, '@@', startLine, startCol));
      pos += 2;
      continue;
    }

    // ── Identifier (ASCII or non-ASCII) ─────────────────────────────────
    // Collect any run of characters that are not known punctuation/whitespace
    // so that unicode identifiers become a single token and can be reported
    // as errors by the parser.
    if (_isIdentStartChar(char) || _isNonAscii(char)) {
      final start = pos;
      while (pos < source.length &&
          (_isIdentChar(source[pos]) || _isNonAscii(source[pos]))) {
        pos++;
      }
      tokens.add(
        Token(
          TokenKind.identifier,
          source.substring(start, pos),
          startLine,
          startCol,
        ),
      );
      continue;
    }

    // ── Single-character punctuation ────────────────────────────────────
    final kind = _singleCharKind(char);
    if (kind != null) {
      tokens.add(Token(kind, char, startLine, startCol));
      pos++;
      continue;
    }

    // ── Unknown character — skip silently ───────────────────────────────
    pos++;
  }

  final eofCol = pos - lineStart + 1;
  tokens.add(Token(TokenKind.eof, '', line, eofCol));
  return tokens;
}