tokenize method

List<Token> tokenize()

Implementation

/// Splits [input] into a list of [Token]s, scanning from the current `_pos`.
///
/// Whitespace, `-- ...` line comments and `/* ... */` block comments produce
/// no tokens. A block comment that is never closed runs to the end of
/// [input]. The returned list always ends with a [TokenType.eof] token whose
/// value is the empty string.
///
/// Throws a [FormatException] when the current character matches none of the
/// rules below.
List<Token> tokenize() {
  final tokens = <Token>[];
  while (_pos < input.length) {
    final c = input[_pos];

    if (_isWhitespace(c)) {
      _pos++;
      continue;
    }

    // Single-line comment: -- ...
    // The scan stops ON the '\n' (or at end of input); the newline itself is
    // left for the next loop iteration.
    if (c == '-' && _peek(1) == '-') {
      while (_pos < input.length && input[_pos] != '\n') {
        _pos++;
      }
      continue;
    }

    // Multi-line comment: /* ... */
    if (c == '/' && _peek(1) == '*') {
      _pos += 2; // skip /*
      while (_pos + 1 < input.length &&
          !(input[_pos] == '*' && input[_pos + 1] == '/')) {
        _pos++;
      }
      // If the loop stopped on a closing "*/", skip it. Otherwise the comment
      // is unterminated and swallows the rest of the input; clamp to the end
      // so `_pos` never moves past `input.length`.
      _pos = _pos + 1 < input.length ? _pos + 2 : input.length;
      continue;
    }

    if (_isLetter(c) || c == '_') {
      tokens.add(_readWord());
      continue;
    }

    if (_isDigit(c)) {
      tokens.add(_readNumber());
      continue;
    }

    // A leading '-' is treated as the start of a negative-number literal ONLY
    // when the previous token cannot be the end of an expression (i.e. it is
    // NOT an identifier, a number, ')' or a boolean/null keyword).
    // In all other positions '-' is an arithmetic subtraction operator.
    if (c == '-' && _isDigit(_peek(1) ?? '')) {
      final prev = tokens.isNotEmpty ? tokens.last : null;
      final prevIsExprEnd = prev != null &&
          (prev.type == TokenType.identifier ||
              prev.type == TokenType.number ||
              prev.value == ')' ||
              prev.value == 'true' ||
              prev.value == 'false' ||
              prev.value == 'null');
      if (!prevIsExprEnd) {
        // `_readNumber` is called while positioned on the '-' sign.
        tokens.add(_readNumber());
        continue;
      }
      // Fall through: emitted as a subtraction operator by the `c == '-'`
      // branch further down.
    }

    if (c == '"' || c == "'") {
      tokens.add(_readString(c));
      continue;
    }

    if ('=<>!'.contains(c)) {
      tokens.add(_readOperator());
      continue;
    }

    // Plus/minus as arithmetic operators.
    if (c == '+') {
      _pos++;
      tokens.add(Token(TokenType.operator, '+'));
      continue;
    }
    if (c == '-') {
      // Not a comment and not a negative-number literal: both cases were
      // handled above and either consumed the '-' or fell through to here.
      _pos++;
      tokens.add(Token(TokenType.operator, '-'));
      continue;
    }

    // '*' is an operator only when it follows an operand (identifier, number,
    // ')'). Otherwise it is a symbol (SELECT *, COUNT(*), etc.).
    if (c == '*') {
      final isArithStar = tokens.isNotEmpty &&
          (tokens.last.type == TokenType.identifier ||
              tokens.last.type == TokenType.number ||
              tokens.last.value == ')');
      _pos++;
      tokens.add(Token(
        isArithStar ? TokenType.operator : TokenType.symbol,
        '*',
      ));
      continue;
    }

    // Remaining single-character punctuation is emitted as a symbol token.
    if (',();./%'.contains(c)) {
      tokens.add(Token(TokenType.symbol, c));
      _pos++;
      continue;
    }

    throw FormatException('Unexpected character: "$c" at position $_pos');
  }
  tokens.add(Token(TokenType.eof, ''));
  return tokens;
}