tokenize method

List<Token> tokenize()

Implementation

/// Scans `input` starting at the current `_pos` and returns the tokens found.
///
/// Whitespace, `-- ...` line comments and `/* ... */` block comments are
/// skipped. Words, numbers, quoted strings and the `= < > !` operators are
/// read by `_readWord`, `_readNumber`, `_readString` and `_readOperator`.
/// The returned list always ends with a [TokenType.eof] token.
///
/// `*` and `-` are context-sensitive:
///
/// * `*` is a [TokenType.operator] when it follows an operand (identifier,
///   number or `)`), otherwise a [TokenType.symbol] (`SELECT *`, `COUNT(*)`).
/// * `-` directly followed by a digit starts a number unless it follows an
///   operand; in every other case it is emitted as a `-` operator.
///
/// An unterminated block comment consumes the rest of the input.
///
/// Throws a [FormatException] when a character matches none of the rules.
List<Token> tokenize() {
  final tokens = <Token>[];

  // Whether the most recent token can end an operand. Used to tell binary
  // `*` / `-` apart from the wildcard `*` and the sign of a numeric literal.
  bool followsOperand() =>
      tokens.isNotEmpty &&
      (tokens.last.type == TokenType.identifier ||
          tokens.last.type == TokenType.number ||
          tokens.last.value == ')');

  while (_pos < input.length) {
    final c = input[_pos];

    if (_isWhitespace(c)) {
      _pos++;
      continue;
    }

    // Single-line comment: -- ...
    // Checked before numbers and operators so `--5` is a comment, not `- -5`.
    if (c == '-' && _peek(1) == '-') {
      while (_pos < input.length && input[_pos] != '\n') {
        _pos++;
      }
      continue;
    }

    // Multi-line comment: /* ... */
    if (c == '/' && _peek(1) == '*') {
      _pos += 2;
      while (_pos < input.length - 1 &&
          !(input[_pos] == '*' && input[_pos + 1] == '/')) {
        _pos++;
      }
      // Skip the closing */ when it was found; otherwise the comment is
      // unterminated and runs to the end of input. Clamping keeps _pos from
      // moving past input.length.
      _pos = _pos < input.length - 1 ? _pos + 2 : input.length;
      continue;
    }

    if (_isLetter(c) || c == '_') {
      tokens.add(_readWord());
      continue;
    }

    // A leading '-' belongs to the number only when it cannot be a binary
    // minus, i.e. when the previous token is not an operand (`a-1` is
    // `a`, `-`, `1`, while `(-1` and `= -1` keep the signed literal).
    if (_isDigit(c) ||
        (c == '-' && _isDigit(_peek(1) ?? '') && !followsOperand())) {
      tokens.add(_readNumber());
      continue;
    }

    if (c == '"' || c == "'") {
      tokens.add(_readString(c));
      continue;
    }

    if ('=<>!'.contains(c)) {
      tokens.add(_readOperator());
      continue;
    }

    // Plus/minus as arithmetic operators. A '-' that reaches this point is
    // neither a comment start nor the sign of a numeric literal.
    if (c == '+' || c == '-') {
      _pos++;
      tokens.add(Token(TokenType.operator, c));
      continue;
    }

    // '*' is an operator only when it follows an operand (identifier,
    // number, ')'). Otherwise it is a symbol (SELECT *, COUNT(*), etc.)
    if (c == '*') {
      final isArithStar = followsOperand();
      _pos++;
      tokens.add(Token(
        isArithStar ? TokenType.operator : TokenType.symbol,
        '*',
      ));
      continue;
    }

    if (',();./%'.contains(c)) {
      tokens.add(Token(TokenType.symbol, c));
      _pos++;
      continue;
    }

    throw FormatException('Unexpected character: "$c" at position $_pos');
  }
  tokens.add(Token(TokenType.eof, ''));
  return tokens;
}