tokenize method
Tokenizes source and returns the resulting token list, ending with a
TokenKind.eof sentinel.
Implementation
/// Splits [source] into a flat list of [Token]s.
///
/// Each token is constructed with its kind, its text, and the 1-based line
/// and column at which it starts. Columns are counted in UTF-16 code units,
/// because the scanner indexes [source] with `operator []`.
///
/// Scanning rules, in priority order:
///
/// * Spaces, tabs and carriage returns are skipped.
/// * Every `\n` produces a [TokenKind.newline] token.
/// * `//` starts a comment that runs up to (not including) the next `\n`;
///   no token is produced for it.
/// * `"` or `'` starts a string literal. The token text keeps the quotes and
///   the raw, undecoded escape sequences. A backslash always consumes the
///   following character, so an escaped quote does not end the literal. An
///   unterminated literal runs to the end of input and has no closing quote.
///   Literals may span lines; line and column tracking is kept in sync so
///   later tokens report correct positions.
/// * A run of characters accepted by `_isDigit` is a [TokenKind.integer].
/// * `@@` is a [TokenKind.doubleAt].
/// * A run of identifier and/or non-ASCII characters is a single
///   [TokenKind.identifier], so that a later stage can report unicode
///   identifiers as one unit.
/// * Any character known to `_singleCharKind` is a one-character token.
/// * Any other character is skipped silently.
///
/// The returned list always ends with a [TokenKind.eof] token positioned just
/// past the last character of [source].
List<Token> tokenize(String source) {
  final tokens = <Token>[];
  var pos = 0;
  var line = 1;
  // Index of the first character of the current line; used to derive columns.
  var lineStart = 0;

  while (pos < source.length) {
    final startLine = line;
    final startCol = pos - lineStart + 1;
    final char = source[pos];

    // ── Whitespace (horizontal) ───────────────────────────────────────────
    if (char == ' ' || char == '\t' || char == '\r') {
      pos++;
      continue;
    }

    // ── Newline ───────────────────────────────────────────────────────────
    if (char == '\n') {
      tokens.add(Token(TokenKind.newline, '\n', startLine, startCol));
      pos++;
      line++;
      lineStart = pos;
      continue;
    }

    // ── Line comment ──────────────────────────────────────────────────────
    // Stop *at* the newline so the next iteration still emits its token.
    if (char == '/' && pos + 1 < source.length && source[pos + 1] == '/') {
      while (pos < source.length && source[pos] != '\n') {
        pos++;
      }
      continue;
    }

    // ── String literal ────────────────────────────────────────────────────
    if (char == '"' || char == "'") {
      final quote = char;
      final start = pos;
      pos++; // opening quote
      while (pos < source.length && source[pos] != quote) {
        // A backslash takes the next character with it (when there is one),
        // which keeps an escaped quote from terminating the literal.
        if (source[pos] == '\\' && pos + 1 < source.length) {
          pos++;
        }
        // The literal may contain raw (or backslash-escaped) newlines; keep
        // the line bookkeeping in step so that tokens following the string
        // are not reported on the line where the string started.
        if (source[pos] == '\n') {
          line++;
          lineStart = pos + 1;
        }
        pos++;
      }
      if (pos < source.length) {
        pos++; // closing quote; absent when the literal is unterminated
      }
      tokens.add(
        Token(
          TokenKind.string,
          source.substring(start, pos),
          startLine,
          startCol,
        ),
      );
      continue;
    }

    // ── Integer literal ───────────────────────────────────────────────────
    if (_isDigit(char)) {
      final start = pos;
      while (pos < source.length && _isDigit(source[pos])) {
        pos++;
      }
      tokens.add(
        Token(
          TokenKind.integer,
          source.substring(start, pos),
          startLine,
          startCol,
        ),
      );
      continue;
    }

    // ── Double @@ ─────────────────────────────────────────────────────────
    // Checked before single-character punctuation so `@@` is one token.
    if (char == '@' && pos + 1 < source.length && source[pos + 1] == '@') {
      tokens.add(Token(TokenKind.doubleAt, '@@', startLine, startCol));
      pos += 2;
      continue;
    }

    // ── Identifier (ASCII or non-ASCII) ───────────────────────────────────
    // Non-ASCII characters are folded into the identifier run so that a
    // unicode identifier becomes a single token which the parser can report
    // as an error, rather than being dropped character by character.
    if (_isIdentStartChar(char) || _isNonAscii(char)) {
      final start = pos;
      while (pos < source.length &&
          (_isIdentChar(source[pos]) || _isNonAscii(source[pos]))) {
        pos++;
      }
      tokens.add(
        Token(
          TokenKind.identifier,
          source.substring(start, pos),
          startLine,
          startCol,
        ),
      );
      continue;
    }

    // ── Single-character punctuation ──────────────────────────────────────
    final kind = _singleCharKind(char);
    if (kind != null) {
      tokens.add(Token(kind, char, startLine, startCol));
      pos++;
      continue;
    }

    // ── Unknown character — skip silently ─────────────────────────────────
    pos++;
  }

  // Sentinel positioned one column past the final character.
  final eofCol = pos - lineStart + 1;
  tokens.add(Token(TokenKind.eof, '', line, eofCol));
  return tokens;
}