tokenize function

List<HighlightSpan> tokenize(
  1. String code,
  2. String language
)

Tokenizes source code into highlighted spans using a rule-based approach. This is not a full parser, but it is good enough for display purposes.

Implementation

/// Splits [code] into a list of [HighlightSpan]s for the given [language].
///
/// The whole input is returned as a single plain span when [language] is
/// `'plaintext'` or when `_getRules` yields no rules for it. Otherwise the
/// input is scanned left to right: at each position the rules are tried in
/// order and the first one producing a non-empty prefix match wins. Stretches
/// of text that no rule matches are gathered into plain spans.
List<HighlightSpan> tokenize(String code, String language) {
  // Fallback result: the entire input as one plain span.
  List<HighlightSpan> wholeAsPlain() =>
      [HighlightSpan(code, HighlightTokenType.plain)];

  if (language == 'plaintext') return wholeAsPlain();

  final rules = _getRules(language);
  if (rules.isEmpty) return wholeAsPlain();

  final result = <HighlightSpan>[];
  var pendingFrom = 0; // start of the text not yet emitted as a span
  var cursor = 0; // current scan position in [code]

  // Emits the not-yet-emitted text in [pendingFrom, end) as a plain span,
  // if there is any.
  void flushPlainUpTo(int end) {
    if (end > pendingFrom) {
      result.add(
        HighlightSpan(
          code.substring(pendingFrom, end),
          HighlightTokenType.plain,
        ),
      );
    }
  }

  while (cursor < code.length) {
    HighlightTokenType? kind;
    String? lexeme;

    for (final rule in rules) {
      final hit = rule.pattern.matchAsPrefix(code, cursor);
      // A rule that fails, or that only matches the empty string, is skipped;
      // accepting an empty match would never advance the cursor.
      if (hit == null || hit.end <= cursor) continue;
      kind = rule.type;
      lexeme = hit[0]!;
      break;
    }

    if (kind == null || lexeme == null) {
      // Nothing matched here: leave the character pending as plain text.
      cursor += 1;
      continue;
    }

    // Unmatched text preceding the token goes out first, then the token.
    flushPlainUpTo(cursor);
    result.add(HighlightSpan(lexeme, kind));
    cursor += lexeme.length;
    pendingFrom = cursor;
  }

  // Whatever is left after the last token is plain text.
  flushPlainUpTo(code.length);

  return result;
}