tokenize static method

List<Token> tokenize(Scanner scanner)

Parse tokens from a string.

Implementation

/// Splits the input behind [scanner] into a flat list of [Token]s.
///
/// Leading whitespace is skipped first. After that `scanner.chainParse` is
/// given one handler per token prefix, and each handler appends at most one
/// token:
///
/// * `<#` - comment; text read via `matchBrackets("<#", "#>")`, then trimmed
/// * `<!` - declaration; name plus attributes
/// * `<` - opening tag; name plus attributes, flagged as self-closing when
///   the two characters behind the scanner are `/>`
/// * `</` - closing tag; name that must be followed by `>`
/// * `[!CDATA]` - cdata block; text read via
///   `matchBrackets("[!CDATA]", "[!CEND]")`, not trimmed
/// * empty key - raw text; backslash escapes are resolved, the result is
///   trimmed and passed through `compact`, and an empty result emits nothing
///
/// Every token carries the scanner position recorded when its handler was
/// entered.
///
/// Throws the value returned by `scanner.unexpectedSymbol()` when a closing
/// tag's name is not followed by `>`, and the string `"backslash at eof"`
/// when `scanner.seek(2)` fails right after a backslash in raw text.
static List<Token> tokenize(Scanner scanner) {
	final tokens = <Token>[];
	// drop whitespace in front of the first token
	scanner.skipWhitespace();
	// NOTE(review): "<" is registered ahead of "</", so this presumably relies
	// on chainParse preferring the longest matching key rather than the first
	// one in insertion order - confirm against Scanner.chainParse.
	scanner.chainParse({
		// comments
		"<#": () {
			final position = scanner.position;
			final comment = scanner.matchBrackets("<#", "#>").trim();
			tokens.add(Token(TokenType.comment, position, text: comment));
		},
		// declarations
		"<!": () {
			final position = scanner.position;
			// step over the two-character opener
			scanner.seek(2);
			scanner.skipWhitespace();
			final name = scanner.parseName(NameScannerMode.tagOpen);
			final attributes = scanner.parseAttributes();
			tokens.add(Token(TokenType.declaration, position, name: name, attributes: attributes));
		},
		// openers
		"<": () {
			final position = scanner.position;
			// step over the opener
			scanner.seek();
			scanner.skipWhitespace();
			final name = scanner.parseName(NameScannerMode.tagOpen);
			final attributes = scanner.parseAttributes();
			// presumably parseAttributes consumes the closing bracket, so the
			// two characters behind the scanner tell us whether the tag ended
			// in "/>" - verify against Scanner.parseAttributes
			final closing = scanner.peek(-2) == "/>";
			tokens.add(Token(TokenType.tagOpen, position, name: name, attributes: attributes, selfClosing: closing));
		},
		// closers
		"</": () {
			final position = scanner.position;
			// step over the two-character opener
			scanner.seek(2);
			scanner.skipWhitespace();
			final name = scanner.parseName(NameScannerMode.tagOpen);
			// only whitespace may sit between the name and the closing bracket
			scanner.skipWhitespace();
			if (!scanner.matches(">")) {
				throw scanner.unexpectedSymbol();
			}
			scanner.seek();
			tokens.add(Token(TokenType.tagClose, position, name: name));
		},
		// cdata blocks
		"[!CDATA]": () {
			final position = scanner.position;
			final cdata = scanner.matchBrackets("[!CDATA]", "[!CEND]");
			tokens.add(Token(TokenType.cdata, position, text: cdata));
		},
		// raw text
		"": () {
			final position = scanner.position;
			// collect characters in a buffer; appending to a String would
			// copy the whole text again for every character read
			final buffer = StringBuffer();
			while (scanner.inBounds) {
				if (scanner.matches("\\")) {
					// move past the backslash and the character it escapes
					if (scanner.seek(2)) {
						// keep only the escaped character
						buffer.write(scanner.peek(-1));
					} else {
						// do not allow backslash at eof
						throw "backslash at eof";
					}
				} else if (scanner.matches("[!CDATA]") || scanner.matches("<")) {
					// start of another token: leave it for its own handler
					break;
				} else {
					buffer.write(scanner.peek());
					scanner.seek();
				}
			}
			final text = buffer.toString().trim();
			// whitespace-only runs produce no token
			if (text.isEmpty) {
				return;
			}

			tokens.add(Token(TokenType.text, position, text: compact(text)));
		}
	});

	return tokens;
}