tokenize static method
Parse tokens from a string.
Implementation
/// Tokenizes the input held by [scanner] and returns the collected tokens.
///
/// Leading whitespace is skipped first. The scanner's `chainParse` is then
/// given a table that maps an opening marker to the handler consuming it:
///
/// * `<#` ... `#>` : comment (text is trimmed)
/// * `<!`          : declaration (name plus attributes)
/// * `</`          : closing tag (name, then a mandatory `>`)
/// * `<`           : opening tag (name plus attributes, maybe self-closing)
/// * `[!CDATA]` ... `[!CEND]` : cdata block
/// * `""`          : raw text, up to the next `<` or `[!CDATA]`
///
/// Every handler records `scanner.position` before consuming anything and
/// stores it on the token it appends. Raw text that is empty after trimming
/// produces no token.
///
/// Throws the value returned by `scanner.unexpectedSymbol()` when a closing
/// tag's name is not followed by `>`, and the string `"backslash at eof"`
/// when `scanner.seek(2)` reports failure right after a backslash in raw text.
static List<Token> tokenize(Scanner scanner) {
  final tokens = <Token>[];

  // Skip leading whitespace before handing control to the dispatch table.
  scanner.skipWhitespace();

  scanner.chainParse({
    // comments
    "<#": () {
      final position = scanner.position;
      final comment = scanner.matchBrackets("<#", "#>").trim();
      tokens.add(Token(TokenType.comment, position, text: comment));
    },

    // declarations
    "<!": () {
      final position = scanner.position;
      // Step over the two-character opener, then any padding before the name.
      scanner.seek(2);
      scanner.skipWhitespace();
      final name = scanner.parseName(NameScannerMode.tagOpen);
      final attributes = scanner.parseAttributes();
      tokens.add(Token(TokenType.declaration, position,
          name: name, attributes: attributes));
    },

    // closers
    // NOTE(review): listed ahead of the bare "<" entry so that this handler
    // stays reachable if chainParse tries keys in insertion order; if it
    // picks the longest matching key the order is irrelevant. Confirm
    // against chainParse.
    "</": () {
      final position = scanner.position;
      // Step over the two-character opener, then any padding before the name.
      scanner.seek(2);
      scanner.skipWhitespace();
      // NOTE(review): the closer's name is read with NameScannerMode.tagOpen,
      // the same mode the openers use; confirm this is intended.
      final name = scanner.parseName(NameScannerMode.tagOpen);
      // A closer must end with ">" (optionally preceded by whitespace).
      scanner.skipWhitespace();
      if (!scanner.matches(">")) {
        throw scanner.unexpectedSymbol();
      }
      scanner.seek();
      tokens.add(Token(TokenType.tagClose, position, name: name));
    },

    // openers
    "<": () {
      final position = scanner.position;
      // Step over the opener, then any padding before the name.
      scanner.seek();
      scanner.skipWhitespace();
      final name = scanner.parseName(NameScannerMode.tagOpen);
      final attributes = scanner.parseAttributes();
      // The tag is self-closing when the text just behind the cursor is "/>".
      // Presumably parseAttributes has already consumed the closing bracket;
      // verify against Scanner.
      final selfClosing = scanner.peek(-2) == "/>";
      tokens.add(Token(TokenType.tagOpen, position,
          name: name, attributes: attributes, selfClosing: selfClosing));
    },

    // cdata blocks
    "[!CDATA]": () {
      final position = scanner.position;
      final cdata = scanner.matchBrackets("[!CDATA]", "[!CEND]");
      tokens.add(Token(TokenType.cdata, position, text: cdata));
    },

    // raw text
    "": () {
      final position = scanner.position;
      // Collect into a buffer instead of rebuilding a string per character.
      final buffer = StringBuffer();
      while (scanner.inBounds) {
        if (scanner.matches("\\")) {
          // Step over the backslash and the character it escapes...
          if (scanner.seek(2)) {
            // ...then keep the character that was just passed.
            buffer.write(scanner.peek(-1));
          } else {
            // A backslash with nothing after it is not allowed.
            throw "backslash at eof";
          }
        } else if (scanner.matches("[!CDATA]") || scanner.matches("<")) {
          // Start of markup or cdata: the text run ends here, unconsumed.
          break;
        } else {
          buffer.write(scanner.peek());
          scanner.seek();
        }
      }
      // Whitespace-only runs are dropped rather than emitted as empty tokens.
      final text = buffer.toString().trim();
      if (text.isEmpty) {
        return;
      }
      tokens.add(Token(TokenType.text, position, text: compact(text)));
    },
  });

  return tokens;
}