getTokenArray method
Returns the list of tokens parsed from Pendart text.
Implementation
/// Splits Pendart markup [text] into a flat list of [Token]s.
///
/// The input is first passed through `getCleanedText` with
/// `tabulationSpaceCount`, then scanned left to right. At each position the
/// first matching rule below produces exactly one token:
///
/// 1. `¬c`            escaped character `c` (text token, `isEscaped` set)
/// 2. `!`..`!!!!!!`   heading 1-6 when at line start and followed by a space;
///                    the token text is the rest of the line
/// 3. `~~~`           page break
/// 4. `**` `%%` `^^` `,,` `~~` `__`  formatting markers (empty text)
/// 5. `:::`           code block marker (empty text)
/// 6. `---`           horizontal rule
/// 7. `§`             line break
/// 8. `[]` / `[x]`    checkbox at line start; text is the trimmed rest of line
/// 9. `@@`            link start (`@@url text@@` or `@@url@@`) or link end
/// 10. `[[src]]`      image; anything after a size-specification colon is dropped
/// 11. `` `code` ``   code span
/// 12. run of spaces, newline, or any other single character
///
/// Link and image URLs are checked with `_validateUrlSecurity`; a rejected
/// URL is not stored and an `"error"` attribute is set instead.
///
/// Returns the tokens in source order. An empty cleaned text yields an empty
/// list.
List<Token> getTokenArray(String text) {
  final List<Token> tokenArray = [];
  bool tokenStartsLine = true;

  // Links are the only construct whose opening and closing markers produce
  // different tokens, so they are the only one needing open/closed state.
  bool isInLink = false;

  // Two-character formatting markers. Opening and closing markers emit the
  // same token; pairing them is left to whoever consumes the token list.
  final Map<String, TokenType> inlineMarkerTypes = {
    "**": TokenType.bold,
    "%%": TokenType.italic,
    "^^": TokenType.superscript,
    ",,": TokenType.subscript,
    "~~": TokenType.strikethrough,
    "__": TokenType.underline,
  };

  text = getCleanedText(text, tabulationSpaceCount);

  // Index of the next '\n' at or after [start], or text.length if none.
  int lineEndFrom(int start) {
    final int newlineIndex = text.indexOf('\n', start);
    return newlineIndex == -1 ? text.length : newlineIndex;
  }

  // Stores [url] under [attributeName] when it passes the security check;
  // otherwise records only the error message.
  void attachValidatedUrl(Token target, String attributeName, String url) {
    final (isValid, errorMessage) = _validateUrlSecurity(url);
    if (isValid) {
      target.attributes[attributeName] = url;
      // A valid URL may still carry a message; keep it alongside the URL.
      if (errorMessage != null) {
        target.attributes["error"] = errorMessage;
      }
    } else {
      target.attributes["error"] = errorMessage ?? "Invalid URL";
    }
  }

  int charIndex = 0;
  while (charIndex < text.length) {
    final Token token = Token();
    token.startsLine = tokenStartsLine;
    tokenStartsLine = false;

    // The two characters at the cursor, used to look up formatting markers.
    final String markerPair = charIndex + 1 < text.length
        ? text.substring(charIndex, charIndex + 2)
        : "";
    final TokenType? inlineMarkerType = inlineMarkerTypes[markerPair];

    // Escaped character: ¬ followed by any character. A trailing ¬ with
    // nothing after it falls through and is emitted as plain text.
    if (charIndex < text.length - 1 && text[charIndex] == '¬') {
      token.text = text[charIndex + 1];
      token.isEscaped = true;
      token.type = TokenType.text;
      charIndex += 2;
    }
    // Heading: 1-6 '!' at line start followed by a space.
    else if (token.startsLine && text.startsWith("!", charIndex)) {
      int headingLevel = 0;
      while (charIndex < text.length && text[charIndex] == '!') {
        headingLevel++;
        charIndex++;
      }
      final bool isHeading = headingLevel >= 1 &&
          headingLevel <= 6 &&
          charIndex < text.length &&
          text[charIndex] == ' ';
      if (isHeading) {
        charIndex++; // Skip the space after the heading marker.
        token.type = switch (headingLevel) {
          1 => TokenType.heading1,
          2 => TokenType.heading2,
          3 => TokenType.heading3,
          4 => TokenType.heading4,
          5 => TokenType.heading5,
          _ => TokenType.heading6, // Only 6 remains after the range check.
        };
        // The heading text is everything up to (not including) the newline.
        final int headingEnd = lineEndFrom(charIndex);
        token.text = text.substring(charIndex, headingEnd);
        charIndex = headingEnd;
      } else {
        // Not a heading: emit the run of '!' as literal text.
        token.text = _repeat("!", headingLevel);
        token.type = TokenType.text;
      }
    }
    // Page break. Must be tested before the "~~" strikethrough marker,
    // which would otherwise always match first and make this unreachable.
    else if (text.startsWith("~~~", charIndex)) {
      token.type = TokenType.pageBreak;
      token.text = "";
      charIndex += 3;
    }
    // Bold / italic / superscript / subscript / strikethrough / underline.
    else if (inlineMarkerType != null) {
      token.type = inlineMarkerType;
      token.text = ""; // Marker only, no text content
      charIndex += 2;
    }
    // Code block marker. Start and end markers emit identical tokens.
    else if (text.startsWith(":::", charIndex)) {
      token.type = TokenType.codeBlock;
      token.text = "";
      charIndex += 3;
    }
    // Horizontal rule
    else if (text.startsWith("---", charIndex)) {
      token.type = TokenType.horizontalRule;
      token.text = "";
      charIndex += 3;
    }
    // Line break
    else if (text.startsWith("§", charIndex)) {
      token.type = TokenType.lineBreak;
      token.text = "";
      charIndex += 1;
    }
    // Checkbox at line start: "[]" (unchecked) or "[x]" (checked). Any other
    // '[' falls through so that a line starting with "[[...]]" still reaches
    // the image rule below.
    else if (token.startsLine &&
        (text.startsWith("[]", charIndex) ||
            text.startsWith("[x]", charIndex))) {
      final bool isChecked = text[charIndex + 1] == 'x';
      token.type = TokenType.checkbox;
      token.attributes["checked"] = isChecked ? "true" : "false";
      charIndex += isChecked ? 3 : 2;
      // The checkbox label is the trimmed remainder of the line.
      final int labelEnd = lineEndFrom(charIndex);
      token.text = text.substring(charIndex, labelEnd).trim();
      charIndex = labelEnd;
    }
    // Link: "@@url text@@" or "@@url@@".
    else if (text.startsWith("@@", charIndex)) {
      charIndex += 2;
      token.type = TokenType.link;
      if (isInLink) {
        isInLink = false;
        token.text = ""; // End marker
      } else {
        isInLink = true;
        final int urlStart = charIndex;
        // The URL ends at the first space (link text follows) or at the
        // closing "@@" (URL doubles as the link text).
        // NOTE(review): if neither occurs before the end of the input, the
        // URL characters are consumed without being stored and the link
        // stays open — confirm whether that is intended.
        while (charIndex < text.length) {
          final bool endsAtSpace = text[charIndex] == ' ';
          final bool endsAtMarker =
              !endsAtSpace && text.startsWith("@@", charIndex);
          if (endsAtSpace || endsAtMarker) {
            final String url = text.substring(urlStart, charIndex);
            attachValidatedUrl(token, "href", url);
            if (endsAtSpace) {
              token.text = ""; // Link text arrives as subsequent tokens.
              charIndex++;
            } else {
              // Shown as text even when the URL was rejected.
              token.text = url;
              isInLink = false;
              charIndex += 2;
            }
            break;
          }
          charIndex++;
        }
      }
    }
    // Image: "[[src]]" or "[[src:size]]" (size is discarded).
    else if (text.startsWith("[[", charIndex)) {
      charIndex += 2;
      final int closeIndex = text.indexOf("]]", charIndex);
      if (closeIndex == -1) {
        // NOTE(review): an unterminated "[[" swallows the rest of the input
        // and the token keeps its default type and text — confirm intended.
        charIndex = text.length;
      } else {
        String src = text.substring(charIndex, closeIndex).trim();
        charIndex = closeIndex + 2; // Skip the closing ]]
        token.type = TokenType.image;

        // Strip a trailing ":size" part without touching the colon that
        // belongs to the protocol ("https://").
        final int protocolEndsAt = src.indexOf("://");
        if (protocolEndsAt != -1) {
          final int sizeColon = src.indexOf(":", protocolEndsAt + 3);
          if (sizeColon != -1) {
            src = src.substring(0, sizeColon);
          }
        } else {
          // No protocol: everything after the first colon is dropped.
          final int firstColon = src.indexOf(":");
          if (firstColon != -1) {
            src = src.substring(0, firstColon).trim();
          }
        }

        attachValidatedUrl(token, "src", src);
        token.text = "";
      }
    }
    // Code span delimited by backticks; an unterminated span runs to the
    // end of the input.
    else if (text[charIndex] == '`') {
      charIndex++;
      token.type = TokenType.codeSpan;
      final int closingTick = text.indexOf('`', charIndex);
      if (closingTick == -1) {
        token.text = text.substring(charIndex);
        charIndex = text.length;
      } else {
        token.text = text.substring(charIndex, closingTick);
        charIndex = closingTick + 1; // Skip closing backtick
      }
    }
    // Run of consecutive spaces collapsed into a single token.
    else if (text[charIndex] == ' ') {
      int runEnd = charIndex + 1;
      while (runEnd < text.length && text[runEnd] == ' ') {
        runEnd++;
      }
      token.type = TokenType.space;
      token.text = text.substring(charIndex, runEnd);
      token.isSpace = true;
      charIndex = runEnd;
    }
    // Newline: the next token starts a line.
    else if (text[charIndex] == '\n') {
      charIndex++;
      token.type = TokenType.newline;
      token.text = "\n";
      tokenStartsLine = true;
    }
    // Any other single character.
    else {
      token.type = TokenType.text;
      token.text = text[charIndex];
      charIndex++;
    }

    tokenArray.add(token);
  }

  return tokenArray;
}