getTokenArray method

List<Token> getTokenArray(String text)

Tokenizes Pendart text and returns the resulting list of tokens.

Implementation

/// Tokenizes Pendart markup [text] into a flat list of [Token]s.
///
/// The text is first passed through `getCleanedText`, then scanned left to
/// right. Every loop iteration emits exactly one token, chosen by the first
/// rule that matches at the cursor:
///
/// 1. `¬c`           – escaped character `c`, emitted as text.
/// 2. `!`..`!!!!!!`  – heading (line start only, must be followed by a space);
///                     the token text is the rest of the line.
/// 3. Fixed markers  – `~~~`, `**`, `%%`, `^^`, `,,`, `~~`, `__`, `:::`, `---`
///                     and `§`, each emitted as a token with empty text.
/// 4. `[]` / `[x]`   – checkbox (line start only); the token text is the
///                     trimmed rest of the line.
/// 5. `@@`           – link start (`@@url text@@` or `@@url@@`) or link end.
/// 6. `[[src]]`      – image; anything after a size-separator colon is dropped.
/// 7. `` `code` ``   – code span.
/// 8. Runs of spaces, single newlines, and finally any other single character.
///
/// Link and image targets are checked with `_validateUrlSecurity`; a rejected
/// target is not stored and an `"error"` attribute is set instead.
///
/// Returns the tokens in source order. A token's `startsLine` is true for the
/// first token and for every token that directly follows a newline token.
List<Token> getTokenArray(String text) {
  final List<Token> tokenArray = [];
  bool tokenStartsLine = true;

  // Only the link state influences tokenization: the opening and closing
  // markers of every other paired construct produce identical tokens.
  bool isInLink = false;

  text = getCleanedText(text, tabulationSpaceCount);
  int charIndex = 0;

  // Fixed markers that yield a token without text, in match priority order.
  // "~~~" is listed before "~~" so that a page break is not consumed as a
  // strikethrough marker followed by a stray "~".
  final markerTypes = {
    "~~~": TokenType.pageBreak,
    "**": TokenType.bold,
    "%%": TokenType.italic,
    "^^": TokenType.superscript,
    ",,": TokenType.subscript,
    "~~": TokenType.strikethrough,
    "__": TokenType.underline,
    ":::": TokenType.codeBlock,
    "---": TokenType.horizontalRule,
    "§": TokenType.lineBreak,
  };

  // Returns the fixed marker that starts at [index], or null if there is none.
  String? markerAt(int index) {
    for (final String marker in markerTypes.keys) {
      if (text.startsWith(marker, index)) {
        return marker;
      }
    }
    return null;
  }

  // Returns the index of the first newline at or after [from], or the text
  // length when the current line is the last one.
  int endOfLine(int from) {
    final int newlineIndex = text.indexOf('\n', from);
    return newlineIndex == -1 ? text.length : newlineIndex;
  }

  // Stores [url] under [attributeName] when it passes the security check;
  // otherwise leaves the attribute unset and records an error message.
  void applyValidatedUrl(Token token, String attributeName, String url) {
    var (isValid, errorMessage) = _validateUrlSecurity(url);
    if (isValid) {
      token.attributes[attributeName] = url;
      if (errorMessage != null) {
        token.attributes["error"] = errorMessage;
      }
    } else {
      token.attributes["error"] = errorMessage ?? "Invalid URL";
    }
  }

  while (charIndex < text.length) {
    final Token token = Token();
    token.startsLine = tokenStartsLine;
    tokenStartsLine = false;

    final String? marker = markerAt(charIndex);

    // Handle escaped characters with ¬ (a trailing ¬ is ordinary text)
    if (charIndex < text.length - 1 && text[charIndex] == '¬') {
      token.text = text[charIndex + 1];
      token.isEscaped = true;
      token.type = TokenType.text;
      charIndex += 2;
    }
    // Handle headings
    else if (token.startsLine && text[charIndex] == '!') {
      int headingLevel = 0;
      while (charIndex < text.length && text[charIndex] == '!') {
        headingLevel++;
        charIndex++;
      }

      if (headingLevel <= 6 &&
          charIndex < text.length &&
          text[charIndex] == ' ') {
        // Skip the space after the heading marker
        charIndex++;

        token.type = switch (headingLevel) {
          1 => TokenType.heading1,
          2 => TokenType.heading2,
          3 => TokenType.heading3,
          4 => TokenType.heading4,
          5 => TokenType.heading5,
          _ => TokenType.heading6,
        };

        // The heading text is the remainder of the line
        final int lineEnd = endOfLine(charIndex);
        token.text = text.substring(charIndex, lineEnd);
        charIndex = lineEnd;
      } else {
        // Too many marks or no following space: keep the marks as plain text
        token.text = _repeat("!", headingLevel);
        token.type = TokenType.text;
      }
    }
    // Handle fixed markers (formatting toggles, code block, rule, breaks)
    else if (marker != null) {
      token.type = markerTypes[marker]!;
      token.text = ""; // Marker only, no text content
      charIndex += marker.length;
    }
    // Handle checkboxes [] and [x]. Any other "[" falls through, so that an
    // image ("[[...]]") at the start of a line is still recognised.
    else if (token.startsLine &&
        (text.startsWith("[]", charIndex) ||
            text.startsWith("[x]", charIndex))) {
      final bool isChecked = text[charIndex + 1] == 'x';
      token.type = TokenType.checkbox;
      token.attributes["checked"] = isChecked ? "true" : "false";
      charIndex += isChecked ? 3 : 2;

      // The checkbox label is the trimmed remainder of the line
      final int lineEnd = endOfLine(charIndex);
      token.text = text.substring(charIndex, lineEnd).trim();
      charIndex = lineEnd;
    }
    // Handle links
    else if (text.startsWith("@@", charIndex)) {
      charIndex += 2;
      token.type = TokenType.link;

      if (isInLink) {
        isInLink = false;
        token.text = ""; // End marker
      } else {
        isInLink = true;

        // The URL ends at the first space or at the first closing "@@"
        final int spaceIndex = text.indexOf(' ', charIndex);
        final int closeIndex = text.indexOf("@@", charIndex);

        if (spaceIndex != -1 && (closeIndex == -1 || spaceIndex < closeIndex)) {
          // URL followed by text; the text is tokenized by later iterations
          applyValidatedUrl(
              token, "href", text.substring(charIndex, spaceIndex));
          token.text = "";
          charIndex = spaceIndex + 1;
        } else if (closeIndex != -1) {
          // URL only, no text: the URL doubles as the visible text, even
          // when it is rejected as a link target
          final String urlStr = text.substring(charIndex, closeIndex);
          applyValidatedUrl(token, "href", urlStr);
          token.text = urlStr;
          isInLink = false;
          charIndex = closeIndex + 2;
        } else {
          // Unterminated link at end of input: treat what is left as a
          // URL-only link instead of silently discarding it
          final String urlStr = text.substring(charIndex);
          if (urlStr.isNotEmpty) {
            applyValidatedUrl(token, "href", urlStr);
            isInLink = false;
          }
          token.text = urlStr;
          charIndex = text.length;
        }
      }
    }
    // Handle image [[image.jpg]]
    else if (text.startsWith("[[", charIndex)) {
      final int closeIndex = text.indexOf("]]", charIndex + 2);

      if (closeIndex == -1) {
        // No closing "]]": keep the brackets as literal text and continue
        // scanning after them rather than swallowing the rest of the input
        token.type = TokenType.text;
        token.text = "[[";
        charIndex += 2;
      } else {
        token.type = TokenType.image;

        // Parse image data (simplified - no size support)
        String src = text.substring(charIndex + 2, closeIndex).trim();
        charIndex = closeIndex + 2;

        // Remove any size specification without touching the colon that is
        // part of the protocol ("https:")
        final int protocolEndsAt = src.indexOf("://");
        if (protocolEndsAt != -1) {
          // A colon after the protocol part starts the size specification
          final int sizeIndex = src.indexOf(":", protocolEndsAt + 3);
          if (sizeIndex != -1) {
            src = src.substring(0, sizeIndex);
          }
        } else {
          // No protocol: everything from the first colon on is dropped
          final int colonIndex = src.indexOf(":");
          if (colonIndex != -1) {
            src = src.substring(0, colonIndex).trim();
          }
        }

        applyValidatedUrl(token, "src", src);
        token.text = "";
      }
    }
    // Handle code spans with backticks
    else if (text[charIndex] == '`') {
      final int codeStart = charIndex + 1;
      final int closeIndex = text.indexOf('`', codeStart);
      token.type = TokenType.codeSpan;

      if (closeIndex == -1) {
        // Unterminated span: the code runs to the end of the input
        token.text = text.substring(codeStart);
        charIndex = text.length;
      } else {
        token.text = text.substring(codeStart, closeIndex);
        charIndex = closeIndex + 1; // Skip closing backtick
      }
    }
    // Handle spaces: a run of consecutive spaces becomes a single token
    else if (text[charIndex] == ' ') {
      final int runStart = charIndex;
      while (charIndex < text.length && text[charIndex] == ' ') {
        charIndex++;
      }

      token.type = TokenType.space;
      token.text = text.substring(runStart, charIndex);
      token.isSpace = true;
    }
    // Handle newlines
    else if (text[charIndex] == '\n') {
      charIndex++;
      token.type = TokenType.newline;
      token.text = "\n";
      tokenStartsLine = true;
    }
    // Handle regular characters
    else {
      token.type = TokenType.text;
      token.text = text[charIndex];
      charIndex++;
    }

    tokenArray.add(token);
  }

  return tokenArray;
}