parseString static method

List<XmlNode>? parseString(
  String string, {
  bool parseCharacterEntities = true,
  bool parseComments = false,
  bool trimWhitespace = true,
  bool parseCdataAsText = true,
  List<Type>? returnNodesOfType,
  int start = 0,
  int? stop,
})

Returns a list of every root level node found in string. string must not be null.

If parseCharacterEntities is true, text values will be parsed and all encoded character entities will be replaced with their corresponding characters. parseCharacterEntities must not be null.

If parseComments is false, comments will be scrubbed from string before parsing; if it is true, comments are parsed and returned as nodes. parseComments must not be null.

If trimWhitespace is true, unnecessary whitespace between nodes will be removed and all remaining whitespace will be replaced with a single space. trimWhitespace must not be null.

If returnNodesOfType is not null, only the nodes of the types contained in returnNodesOfType will be returned, otherwise, all nodes, regardless of type, will be returned.

start and stop refer to the indexes of the identified nodes. Only matches found between start and stop will be returned. start must not be null and must be >= 0. stop may be null, but must be >= start if provided.

Returns null if no nodes were found.

Implementation

/// Parses [string] and returns every root level node found in it, or
/// `null` if no nodes were found.
///
/// If [parseCharacterEntities] is `true`, it is forwarded to the node
/// parsers so that encoded character entities in text values are replaced
/// with their corresponding characters.
///
/// If [parseComments] is `false`, comments are removed from [string]
/// before parsing; otherwise they are returned as [XmlComment] nodes. An
/// unclosed comment is treated as running to the end of [string].
///
/// If [trimWhitespace] is `true`, [string] is passed through
/// `trimWhitespace()` before parsing.
///
/// If [parseCdataAsText] is `true`, CDATA sections are returned as
/// [XmlText] nodes instead of [XmlCdata] nodes.
///
/// If [returnNodesOfType] is not `null`, only nodes whose runtime type is
/// contained in it are returned.
///
/// [start] and [stop] are zero-based, inclusive indexes into the sequence
/// of delimited nodes that pass the [returnNodesOfType] filter. Plain text
/// found between delimited nodes is filtered by type but is not counted
/// against [start] and [stop]. Once [stop] has been reached, nothing that
/// follows it in [string] is returned. [start] must be >= 0 and [stop], if
/// provided, must be >= [start]; both are only checked by `assert`.
static List<XmlNode>? parseString(
  String string, {
  bool parseCharacterEntities = true,
  bool parseComments = false,
  bool trimWhitespace = true,
  bool parseCdataAsText = true,
  List<Type>? returnNodesOfType,
  int start = 0,
  int? stop,
}) {
  assert(start >= 0);
  assert(stop == null || stop >= start);

  if (!parseComments) string = string.removeComments();
  if (trimWhitespace) string = string.trimWhitespace();
  string = string.trim();

  final nodes = <XmlNode>[];
  var nodeCount = 0;

  // Set when the loop exits because [stop] was reached. In that case the
  // remainder of [string] is unparsed markup, not trailing text.
  var reachedStop = false;

  // Whether nodes of [type] were requested by the caller.
  bool isRequested(Type type) => returnNodesOfType?.contains(type) ?? true;

  while (string.contains(_delimiter)) {
    RegExpMatch? delimiter;
    String? node;

    // Points [delimiter] and [node] at the first match of [regExp] in the
    // remaining string, or clears both if there is no match.
    void setNode(RegExp regExp) {
      delimiter = regExp.firstMatch(string);
      node = delimiter?.group(0);
    }

    setNode(_delimiter);

    // Anything in front of the first delimiter is plain text.
    if (delimiter!.start > 0) {
      final text = string.substring(0, delimiter!.start).trimRight();

      if (text.isNotEmpty) {
        // Apply the same type filter as the trailing text below.
        if (isRequested(XmlText)) nodes.add(XmlText(text));
        string = string.substring(delimiter!.start);
        setNode(_delimiter);
      }
    }

    XmlNode? xmlNode;

    if (node!.startsWith('<?')) {
      if (node!.startsWith('<?xml')) {
        // If it's a XML declaration...
        setNode(Delimiters.xmlDeclaration);
        if (node != null) {
          xmlNode = XmlDeclaration.from(node!, trimWhitespace: false);
        }
      } else {
        // If it's a processing instruction declaration...
        setNode(Delimiters.processingInstruction);
        if (node != null) {
          xmlNode =
              XmlProcessingInstruction.from(node!, trimWhitespace: false);
        }
      }
    } else if (node!.startsWith('<!')) {
      // If it's a comment...
      if (node!.startsWith('<!--')) {
        // If the delimiter wasn't closed by a comment delimiter...
        if (!node!.endsWith('-->')) {
          // Try to find the actual comment delimiter
          setNode(Delimiters.comment);

          // If the comment wasn't closed...
          if (delimiter == null) {
            // The entirety of the remaining string is commented.
            string += '-->';
            setNode(Delimiters.comment);
          }
        }

        // Parse the node as a comment.
        xmlNode = XmlComment.from(node!, trimWhitespace: false);
      } else {
        // If it's a markup delimiter...
        // [type] is null when the markup has no recognisable type name.
        final type = _markupStartDelimiter
            .firstMatch(node!)
            ?.namedGroup('type')
            ?.toUpperCase();

        if (type == 'ATTLIST') {
          setNode(Delimiters.attlist);
          if (node != null) {
            xmlNode = XmlAttlist.from(node!, trimWhitespace: false);
          }
        } else if (type == 'CDATA') {
          setNode(Delimiters.cdata);
          if (node != null) {
            if (parseCdataAsText) {
              xmlNode =
                  XmlText.from(node!, trimWhitespace: false, isMarkup: true);
            } else {
              xmlNode = XmlCdata.from(node!, trimWhitespace: false);
            }
          }
        } else if (type == 'DOCTYPE') {
          setNode(Delimiters.doctype);
          if (node != null) {
            xmlNode = XmlDoctype.from(
              node!,
              parseCharacterEntities: parseCharacterEntities,
              parseComments: true,
              trimWhitespace: false,
              parseCdataAsText: parseCdataAsText,
            );
          }
        } else if (type == 'ELEMENT') {
          setNode(Delimiters.etd);
          if (node != null) {
            xmlNode = XmlEtd.from(
              node!,
              trimWhitespace: false,
            );
          }
        } else if (type == 'ENTITY') {
          setNode(Delimiters.entity);
          if (node != null) {
            xmlNode = XmlEntity.from(
              node!,
              trimWhitespace: trimWhitespace,
            );
          }
        } else if (type == 'INCLUDE' ||
            type == 'IGNORE' ||
            (type != null &&
                (type.startsWith('&') || type.startsWith('%')) &&
                type.endsWith(';'))) {
          // A conditional section, named directly or through an entity
          // reference. The null check keeps untyped markup from throwing
          // here so it can reach the text fallback below.
          setNode(Delimiters.conditional);
          if (node != null) {
            xmlNode = XmlConditional.from(node!,
                parseCharacterEntities: parseCharacterEntities,
                parseComments: true,
                trimWhitespace: false,
                parseCdataAsText: parseCdataAsText);
          }
        } else if (type == 'NOTATION') {
          setNode(Delimiters.notation);
          if (node != null) {
            xmlNode = XmlNotation.from(
              node!,
              trimWhitespace: false,
            );
          }
        } else {
          // Unrecognised markup is kept as text.
          xmlNode = XmlText.from(
            node!,
            isMarkup: true,
            parseCharacterEntities: parseCharacterEntities,
            trimWhitespace: false,
          );
        }
      }
    } else {
      // If it's an element...
      // If the tag was closed by a comment delimiter, remove the comment.
      while (node!.contains(Delimiters.comment)) {
        string = string.replaceFirst(Delimiters.comment, '');
        setNode(_delimiter);
      }

      // Capture the element's tag.
      final tag = Delimiters.elementTag.firstMatch(node!);
      final tagName = tag?.namedGroup('tagName');

      // Only parse opening tags. If a closing tag was found, it was found
      // without a corresponding opening tag and shouldn't be parsed.
      if (tagName?.isNotEmpty == true && !tagName!.startsWith('/')) {
        // If it's not an empty element, capture the whole element.
        if (tag!.namedGroup('isEmpty') != '/') {
          final element = Delimiters.element(tagName);
          setNode(element);
        }

        if (node != null) {
          xmlNode = XmlElement.from(node!,
              parseCharacterEntities: parseCharacterEntities,
              parseComments: true,
              trimWhitespace: false,
              parseCdataAsText: parseCdataAsText);
        }
      }
    }

    // Nothing could be parsed from the delimiter (e.g. an unclosed element
    // or a stray closing tag): fall back to treating it as text.
    if (xmlNode == null) {
      setNode(_delimiter);
      xmlNode = XmlText.from(
        node!,
        parseCharacterEntities: parseCharacterEntities,
        trimWhitespace: false,
      );
    }

    if (isRequested(xmlNode.runtimeType)) {
      if (nodeCount >= start) nodes.add(xmlNode);
      nodeCount++;
      if (stop != null && nodeCount > stop) {
        reachedStop = true;
        break;
      }
    }

    string = string.substring(delimiter!.end).trimLeft();
  }

  // Whatever is left after the last delimiter is trailing text. Skipped
  // when the loop stopped early, because the remainder is then unparsed
  // markup that still contains the last returned node.
  if (!reachedStop && string.isNotEmpty && isRequested(XmlText)) {
    nodes.add(XmlText.from(
      string,
      parseCharacterEntities: parseCharacterEntities,
      trimWhitespace: false,
    ));
  }

  return nodes.isEmpty ? null : nodes;
}