parseString static method
Returns a list of every root level node found in string
.
string
must not be null
.
If parseCharacterEntities
is true
, text values will be parsed
and all encoded character entities will be replaced with their corresponding
characters. parseCharacterEntities
must not be null
.
If parseComments
is false
, comments will be scrubbed
from string
before parsing. parseComments
must not be null
.
If trimWhitespace
is true
, unnecessary whitespace between nodes
will be removed and all remaining whitespace will be replaced with
a single space. trimWhitespace
must not be null
.
If returnNodesOfType
is not null
, only the nodes of the types
contained in returnNodesOfType
will be returned, otherwise, all
nodes, regardless of type, will be returned.
start
and stop
refer to the indexes of the identified nodes.
Only matches found between start
and stop
will be returned.
start
must not be null
and must be >= 0
. stop
may be null
,
but must be >= start
if provided.
Returns null
if no nodes were found.
Implementation
/// Parses [string] and returns its root-level nodes, or `null` when none
/// were collected.
///
/// * [parseCharacterEntities] is forwarded to the node factories so that
///   encoded character entities in text values are decoded.
/// * When [parseComments] is `false` (the default), comments are removed
///   from [string] before parsing; when `true`, they are returned as
///   comment nodes.
/// * When [trimWhitespace] is `true`, [string] is passed through the
///   `trimWhitespace` string extension before parsing.
/// * When [parseCdataAsText] is `true`, CDATA sections are returned as
///   [XmlText] nodes, otherwise as [XmlCdata] nodes.
/// * When [returnNodesOfType] is not `null`, only nodes whose runtime type
///   is contained in it are collected. This applies to text nodes as well.
/// * [start] and [stop] are inclusive indexes into the sequence of nodes
///   that pass the type filter. [start] must be `>= 0`; [stop], when
///   provided, must be `>= start`. Parsing ends as soon as the node at
///   index [stop] has been collected.
static List<XmlNode>? parseString(
  String string, {
  bool parseCharacterEntities = true,
  bool parseComments = false,
  bool trimWhitespace = true,
  bool parseCdataAsText = true,
  List<Type>? returnNodesOfType,
  int start = 0,
  int? stop,
}) {
  assert(start >= 0);
  assert(stop == null || stop >= start);

  if (!parseComments) string = string.removeComments();
  if (trimWhitespace) string = string.trimWhitespace();
  string = string.trim();

  final nodes = <XmlNode>[];
  var nodeCount = 0;

  // Set once the node at index [stop] has been seen. Nothing may be
  // collected afterwards, including the unparsed remainder of [string].
  var stopReached = false;

  // Whether nodes of [type] pass the [returnNodesOfType] filter.
  bool isWanted(Type type) =>
      returnNodesOfType == null || returnNodesOfType.contains(type);

  // Applies the [start]/[stop] window to a node that passed the type
  // filter. Every returned node, text included, goes through here so that
  // indexes are counted uniformly.
  void collect(XmlNode candidate) {
    if (nodeCount >= start) nodes.add(candidate);
    nodeCount++;
    if (stop != null && nodeCount > stop) stopReached = true;
  }

  while (string.contains(_delimiter)) {
    RegExpMatch? delimiter;
    String? node;

    // Points [delimiter]/[node] at the first match of [regExp] in the
    // current [string]; both become `null` when there is no match.
    void setNode(RegExp regExp) {
      delimiter = regExp.firstMatch(string);
      node = (delimiter != null)
          ? string.substring(delimiter!.start, delimiter!.end)
          : null;
    }

    setNode(_delimiter);

    // Anything in front of the first delimiter is plain text.
    if (delimiter!.start > 0) {
      final text = string.substring(0, delimiter!.start).trimRight();
      if (text.isNotEmpty) {
        if (isWanted(XmlText)) {
          // Built the same way as the trailing text node below so that
          // [parseCharacterEntities] is honoured for all text.
          collect(XmlText.from(
            text,
            parseCharacterEntities: parseCharacterEntities,
            trimWhitespace: false,
          ));
          if (stopReached) break;
        }
        string = string.substring(delimiter!.start);
        setNode(_delimiter);
      }
    }

    XmlNode? xmlNode;

    if (node!.startsWith('<?')) {
      if (node!.startsWith('<?xml')) {
        // If it's a XML declaration...
        setNode(Delimiters.xmlDeclaration);
        if (node != null) {
          xmlNode = XmlDeclaration.from(node!, trimWhitespace: false);
        }
      } else {
        // If it's a processing instruction declaration...
        setNode(Delimiters.processingInstruction);
        if (node != null) {
          xmlNode =
              XmlProcessingInstruction.from(node!, trimWhitespace: false);
        }
      }
    } else if (node!.startsWith('<!')) {
      // If it's a comment...
      if (node!.startsWith('<!--')) {
        // If the delimiter wasn't closed by a comment delimiter...
        if (!node!.endsWith('-->')) {
          // Try to find the actual comment delimiter
          setNode(Delimiters.comment);
          // If the comment wasn't closed...
          if (delimiter == null) {
            // The entirety of the remaining string is commented.
            string += '-->';
            setNode(Delimiters.comment);
          }
        }
        // Parse the node as a comment.
        xmlNode = XmlComment.from(node!, trimWhitespace: false);
      } else {
        // If it's a markup delimiter...
        final type = _markupStartDelimiter
            .firstMatch(node!)
            ?.namedGroup('type')
            ?.toUpperCase();
        if (type == 'ATTLIST') {
          setNode(Delimiters.attlist);
          if (node != null) {
            xmlNode = XmlAttlist.from(node!, trimWhitespace: false);
          }
        } else if (type == 'CDATA') {
          setNode(Delimiters.cdata);
          if (node != null) {
            if (parseCdataAsText) {
              xmlNode =
                  XmlText.from(node!, trimWhitespace: false, isMarkup: true);
            } else {
              xmlNode = XmlCdata.from(node!, trimWhitespace: false);
            }
          }
        } else if (type == 'DOCTYPE') {
          setNode(Delimiters.doctype);
          if (node != null) {
            xmlNode = XmlDoctype.from(
              node!,
              parseCharacterEntities: parseCharacterEntities,
              parseComments: true,
              trimWhitespace: false,
              parseCdataAsText: parseCdataAsText,
            );
          }
        } else if (type == 'ELEMENT') {
          setNode(Delimiters.etd);
          if (node != null) {
            xmlNode = XmlEtd.from(
              node!,
              trimWhitespace: false,
            );
          }
        } else if (type == 'ENTITY') {
          setNode(Delimiters.entity);
          if (node != null) {
            xmlNode = XmlEntity.from(
              node!,
              trimWhitespace: trimWhitespace,
            );
          }
        } else if (type == 'INCLUDE' ||
            type == 'IGNORE' ||
            // `type` is null when the markup start pattern did not match;
            // guard it so such nodes reach the text fallback below instead
            // of failing a null assertion.
            (type != null &&
                (type.startsWith('&') || type.startsWith('%')) &&
                type.endsWith(';'))) {
          setNode(Delimiters.conditional);
          if (node != null) {
            xmlNode = XmlConditional.from(node!,
                parseCharacterEntities: parseCharacterEntities,
                parseComments: true,
                trimWhitespace: false,
                parseCdataAsText: parseCdataAsText);
          }
        } else if (type == 'NOTATION') {
          setNode(Delimiters.notation);
          if (node != null) {
            xmlNode = XmlNotation.from(
              node!,
              trimWhitespace: false,
            );
          }
        } else {
          // Unrecognised markup is kept verbatim as a text node.
          xmlNode = XmlText.from(
            node!,
            isMarkup: true,
            parseCharacterEntities: parseCharacterEntities,
            trimWhitespace: false,
          );
        }
      }
    } else {
      // If it's an element...
      // If the tag was closed by a comment delimiter, remove the comment.
      while (node!.contains(Delimiters.comment)) {
        string = string.replaceFirst(Delimiters.comment, '');
        setNode(_delimiter);
      }

      // Capture the element's tag.
      final tag = Delimiters.elementTag.firstMatch(node!);
      final tagName = tag?.namedGroup('tagName');

      // Only parse opening tags. If a closing tag was found, it was found
      // without a corresponding opening tag and shouldn't be parsed.
      if (tagName?.isNotEmpty == true && !tagName!.startsWith('/')) {
        // If it's not an empty element, capture the whole element.
        if (tag!.namedGroup('isEmpty') != '/') {
          final RegExp element = Delimiters.element(tagName);
          setNode(element);
        }

        if (node != null) {
          xmlNode = XmlElement.from(node!,
              parseCharacterEntities: parseCharacterEntities,
              parseComments: true,
              trimWhitespace: false,
              parseCdataAsText: parseCdataAsText);
        }
      }
    }

    // Nothing above produced a node (no specific pattern matched, or a
    // factory returned null): fall back to the generic delimiter match and
    // keep it as text.
    if (xmlNode == null) {
      setNode(_delimiter);
      xmlNode = XmlText.from(
        node!,
        parseCharacterEntities: parseCharacterEntities,
        trimWhitespace: false,
      );
    }

    if (isWanted(xmlNode.runtimeType)) {
      collect(xmlNode);
      // Leave before consuming the node; the remainder is discarded below.
      if (stopReached) break;
    }

    string = string.substring(delimiter!.end).trimLeft();
  }

  // Whatever is left after the last delimiter is trailing text. It is
  // skipped once [stop] has been reached, because in that case [string]
  // still holds unparsed markup rather than text.
  if (!stopReached && string.isNotEmpty && isWanted(XmlText)) {
    collect(XmlText.from(
      string,
      parseCharacterEntities: parseCharacterEntities,
      trimWhitespace: false,
    ));
  }

  if (nodes.isEmpty) return null;

  return nodes;
}