convert static method
Converts the given `htmlText` into plain text.

- Code inside `<pre>` elements is kept untouched.
- Blockquotes are transformed into lines starting with `>`.
- HTML entities are transformed to their plain text representation.
Implementation
/// Converts the given [htmlText] into plain text.
///
/// The input is scanned tag by tag (as matched by `_htmlTagRegex`); the text
/// between two tags is written through `writeConvertHtmlEntities`.
///
/// Tag handling:
/// - `<pre>`: everything up to the matching `</pre>` is copied verbatim,
///   without entity conversion or tag processing.
/// - `<blockquote>`: writes a `>`; every following `<p>` / `<br>` line break
///   inside it is prefixed with one `>` per open blockquote level.
/// - `<p>` and `<br>`: write a line break.
/// - `<ul>` / `<ol>` / `<li>`: each item starts on a new line, prefixed with
///   ` * ` inside an unordered list and with ` 1. `, ` 2. `, ... otherwise.
///   Closing a list writes a line break.
/// - Any other tag is dropped.
///
/// Returns the resulting text after applying `_lineBreakOrWhiteSpaceRegex`
/// once to strip line breaks and whitespace at the start.
static String convert(final String htmlText) {
  // Whether [tag] starts with [prefix] as a *complete* tag name, i.e. the
  // prefix is not followed by another name character. This keeps `<p` from
  // matching `<param>` / `<picture>` / `<progress>` and `<li` from matching
  // `<link>`. [tag] is expected to be lower-cased already.
  bool startsWithTagName(String tag, String prefix) {
    if (!tag.startsWith(prefix)) {
      return false;
    }
    if (tag.length == prefix.length) {
      return true;
    }
    final next = tag.codeUnitAt(prefix.length);
    final continuesName = (next >= 0x61 && next <= 0x7a) || // a-z
        (next >= 0x30 && next <= 0x39) || // 0-9
        next == 0x2d; // '-'
    return !continuesName;
  }

  final matches = _htmlTagRegex.allMatches(htmlText).toList();
  final plainTextBuffer = StringBuffer();
  var lastMatchIndex = 0;
  var blockquoteCounter = 0;
  var orderedListItemIndex = 0;
  var isInOrderedList = false;
  var isInUnorderedList = false;
  for (var i = 0; i < matches.length; i++) {
    var match = matches[i];
    if (match.start > lastMatchIndex) {
      // Plain text between the previous tag and this one.
      final textBetweenMatches =
          htmlText.substring(lastMatchIndex, match.start);
      writeConvertHtmlEntities(textBetweenMatches, plainTextBuffer);
    }
    final tag = match.group(0)!.toLowerCase();
    if (tag.startsWith('<pre')) {
      // Copy the preformatted content verbatim and jump behind `</pre>`.
      // When no closing tag exists, nothing is copied here and the
      // following tags are processed like regular HTML.
      final preContentStart = match.end;
      for (var j = i + 1; j < matches.length; j++) {
        final inPreMatch = matches[j];
        final inPreTag = inPreMatch.group(0)!.toLowerCase();
        if (inPreTag.startsWith('</pre')) {
          i = j;
          plainTextBuffer
              .write(htmlText.substring(preContentStart, inPreMatch.start));
          match = inPreMatch;
          break;
        }
      }
    } else if (tag.startsWith('<blockquote')) {
      plainTextBuffer.write('>');
      blockquoteCounter++;
    } else if (tag.startsWith('</blockquote')) {
      // Never drop below zero: an unmatched closing tag must not suppress
      // the quote markers of later, well-formed blockquotes.
      if (blockquoteCounter > 0) {
        blockquoteCounter--;
      }
    } else if (startsWithTagName(tag, '<p') || tag.startsWith('<br')) {
      plainTextBuffer.write('\n');
      // Repeat the quote marker once per open blockquote level.
      for (var q = 0; q < blockquoteCounter; q++) {
        plainTextBuffer.write('>');
      }
    } else if (tag.startsWith('<ul')) {
      isInUnorderedList = true;
    } else if (isInUnorderedList && tag.startsWith('</ul')) {
      isInUnorderedList = false;
      plainTextBuffer.write('\n');
    } else if (tag.startsWith('<ol')) {
      isInOrderedList = true;
      orderedListItemIndex = 0;
    } else if (isInOrderedList && tag.startsWith('</ol')) {
      isInOrderedList = false;
      plainTextBuffer.write('\n');
    } else if (startsWithTagName(tag, '<li')) {
      plainTextBuffer.write('\n');
      if (isInUnorderedList) {
        plainTextBuffer.write(' * ');
      } else {
        // Items outside of an unordered list are numbered.
        orderedListItemIndex++;
        plainTextBuffer
          ..write(' ')
          ..write(orderedListItemIndex)
          ..write('. ');
      }
    }
    lastMatchIndex = match.end;
  }
  if (lastMatchIndex < htmlText.length) {
    // Trailing text after the last tag (or the whole input without tags).
    writeConvertHtmlEntities(
        htmlText.substring(lastMatchIndex), plainTextBuffer);
  }
  // remove line-breaks and whitespace at start:
  final plainText = plainTextBuffer
      .toString()
      .replaceFirst(_lineBreakOrWhiteSpaceRegex, '');
  return plainText;
}