convert static method

String convert(
  1. String htmlText
)

Converts the given htmlText into plain text.

  • Content inside <pre> elements is kept untouched
  • Blockquotes are transformed into lines starting with >
  • HTML entities are transformed to their plain text representation

Implementation

/// Converts the given [htmlText] into plain text.
///
/// The input is scanned for matches of `_htmlTagRegex`; the text between two
/// matches is written through `writeConvertHtmlEntities`, while the matched
/// tags themselves are dropped or translated as follows:
///
/// * Everything between `<pre>` and the next `</pre>` is copied verbatim,
///   including any markup it contains.
/// * `<blockquote>` writes a `>`; every `<p>` or `<br>` inside a blockquote
///   starts a new line prefixed with one `>` per open blockquote.
/// * `<p>` and `<br>` start a new line.
/// * `<li>` starts a new line with ` * ` inside an unordered list, otherwise
///   with a running ` 1. `, ` 2. `, ... number that is reset by each `<ol>`.
/// * A closing `</ul>` / `</ol>` of an open list adds a line break.
///
/// Tag names are compared case-insensitively and must match completely, so
/// for example `<param>` or `<link>` are not mistaken for `<p>` or `<li>`.
///
/// Returns the plain text with leading line breaks / whitespace removed
/// (whatever `_lineBreakOrWhiteSpaceRegex` matches first).
static String convert(final String htmlText) {
  final matches = _htmlTagRegex.allMatches(htmlText).toList();
  final plainTextBuffer = StringBuffer();
  var lastMatchIndex = 0;
  var blockquoteCounter = 0;
  var orderedListItemIndex = 0;
  var isInOrderedList = false;
  var isInUnorderedList = false;
  for (var i = 0; i < matches.length; i++) {
    var match = matches[i];
    if (match.start > lastMatchIndex) {
      // Plain text between the previous tag and this one.
      writeConvertHtmlEntities(
          htmlText.substring(lastMatchIndex, match.start), plainTextBuffer);
    }
    final tag = match.group(0)!.toLowerCase();
    if (_tagHasName(tag, '<pre')) {
      final preContentStart = match.end;
      // Look ahead for the closing tag and copy everything in between as-is.
      // When no closing tag exists, the <pre> tag is simply dropped and the
      // remaining input is converted like regular markup.
      for (var j = i + 1; j < matches.length; j++) {
        final inPreMatch = matches[j];
        final inPreTag = inPreMatch.group(0)!.toLowerCase();
        if (_tagHasName(inPreTag, '</pre')) {
          // Skip all tags inside the <pre> element in the outer loop.
          i = j;
          plainTextBuffer
              .write(htmlText.substring(preContentStart, inPreMatch.start));
          match = inPreMatch;
          break;
        }
      }
    } else if (_tagHasName(tag, '<blockquote')) {
      plainTextBuffer.write('>');
      blockquoteCounter++;
    } else if (_tagHasName(tag, '</blockquote')) {
      // Ignore unbalanced closing tags so that the nesting depth never
      // becomes negative and corrupts the prefix of later blockquotes.
      if (blockquoteCounter > 0) {
        blockquoteCounter--;
      }
    } else if (_tagHasName(tag, '<p') || _tagHasName(tag, '<br')) {
      plainTextBuffer.write('\n');
      for (var q = 0; q < blockquoteCounter; q++) {
        plainTextBuffer.write('>');
      }
    } else if (_tagHasName(tag, '<ul')) {
      isInUnorderedList = true;
    } else if (isInUnorderedList && _tagHasName(tag, '</ul')) {
      isInUnorderedList = false;
      plainTextBuffer.write('\n');
    } else if (_tagHasName(tag, '<ol')) {
      isInOrderedList = true;
      orderedListItemIndex = 0;
    } else if (isInOrderedList && _tagHasName(tag, '</ol')) {
      isInOrderedList = false;
      plainTextBuffer.write('\n');
    } else if (_tagHasName(tag, '<li')) {
      plainTextBuffer.write('\n');
      if (isInUnorderedList) {
        plainTextBuffer.write(' * ');
      } else {
        // Items outside of an unordered list are numbered.
        orderedListItemIndex++;
        plainTextBuffer
          ..write(' ')
          ..write(orderedListItemIndex)
          ..write('. ');
      }
    }
    lastMatchIndex = match.end;
  }
  if (lastMatchIndex < htmlText.length) {
    // Trailing text after the last tag.
    writeConvertHtmlEntities(
        htmlText.substring(lastMatchIndex), plainTextBuffer);
  }
  // remove line-breaks and whitespace at start:
  final plainText = plainTextBuffer
      .toString()
      .replaceFirst(_lineBreakOrWhiteSpaceRegex, '');
  return plainText;
}

/// Whether the lower-cased [tag] starts with [prefix] as a complete tag name.
///
/// [prefix] includes the opening `<` (and `/` for closing tags), for example
/// `'<p'` or `'</pre'`. The character following the prefix must not continue
/// the tag name, so `'<p'` matches `<p>` and `<p class="x">` but not
/// `<param>`.
static bool _tagHasName(String tag, String prefix) {
  if (!tag.startsWith(prefix)) {
    return false;
  }
  if (tag.length == prefix.length) {
    return true;
  }
  final next = tag.codeUnitAt(prefix.length);
  final isLetter = next >= 0x61 && next <= 0x7a; // a-z
  final isDigit = next >= 0x30 && next <= 0x39; // 0-9
  // '-', '_', ':' and '.' may appear in custom element or namespaced names.
  final isNamePunctuation =
      next == 0x2d || next == 0x5f || next == 0x3a || next == 0x2e;
  return !(isLetter || isDigit || isNamePunctuation);
}