asSentence method

String asSentence()

Formats the string as a proper sentence with capitalisation and punctuation.

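Capitalises the first character of each sentence when that character is a word character, adds a space after a terminator that directly precedes the next sentence, and ensures the text ends with a terminator (. ? ! or ;), inserting a period before any trailing whitespace when one is missing. Existing whitespace and leading punctuation are kept as written, so a sentence that begins with either is not capitalised. If the final run of punctuation already contains a terminator, none is added.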

Useful for converting user input, variable names, or fragment text into grammatically presentable sentences.

Example:

'hello world'.asSentence()           // 'Hello world.'
'(hello) world'.asSentence()         // '(hello) world.'
'hello world!'.asSentence()          // 'Hello world!'
'  hello   world  '.asSentence()     // '  hello   world.  '
'HELLO'.asSentence()                 // 'HELLO.'

Implementation

/// Formats this string as one or more sentences.
///
/// The text is split into (sentence, cluster) records, where a cluster is the
/// run of non-word characters (per `wordChars`) surrounding a terminator (per
/// `terminators`). Then, for each record:
///
/// * the sentence's first character is upper-cased if it is a word character;
/// * a cluster that ends with a terminator and is followed by another
///   sentence gets a single space appended;
/// * the final cluster receives a `.` if it contains no terminator. The
///   period is inserted before the first space, tab or newline in that
///   cluster, or appended when there is none.
///
/// Whitespace is otherwise preserved as written. A cluster at the very start
/// of the string that contains a terminator is kept as a prefix of the first
/// sentence; if the whole string is such a cluster it is returned unchanged.
///
/// Returns `''` for an empty string.
String asSentence() {
  if (isEmpty) return '';

  // Compiled once: the original rebuilt this pattern for every character
  // examined while scanning.
  final wordPattern = RegExp('[' + wordChars + ']');
  final whitespacePattern = RegExp(r'[ \t\n]');

  bool isWordChar(String ch) => wordPattern.hasMatch(ch);

  // Index of the first terminator in [text], or -1 when there is none.
  int firstTerminatorIndex(String text) {
    for (var i = 0; i < text.length; i++) {
      if (terminators.contains(text[i])) return i;
    }
    return -1;
  }

  // Walks left from [index] over non-word characters; returns where the
  // cluster begins.
  int clusterStartBefore(String text, int index) {
    var start = index;
    while (start > 0 && !isWordChar(text[start - 1])) {
      start--;
    }
    return start;
  }

  // Walks right from [index] over non-word characters; returns the index
  // just past the cluster.
  int clusterEndFrom(String text, int index) {
    var end = index;
    while (end < text.length && !isWordChar(text[end])) {
      end++;
    }
    return end;
  }

  final records = <({String sentence, String cluster})>[];
  var remaining = this;
  var isFirstRecord = true;
  var orphanCluster = '';

  // Phase 1: split the text into (sentence, cluster) records.
  while (remaining.isNotEmpty) {
    final terminatorIndex = firstTerminatorIndex(remaining);

    // No terminator left: the rest is one sentence plus whatever trailing
    // non-word characters follow it.
    if (terminatorIndex == -1) {
      final clusterStart = clusterStartBefore(remaining, remaining.length);
      records.add((
        sentence: orphanCluster + remaining.substring(0, clusterStart),
        cluster: remaining.substring(clusterStart),
      ));
      break;
    }

    final clusterStart = clusterStartBefore(remaining, terminatorIndex);
    final clusterEnd = clusterEndFrom(remaining, terminatorIndex);

    // A cluster at the very start of the input has no sentence before it;
    // hold it back so it is prefixed to the first real sentence.
    if (clusterStart == 0 && isFirstRecord) {
      orphanCluster = remaining.substring(0, clusterEnd);
      remaining = remaining.substring(clusterEnd);
      continue;
    }

    records.add((
      sentence: orphanCluster + remaining.substring(0, clusterStart),
      cluster: remaining.substring(clusterStart, clusterEnd),
    ));
    orphanCluster = '';
    remaining = remaining.substring(clusterEnd);
    isFirstRecord = false;
  }

  // The input was nothing but a leading cluster: return it untouched.
  if (records.isEmpty && orphanCluster.isNotEmpty) {
    return orphanCluster;
  }

  // Phase 2: normalise each record and emit it straight to the output.
  final output = StringBuffer();
  final lastIndex = records.length - 1;
  for (var i = 0; i < records.length; i++) {
    var sent = records[i].sentence;
    var clust = records[i].cluster;

    // Only capitalise when the sentence really starts with a word character;
    // leading whitespace or punctuation leaves it untouched.
    if (sent.isNotEmpty && isWordChar(sent[0])) {
      sent = sent[0].toUpperCase() + sent.substring(1);
    }

    if (i < lastIndex) {
      // The next sentence follows the terminator directly: separate them.
      if (clust.isNotEmpty && terminators.contains(clust[clust.length - 1])) {
        clust += ' ';
      }
    } else {
      // The isNotEmpty guard keeps an empty cluster from being treated as
      // already terminated.
      final hasTerminator = clust.isNotEmpty &&
          clust.split('').any((c) => terminators.contains(c));
      if (!hasTerminator) {
        // Put the period before any trailing whitespace, not after it.
        final firstWhitespaceIndex = clust.indexOf(whitespacePattern);
        if (firstWhitespaceIndex >= 0) {
          clust = clust.substring(0, firstWhitespaceIndex) +
              '.' +
              clust.substring(firstWhitespaceIndex);
        } else {
          clust = clust + '.';
        }
      }
    }

    output.write(sent);
    output.write(clust);
  }

  return output.toString();
}