splitCapitalizedUnicode method

List<String> splitCapitalizedUnicode({
  bool splitNumbers = false,
  bool splitBySpace = false,
  int minLength = 1,
})

Splits a string by capitalized letters (Unicode-aware) and optionally by spaces, with an option to prevent splits that result in short segments.

  • splitNumbers: If true, also splits between a lowercase letter and a following digit, and between a digit and a following letter.
  • splitBySpace: If true, splits the result by whitespace.
  • minLength: When greater than 1, merges adjacent segments if either one is shorter than this length. Merging is applied before any whitespace splitting.

Implementation

/// Splits this string at case boundaries, using Unicode character classes.
///
/// A boundary is any position where a lowercase letter is directly followed
/// by an uppercase or titlecase letter. When [splitNumbers] is true, a
/// boundary is also placed between a lowercase letter and a following decimal
/// digit, and between a decimal digit and a following letter.
///
/// When [minLength] is greater than 1, the pieces are walked left to right
/// and a piece is joined onto the one being accumulated whenever either of
/// the two is shorter than [minLength] (measured with [String.length]).
///
/// When [splitBySpace] is true, every piece is then further divided on runs
/// of whitespace and empty results are dropped. This happens after the
/// [minLength] merge, so returned pieces may be shorter than [minLength].
///
/// Returns an empty list for an empty string.
List<String> splitCapitalizedUnicode({
  bool splitNumbers = false,
  bool splitBySpace = false,
  int minLength = 1,
}) {
  if (isEmpty) return <String>[];

  // Both alternatives are built purely from look-behind / look-ahead
  // assertions, so the split consumes no characters.
  final boundarySource = splitNumbers
      ? r'(?<=\p{Ll})(?=\p{Lu}|\p{Lt}|\p{Nd})|(?<=\p{Nd})(?=\p{L})'
      : r'(?<=\p{Ll})(?=\p{Lu}|\p{Lt})';
  var pieces = split(RegExp(boundarySource, unicode: true));

  final needsMerge = minLength > 1 && pieces.length > 1;
  if (needsMerge) {
    final merged = <String>[];
    var pending = pieces.first;
    for (final piece in pieces.skip(1)) {
      // A piece is only emitted once both it and its right-hand neighbour
      // reach minLength; otherwise the neighbour is absorbed into it.
      final bothLongEnough =
          pending.length >= minLength && piece.length >= minLength;
      if (bothLongEnough) {
        merged.add(pending);
        pending = piece;
      } else {
        pending += piece;
      }
    }
    // Whatever is still accumulating becomes the last piece.
    merged.add(pending);
    pieces = merged;
  }

  if (!splitBySpace) return pieces;

  // Break each piece on whitespace runs, discarding the empty strings that
  // leading or trailing whitespace would otherwise produce.
  final whitespaceRun = RegExp(r'\s+');
  return <String>[
    for (final piece in pieces)
      for (final word in piece.split(whitespaceRun))
        if (word.isNotEmpty) word,
  ];
}