splitCapitalizedUnicode method
Splits a string by capitalized letters (Unicode-aware) and optionally by spaces, with an option to prevent splits that result in short segments.
- splitNumbers: If true, also splits between a lowercase letter and a following digit, and between a digit and a following letter.
- splitBySpace: If true, splits the result by whitespace.
- minLength: Merges adjacent splits if either segment is shorter than this length.
Implementation
/// Splits this string at lowercase-to-capital boundaries (Unicode-aware).
///
/// A split point is inserted wherever a lowercase letter (`\p{Ll}`) is
/// immediately followed by an uppercase (`\p{Lu}`) or titlecase (`\p{Lt}`)
/// letter. Returns an empty list when this string is empty.
///
/// * [splitNumbers]: when true, also splits between a lowercase letter and a
///   following decimal digit, and between a decimal digit and a following
///   letter.
/// * [splitBySpace]: when true, each resulting segment is further split on
///   runs of whitespace and empty pieces are dropped. This runs after
///   [minLength] merging, so whitespace-split pieces may be shorter than
///   [minLength].
/// * [minLength]: when greater than 1, adjacent segments are merged whenever
///   the accumulated segment or the segment that follows it is shorter than
///   this many Unicode code points.
List<String> splitCapitalizedUnicode({
  bool splitNumbers = false,
  bool splitBySpace = false,
  int minLength = 1,
}) {
  // `''.split(...)` would yield `['']`, so answer the empty case explicitly.
  if (isEmpty) return <String>[];

  // Zero-width boundaries: the lookbehind/lookahead pair matches *between*
  // characters, so no character is consumed by the split.
  final RegExp capitalizationPattern = RegExp(
    splitNumbers
        // Lower -> Upper/Title/Digit, or Digit -> any Letter.
        ? r'(?<=\p{Ll})(?=\p{Lu}|\p{Lt}|\p{Nd})|(?<=\p{Nd})(?=\p{L})'
        // Lower -> Upper/Title only.
        : r'(?<=\p{Ll})(?=\p{Lu}|\p{Lt})',
    unicode: true,
  );

  List<String> intermediateSplit = split(capitalizationPattern);

  if (minLength > 1 && intermediateSplit.length > 1) {
    final List<String> mergedResult = <String>[];
    String currentBuffer = intermediateSplit.first;
    // Lengths are measured in code points (runes), not UTF-16 code units, so
    // that a supplementary-plane letter counts as one character, consistent
    // with the code-point-aware pattern above. Split points always fall
    // between whole code points, so rune counts are additive on merge.
    int currentLength = currentBuffer.runes.length;

    for (int i = 1; i < intermediateSplit.length; i++) {
      final String nextPart = intermediateSplit[i];
      final int nextLength = nextPart.runes.length;
      if (currentLength < minLength || nextLength < minLength) {
        // Either side is too short: undo this split by gluing them together.
        currentBuffer += nextPart;
        currentLength += nextLength;
      } else {
        // Both sides are long enough: keep the split.
        mergedResult.add(currentBuffer);
        currentBuffer = nextPart;
        currentLength = nextLength;
      }
    }
    mergedResult.add(currentBuffer);
    intermediateSplit = mergedResult;
  }

  if (!splitBySpace) return intermediateSplit;

  // Built once rather than once per segment inside the closure.
  final RegExp whitespace = RegExp(r'\s+');
  return intermediateSplit
      .expand((String part) => part.split(whitespace))
      .where((String s) => s.isNotEmpty)
      .toList();
}