applyDictionaryCorrectionOnSingleSentence function
Applies dictionary-based correction to inputSentence. Each word is first looked up
directly in the dictionary; if it is not found, commonly confused characters are substituted using correctionLetters;
if no substitution yields a dictionary word, the word is replaced by its closest match in the dictionary.
The original casing of the input words is preserved in the corrected output.
Implementation
/// Applies dictionary-based correction to each word of [inputSentence].
///
/// The sentence is split into word tokens and separator tokens (whitespace
/// runs and the single punctuation marks `. , ! ? ; :`). Separators are kept
/// verbatim. Each word token longer than one character that is not mostly
/// digits is handled as follows:
///
/// 1. If its lower-cased form is in `englishWords`, it is left unchanged.
/// 2. Otherwise, for each key of [correctionLetters] contained in the word,
///    every occurrence of the key is replaced by each candidate substitute in
///    turn; the first result whose lower-cased form is in `englishWords` wins.
/// 3. If no substitution produces a dictionary word, the word is replaced by
///    the result of `findClosestMatchingWordInDictionary`.
///
/// [correctionLetters] maps a character sequence to the list of sequences it
/// is commonly confused with.
///
/// Returns the re-joined tokens passed through `normalizeCasingOfParagraph`.
String applyDictionaryCorrectionOnSingleSentence(
  final String inputSentence,
  final Map<String, List<String>> correctionLetters,
) {
  // Matches a run of whitespace or a single punctuation mark.
  final RegExp separatorPattern = RegExp(r'(\s+|[.,!?;:])');

  // Tokenize by slicing around the separator matches. No sentinel character
  // is injected into the text, so every character of the input (whatever it
  // is) survives tokenization.
  final List<String> tokens = <String>[];
  int cursor = 0;
  for (final Match match in separatorPattern.allMatches(inputSentence)) {
    if (match.start > cursor) {
      tokens.add(inputSentence.substring(cursor, match.start));
    }
    tokens.add(inputSentence.substring(match.start, match.end));
    cursor = match.end;
  }
  if (cursor < inputSentence.length) {
    tokens.add(inputSentence.substring(cursor));
  }

  for (int i = 0; i < tokens.length; i++) {
    final String word = tokens[i];

    // Single characters are never corrected.
    if (word.length <= 1) {
      continue;
    }
    // A word token is, by construction, free of separator characters, so any
    // token the pattern matches is a separator. This also covers
    // multi-character whitespace runs such as "  " or "\r\n", which must not
    // be treated as misspelled words.
    if (separatorPattern.hasMatch(word)) {
      continue;
    }
    // No need to process numbers.
    if (CharacterStats(word).mostlyDigits()) {
      continue;
    }
    // Direct dictionary hit: nothing to correct.
    if (englishWords.contains(word.toLowerCase())) {
      continue;
    }

    // Try substituting commonly confused characters; first hit wins.
    String modifiedWord = word;
    bool foundMatch = false;
    for (final MapEntry<String, List<String>> entry
        in correctionLetters.entries) {
      if (!word.contains(entry.key)) {
        continue;
      }
      for (final String substitute in entry.value) {
        final String testWord = word.replaceAll(entry.key, substitute);
        if (englishWords.contains(testWord.toLowerCase())) {
          modifiedWord = testWord;
          foundMatch = true;
          break;
        }
      }
      if (foundMatch) {
        break;
      }
    }

    // No substitution produced a dictionary word: fall back to closest match.
    if (!foundMatch) {
      modifiedWord = findClosestMatchingWordInDictionary(word);
    }
    tokens[i] = modifiedWord;
  }

  return normalizeCasingOfParagraph(tokens.join(''));
}