kGrams method
Returns a set of k-grams in the term.
Splits phrases into terms at all non-word characters and generates the k-grams for each word individually.
If lowerCase
is true, the text is converted to lower case before
generating the k-grams.
Implementation
/// Returns the set of k-grams of length [k] found in this string.
///
/// The string is trimmed and, when [lowerCase] is true, converted to lower
/// case. It is then split into terms at every run of characters that the
/// term character class below does not accept, and k-grams are generated
/// for each term individually.
///
/// For every non-empty term the result contains:
/// - an opening k-gram: `$` followed by the first `k - 1` characters of the
///   term (or by the whole term when it is shorter than [k]);
/// - a closing k-gram: the last `k - 1` characters of the term (or the whole
///   term when it is shorter than [k]) followed by `$`;
/// - every substring of length [k], or the term itself when its length does
///   not exceed [k].
///
/// NOTE: [k] is not validated here; callers are expected to pass `k >= 1`.
Set<String> kGrams([int k = 2, bool lowerCase = true]) {
  final text = lowerCase ? trim().toLowerCase() : trim();
  final Set<String> grams = {};
  final words = text.split(RegExp(r"[^a-zA-Z0-9À-öø-ÿ¥Œ€@™#-\&_\'\-\$]+"));
  for (final rawWord in words) {
    final word = rawWord.trim();
    if (word.isEmpty) {
      continue;
    }
    // A term shorter than k cannot supply k - 1 characters, so the whole
    // term is used next to the boundary marker instead.
    final isShort = word.length < k;
    // Opening k-gram: boundary marker, then the head of the term.
    grams.add(r'$' + (isShort ? word : word.substring(0, k - 1)));
    // Closing k-gram: tail of the term, then the boundary marker. The marker
    // is appended for short terms too, mirroring the opening k-gram.
    grams.add((isShort ? word : word.substring(word.length - k + 1)) + r'$');
    if (word.length <= k) {
      // The term fits in a single k-gram.
      grams.add(word);
    } else {
      // Slide a window of width k across the term.
      for (var i = 0; i <= word.length - k; i++) {
        grams.add(word.substring(i, i + k));
      }
    }
  }
  return grams;
}