nGrams method
Returns an ordered collection of n-grams from the list of Strings.
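The n-gram lengths are limited by `range`. If `range` is `NGramRange(1,1)`,
the list is returned essentially as is, except that each term is
whitespace-normalized and terms that are empty after normalization are omitted.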
Implementation
/// Builds the n-grams of this collection of terms, in iteration order.
///
/// Every term is first passed through `normalizeWhitespace()` (defined
/// elsewhere); terms that are empty afterwards are skipped. The remaining
/// terms feed a sliding window holding at most `range.max` terms. For each
/// new term the candidate grams are assembled with `_prefixWordTo` (defined
/// elsewhere), walking the window from its newest term back to its oldest.
///
/// Only grams whose term count lies between `range.min` and `range.max`
/// (both inclusive) are emitted, each joined with a single space.
///
/// Returns an empty list when the collection is empty.
List<String> nGrams(NGramRange range) {
  // Nothing to do for an empty collection.
  if (isEmpty) return [];

  final result = <String>[];
  // Sliding window over the most recent non-empty terms.
  final window = <String>[];

  for (final rawTerm in this) {
    final term = rawTerm.normalizeWhitespace();
    // An empty term produces no grams and leaves the window untouched.
    if (term.isEmpty) continue;

    window.add(term);
    // Drop the oldest term once the window outgrows the longest n-gram.
    if (window.length > range.max) {
      window.removeAt(0);
    }

    // Candidate grams produced for the current term only.
    final grams = <List<String>>[];
    for (var depth = 0; depth < window.length; depth++) {
      // Walk the window backwards: depth 0 is the newest term.
      final word = window[window.length - 1 - depth];
      // Independent copies of the grams that currently hold exactly `depth`
      // terms; these are handed to the helper together with `word`.
      final seeds = <List<String>>[
        for (final gram in grams)
          if (gram.length == depth) List<String>.from(gram),
      ];
      grams.addAll(_prefixWordTo(seeds, word));
    }

    // Emit the grams whose length falls inside the requested range.
    for (final gram in grams) {
      if (gram.length < range.min || gram.length > range.max) continue;
      final text = gram.join(' ').normalizeWhitespace();
      if (text.isNotEmpty) {
        result.add(text);
      }
    }
  }

  return result;
}