nGrams method

List<String> nGrams(
  1. NGramRange range
)

Returns an ordered collection of n-grams from the list of Strings.

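The n-gram lengths are limited by `range`. If `range` is `NGramRange(1, 1)`, each term is returned on its own, in order, after whitespace normalization; terms that are empty after normalization are omitted.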

Implementation

/// Returns the n-grams of this collection of terms, in order, as
/// space-joined strings.
///
/// Only n-grams whose word count lies between `range.min` and `range.max`
/// (both inclusive) are returned. Every term is passed through
/// `normalizeWhitespace()` first; terms that are empty afterwards are
/// skipped and never become part of an n-gram.
List<String> nGrams(NGramRange range) {
  final result = <String>[];
  // Nothing to do for an empty collection.
  if (isEmpty) return result;

  // Sliding window holding the most recent non-empty terms; it is trimmed
  // from the front whenever it grows beyond `range.max` entries.
  final window = <String>[];

  for (final rawTerm in this) {
    final term = rawTerm.normalizeWhitespace();
    // An empty term produces no n-grams and leaves the window untouched.
    if (term.isEmpty) continue;

    window.add(term);
    if (window.length > range.max) {
      window.removeAt(0);
    }

    // Word lists built for the current term, starting from the newest word
    // in the window and walking back towards the oldest.
    final candidates = <List<String>>[];
    for (var depth = 0; depth < window.length; depth++) {
      final word = window[window.length - 1 - depth];
      // Hand copies of the candidates that already contain `depth` words to
      // the helper, so the lists stored in `candidates` are never shared.
      // NOTE(review): `_prefixWordTo` is defined elsewhere; presumably it
      // prepends `word` to each seed (or starts a new list when there are
      // no seeds) - confirm against its implementation.
      final seeds = <List<String>>[
        for (final gram in candidates)
          if (gram.length == depth) List<String>.from(gram),
      ];
      candidates.addAll(_prefixWordTo(seeds, word));
    }

    // Emit the candidates whose word count falls inside the requested range.
    for (final gram in candidates) {
      if (gram.length < range.min || gram.length > range.max) continue;
      final joined = gram.join(' ').normalizeWhitespace();
      if (joined.isNotEmpty) {
        result.add(joined);
      }
    }
  }

  return result;
}