fit method

List<int> fit(
  1. List<List<double>> X, {
  2. int k = 1,
})

Implementation

List<int> fit(List<List<double>> X, {int k = 1}) {
  final n = X.length;
  if (n == 0) throw ArgumentError('Empty dataset');
  if (k < 1 || k > n) throw ArgumentError('k out of range');

  // naive distance matrix
  final dist = List.generate(
    n,
    (_) => List<double>.filled(n, double.infinity),
  );
  for (var i = 0; i < n; i++) {
    for (var j = i + 1; j < n; j++) {
      dist[i][j] = _euclidSq(X[i], X[j]);
      dist[j][i] = dist[i][j];
    }
  }
  // each point starts in its own cluster
  final clusters = List<List<int>>.generate(n, (i) => [i]);
  final active = List<bool>.filled(n, true);
  int remaining = n;
  while (remaining > k) {
    // find closest pair
    var bestI = -1, bestJ = -1;
    var bestD = double.infinity;
    for (var i = 0; i < n; i++) {
      if (!active[i]) continue;
      for (var j = i + 1; j < n; j++) {
        if (!active[j]) continue;
        if (dist[i][j] < bestD) {
          bestD = dist[i][j];
          bestI = i;
          bestJ = j;
        }
      }
    }
    if (bestI < 0) break;
    // merge j into i
    clusters[bestI].addAll(clusters[bestJ]);
    active[bestJ] = false;
    remaining--;
    // update distances
    for (var t = 0; t < n; t++) {
      if (!active[t] || t == bestI) continue;
      double newD;
      if (linkage == Linkage.single) {
        newD = clusters[bestI]
            .map((a) => clusters[bestJ].map((b) => dist[a][b]).reduce(min))
            .reduce(min);
      } else if (linkage == Linkage.complete) {
        newD = clusters[bestI]
            .map((a) => clusters[bestJ].map((b) => dist[a][b]).reduce(max))
            .reduce(max);
      } else {
        // average
        double s = 0.0;
        var cnt = 0;
        for (var a in clusters[bestI]) {
          for (var b in clusters[bestJ]) {
            s += dist[a][b];
            cnt++;
          }
        }
        newD = s / cnt;
      }
      dist[bestI][t] = dist[t][bestI] = newD;
    }
  }
  final labels = List<int>.filled(n, -1);
  var idx = 0;
  for (var i = 0; i < n; i++) {
    if (!active[i]) continue;
    for (var v in clusters[i]) {
      labels[v] = idx;
    }
    idx++;
  }
  return labels;
}