withNumericColumnBinned method

DataFrame withNumericColumnBinned(
  String name, {
  List<(num, num)> bins = const [],
  int decimalPlaces = 3,
})

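Returns a new data frame with an added categoric column, named `<name>_bin`, that labels each value of the numeric column `name` with the bin it falls into. Each bin `(a, b)` is the half-open interval `[a, b)`, and its label shows both bounds to `decimalPlaces` decimal places. If `bins` is empty, equal-width bins spanning the column's values are generated automatically; a value that falls in none of the bins is given the missing value marker.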

Example:

// Work with just the two columns shown in the first table below.
final d = petals.withColumns(["petal_width", "species"]);
print(d);
// No bins are supplied, so the bins are generated automatically and the
// result gains a "petal_width_bin" column (second table below).
print(d.withNumericColumnBinned("petal_width"));

.-----------.------------.
|petal_width|species     |
:-----------+------------:
|0.2        |setosa      |
|0.2        |setosa      |
|0.2        |setosa      |
|0.2        |setosa      |
|1.4        |versicolor  |
|1.5        |versicolor  |
|1.5        |versicolor  |
|1.3        |versicolor  |
|2.5        |virginica   |
|1.9        |virginica   |
|2.1        |virginica   |
|1.8        |virginica   |
'-----------'------------'

.-----------.------------.-----------------.
|petal_width|species     |petal_width_bin  |
:-----------+------------+-----------------:
|0.2        |setosa      |[-0.030, 0.660)  |
|0.2        |setosa      |[-0.030, 0.660)  |
|0.2        |setosa      |[-0.030, 0.660)  |
|0.2        |setosa      |[-0.030, 0.660)  |
|1.4        |versicolor  |[1.350, 2.040)   |
|1.5        |versicolor  |[1.350, 2.040)   |
|1.5        |versicolor  |[1.350, 2.040)   |
|1.3        |versicolor  |[0.660, 1.350)   |
|2.5        |virginica   |[2.040, 2.730)   |
|1.9        |virginica   |[1.350, 2.040)   |
|2.1        |virginica   |[2.040, 2.730)   |
|1.8        |virginica   |[1.350, 2.040)   |
'-----------'------------'-----------------'

Implementation

/// Returns a copy of this data frame with an added categoric column, named
/// `"${name}_bin"`, that labels each value of the numeric column [name] with
/// the bin it falls into.
///
/// Each entry `(a, b)` of [bins] is the half-open interval `[a, b)`. Bins are
/// tried in list order and the first one containing a value wins. The label
/// is `"[a, b)"` with both bounds formatted to [decimalPlaces] decimal
/// places. A value that lies in no bin is labelled with
/// [CategoricColumn.missingValueMarker].
///
/// If [bins] is empty, equal-width bins are generated: the column's
/// minimum-to-maximum span is padded by 10% of its range on each side and
/// divided into at least four bins. When the range is zero (for example, a
/// constant column) a fixed padding of 0.5 is used instead, so that the bins
/// have a non-zero width and still contain the value.
///
/// Throws the error created by [PackhorseError.badArgument] if there is no
/// numeric column called [name].
DataFrame withNumericColumnBinned(
  String name, {
  List<(num, num)> bins = const [],
  int decimalPlaces = 3,
}) {
  final column = numericColumns[name];
  if (column == null) {
    throw PackhorseError.badArgument("No numeric column called '$name'.");
  }

  // Work on a local so the caller-supplied parameter is never reassigned.
  var intervals = bins;
  if (intervals.isEmpty) {
    // With a zero range, 10% padding would give lower == upper and therefore
    // zero-width bins that contain nothing; fall back to a fixed padding.
    final padding = column.range == 0 ? 0.5 : 0.1 * column.range,
        lower = column.minimum - padding,
        upper = column.maximum + padding,
        // NOTE(review): the bin count is derived from the square root of the
        // padded range, not of the number of values — confirm this is the
        // intended rule.
        n = math.max(4, math.sqrt(upper - lower).round()),
        binWidth = (upper - lower) / n;
    intervals = [
      for (var i = 0; i < n; i++)
        (lower + i * binWidth, lower + (i + 1) * binWidth)
    ];
  }

  // Label of the first interval containing x, or the missing marker if none.
  String bin(num x) {
    for (final (a, b) in intervals) {
      if (x >= a && x < b) {
        return "[${a.toStringAsFixed(decimalPlaces)}, "
            "${b.toStringAsFixed(decimalPlaces)})";
      }
    }
    return CategoricColumn.missingValueMarker;
  }

  return copy
    ..categoricColumns["${name}_bin"] =
        CategoricColumn(column.values.map(bin));
}