withNumericColumnBinned method
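Returns a new data frame with an added categoric column, named after the numeric column with a "_bin" suffix, that labels each value with the bin (half-open interval) it falls in.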
Example:
final d = petals.withColumns(["petal_width", "species"]);
print(d);
print(d.withNumericColumnBinned("petal_width"));
.-----------.------------.
|petal_width|species |
:-----------+------------:
|0.2 |setosa |
|0.2 |setosa |
|0.2 |setosa |
|0.2 |setosa |
|1.4 |versicolor |
|1.5 |versicolor |
|1.5 |versicolor |
|1.3 |versicolor |
|2.5 |virginica |
|1.9 |virginica |
|2.1 |virginica |
|1.8 |virginica |
'-----------'------------'
.-----------.------------.-----------------.
|petal_width|species |petal_width_bin |
:-----------+------------+-----------------:
|0.2 |setosa |[-0.030, 0.660) |
|0.2 |setosa |[-0.030, 0.660) |
|0.2 |setosa |[-0.030, 0.660) |
|0.2 |setosa |[-0.030, 0.660) |
|1.4 |versicolor |[1.350, 2.040) |
|1.5 |versicolor |[1.350, 2.040) |
|1.5 |versicolor |[1.350, 2.040) |
|1.3 |versicolor |[0.660, 1.350) |
|2.5 |virginica |[2.040, 2.730) |
|1.9 |virginica |[1.350, 2.040) |
|2.1 |virginica |[2.040, 2.730) |
|1.8 |virginica |[1.350, 2.040) |
'-----------'------------'-----------------'
Implementation
/// Returns a copy of this data frame with an extra categoric column, named
/// `"${name}_bin"`, that labels each value of the numeric column [name] with
/// the bin containing it.
///
/// Each entry `(a, b)` of [bins] is treated as the half-open interval
/// `[a, b)`. A value is labelled with the first bin (in list order) that
/// contains it, formatted as `"[a, b)"` with both bounds written to
/// [decimalPlaces] decimal places. Values that fall in no bin are labelled
/// with [CategoricColumn.missingValueMarker].
///
/// If [bins] is empty, equal-width bins are generated: the column's
/// minimum-to-maximum span is padded by 10% of its range on each side and
/// split into `max(4, round(sqrt(upper - lower)))` bins. When every value in
/// the column is identical (range of zero) a fixed padding of 0.5 is used
/// instead, so that the generated bins are not empty intervals.
///
/// Throws [PackhorseError.badArgument] if there is no numeric column called
/// [name].
DataFrame withNumericColumnBinned(
  String name, {
  List<(num, num)> bins = const [],
  int decimalPlaces = 3,
}) {
  if (!numericColumns.containsKey(name)) {
    throw PackhorseError.badArgument("No numeric column called '$name'.");
  }
  final column = numericColumns[name]!;

  // Work on a local so the caller-visible parameter is never reassigned.
  var effectiveBins = bins;
  if (effectiveBins.isEmpty) {
    final range = column.range,
        // A zero range would give zero-width bins `[x, x)` that contain
        // nothing, sending every value to the missing-value marker; fall
        // back to a fixed padding in that case only.
        padding = range == 0 ? 0.5 : 0.1 * range,
        lower = column.minimum - padding,
        upper = column.maximum + padding,
        // NOTE(review): the bin count is derived from the padded value
        // range, not from the number of values — confirm this is intended.
        n = math.max(4, math.sqrt(upper - lower).round()),
        binWidth = (upper - lower) / n;
    effectiveBins = [
      for (var i = 0; i < n; i++)
        (lower + i * binWidth, lower + (i + 1) * binWidth)
    ];
  }

  // Label for a single value: the first half-open bin that contains it.
  String bin(num x) {
    for (final (a, b) in effectiveBins) {
      if (x >= a && x < b) {
        return "[${a.toStringAsFixed(decimalPlaces)}, "
            "${b.toStringAsFixed(decimalPlaces)})";
      }
    }
    return CategoricColumn.missingValueMarker;
  }

  return copy
    ..categoricColumns["${name}_bin"] = CategoricColumn(column.values.map(bin));
}