ml_dataframe 1.6.0 copy "ml_dataframe: ^1.6.0" to clipboard
ml_dataframe: ^1.6.0 copied to clipboard

An in-memory untyped data storage with the possibility to query and modify it

Build Status Coverage Status pub package Gitter Chat

ml_dataframe #

A way to store and manipulate data

The library exposes in-memory storage for dynamically typed data. The storage is represented by the DataFrame class.

Table of contents #

Usage example: #

import 'package:ml_dataframe/ml_dataframe.dart';

// Builds a DataFrame from a list of rows and prints it.
// The first row of the data holds the column names (the header).
void main() {
  final data = [
    ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',         'Species'],
    [   1,             5.1,            3.5,             1.4,            0.2,     'Iris-setosa'],
    [   2,             4.9,            3.0,             1.4,            0.2,     'Iris-setosa'],
    [  89,             5.6,            3.0,             4.1,            1.3, 'Iris-versicolor'],
    [  90,             5.5,            2.5,             4.0,            1.3, 'Iris-versicolor'],
    [  91,             5.5,            2.6,             4.4,            1.2, 'Iris-versicolor'],
  ];
    
  final dataframe = DataFrame(data);
    
  // Expected output: 5 data rows x 6 columns (the header row is not counted).
  print(dataframe);
  // DataFrame (5 x 6)
  //  Id   SepalLengthCm   SepalWidthCm   PetalLengthCm   PetalWidthCm           Species
  //   1             5.1            3.5             1.4            0.2       Iris-setosa
  //   2             4.9            3.0             1.4            0.2       Iris-setosa
  //  89             5.6            3.0             4.1            1.3   Iris-versicolor
  //  90             5.5            2.5             4.0            1.3   Iris-versicolor
  //  91             5.5            2.6             4.4            1.2   Iris-versicolor
}

DataFrame API with examples: #

Get the header of the data #

By default, the very first row is considered a header, unless one specifies their own header or asks for an autogenerated one. More on this in the "DataFrame constructor" section below.

import 'package:ml_dataframe/ml_dataframe.dart';

// Reads the column names of a dataframe through the `header` getter.
void main() {
  final dataframe = DataFrame([
    ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',         'Species'],
    [   1,             5.1,            3.5,             1.4,            0.2,     'Iris-setosa'],
    [   2,             4.9,            3.0,             1.4,            0.2,     'Iris-setosa'],
    [  89,             5.6,            3.0,             4.1,            1.3, 'Iris-versicolor'],
    [  90,             5.5,            2.5,             4.0,            1.3, 'Iris-versicolor'],
    [  91,             5.5,            2.6,             4.4,            1.2, 'Iris-versicolor'],
  ]);
  // The header is taken from the first row of the source data.
  final header = dataframe.header;

  print(header);
  // ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm', 'Species']
}

Get the rows of the data #

import 'package:ml_dataframe/ml_dataframe.dart';

// Reads the data rows of a dataframe through the `rows` getter.
void main() {
  final dataframe = DataFrame([
    ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',         'Species'],
    [   1,             5.1,            3.5,             1.4,            0.2,     'Iris-setosa'],
    [   2,             4.9,            3.0,             1.4,            0.2,     'Iris-setosa'],
    [  89,             5.6,            3.0,             4.1,            1.3, 'Iris-versicolor'],
    [  90,             5.5,            2.5,             4.0,            1.3, 'Iris-versicolor'],
    [  91,             5.5,            2.6,             4.4,            1.2, 'Iris-versicolor'],
  ]);
  final rows = dataframe.rows;

  // The header row is not part of the printed rows.
  print(rows);
  // [
  //   [1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'],
  //   [2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'],
  //   [89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'],
  //   [90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'],
  //   [91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'],
  // ],
}

Get the series collection (columns) of the data #

import 'package:ml_dataframe/ml_dataframe.dart';

// Reads the columns of a dataframe as named series through the `series` getter.
void main() {
  final dataframe = DataFrame([
    ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',         'Species'],
    [   1,             5.1,            3.5,             1.4,            0.2,     'Iris-setosa'],
    [   2,             4.9,            3.0,             1.4,            0.2,     'Iris-setosa'],
    [  89,             5.6,            3.0,             4.1,            1.3, 'Iris-versicolor'],
    [  90,             5.5,            2.5,             4.0,            1.3, 'Iris-versicolor'],
    [  91,             5.5,            2.6,             4.4,            1.2, 'Iris-versicolor'],
  ]);
  final series = dataframe.series;
    
  // Each series pairs a column name with that column's values.
  print(series);
  // [
  //   'Id': [1, 2, 89, 90, 91],
  //   'SepalLengthCm': [5.1, 4.9, 5.6, 5.5, 5.5],
  //   'SepalWidthCm': [3.5, 3.0, 3.0, 2.5, 2.6],
  //   'PetalLengthCm': [1.4, 1.4, 4.1, 4.0, 4.4],
  //   'PetalWidthCm': [0.2, 0.2, 1.3, 1.3, 1.2],
  //   'Species': ['Iris-setosa', 'Iris-setosa', 'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor'],
  // ],
}

Get the shape of the data #

import 'package:ml_dataframe/ml_dataframe.dart';

// Reads the dimensions of a dataframe through the `shape` getter.
void main() {
  final dataframe = DataFrame([
    ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',         'Species'],
    [   1,             5.1,            3.5,             1.4,            0.2,     'Iris-setosa'],
    [   2,             4.9,            3.0,             1.4,            0.2,     'Iris-setosa'],
    [  89,             5.6,            3.0,             4.1,            1.3, 'Iris-versicolor'],
    [  90,             5.5,            2.5,             4.0,            1.3, 'Iris-versicolor'],
    [  91,             5.5,            2.6,             4.4,            1.2, 'Iris-versicolor'],
  ]);
  final shape = dataframe.shape;

  // The header row is not counted as a data row.
  print(shape);
  // [5, 6] - 5 rows, 6 columns
}

Add a series #

import 'package:ml_dataframe/ml_dataframe.dart';

// Adds a new series (column) to a dataframe and prints the added series.
void main() {
  // Five values - one per data row of the dataframe below.
  final firstSeries = Series('super_series', [1, 2, 3, 4, 5]);
  final dataframe = DataFrame([
    ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',         'Species'],
    [   1,             5.1,            3.5,             1.4,            0.2,     'Iris-setosa'],
    [   2,             4.9,            3.0,             1.4,            0.2,     'Iris-setosa'],
    [  89,             5.6,            3.0,             4.1,            1.3, 'Iris-versicolor'],
    [  90,             5.5,            2.5,             4.0,            1.3, 'Iris-versicolor'],
    [  91,             5.5,            2.6,             4.4,            1.2, 'Iris-versicolor'],
  ]);

  final modifiedDataframe = dataframe.addSeries([firstSeries]); // The method doesn't mutate the original dataframe

  // Look the new series up by its name so that the example does not depend on
  // the position at which the series was inserted.
  print(modifiedDataframe['super_series']);
  // super_series: [1, 2, 3, 4, 5]
}

Drop a series by a series name #

import 'package:ml_dataframe/ml_dataframe.dart';

// Removes the 'Id' series by name and compares the shapes before and after.
void main() {
  final dataframe = DataFrame([
    ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',         'Species'],
    [   1,             5.1,            3.5,             1.4,            0.2,     'Iris-setosa'],
    [   2,             4.9,            3.0,             1.4,            0.2,     'Iris-setosa'],
    [  89,             5.6,            3.0,             4.1,            1.3, 'Iris-versicolor'],
    [  90,             5.5,            2.5,             4.0,            1.3, 'Iris-versicolor'],
    [  91,             5.5,            2.6,             4.4,            1.2, 'Iris-versicolor'],
  ]);

  print(dataframe.shape);
  // [5, 6] - 5 rows, 6 columns

  final modifiedDataframe = dataframe.dropSeries(names: ['Id']); // The method doesn't mutate the original dataframe

  print(modifiedDataframe.shape);
  // [5, 5] - after the series has been dropped, the number of columns decreased by one
} 

Drop a series by a series index #

import 'package:ml_dataframe/ml_dataframe.dart';

// Removes the series at index 0 and compares the shapes before and after.
void main() {
  final dataframe = DataFrame([
    ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',         'Species'],
    [   1,             5.1,            3.5,             1.4,            0.2,     'Iris-setosa'],
    [   2,             4.9,            3.0,             1.4,            0.2,     'Iris-setosa'],
    [  89,             5.6,            3.0,             4.1,            1.3, 'Iris-versicolor'],
    [  90,             5.5,            2.5,             4.0,            1.3, 'Iris-versicolor'],
    [  91,             5.5,            2.6,             4.4,            1.2, 'Iris-versicolor'],
  ]);
  print(dataframe.shape);
  // [5, 6] - 5 rows, 6 columns

  final modifiedDataframe = dataframe.dropSeries(indices: [0]); // The method doesn't mutate the original dataframe

  print(modifiedDataframe.shape);
  // [5, 5] - after the series has been dropped, the number of columns decreased by one
} 

Sample a new dataframe from rows of an existing dataframe #

import 'package:ml_dataframe/ml_dataframe.dart';

// Creates a new dataframe from selected rows of an existing dataframe.
void main() {
  final dataframe = DataFrame([
    ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',         'Species'],
    [   1,             5.1,            3.5,             1.4,            0.2,     'Iris-setosa'],
    [   2,             4.9,            3.0,             1.4,            0.2,     'Iris-setosa'],
    [  89,             5.6,            3.0,             4.1,            1.3, 'Iris-versicolor'],
    [  90,             5.5,            2.5,             4.0,            1.3, 'Iris-versicolor'],
    [  91,             5.5,            2.6,             4.4,            1.2, 'Iris-versicolor'],
  ]);
  // Row indices are zero-based and do not count the header: the dataframe has
  // 5 data rows, so 0 is the first row (Id 1) and 4 is the last row (Id 91).
  final sampled = dataframe.sampleFromRows([0, 4]);

  print(sampled);
  // DataFrame (2 x 6)
  //  Id   SepalLengthCm   SepalWidthCm   PetalLengthCm   PetalWidthCm           Species
  //   1             5.1            3.5             1.4            0.2       Iris-setosa
  //  91             5.5            2.6             4.4            1.2   Iris-versicolor
}

Sample a new dataframe from series indices of an existing dataframe #

import 'package:ml_dataframe/ml_dataframe.dart';

// Creates a new dataframe from the series selected by their indices.
void main() {
  final dataframe = DataFrame([
    ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',         'Species'],
    [   1,             5.1,            3.5,             1.4,            0.2,     'Iris-setosa'],
    [   2,             4.9,            3.0,             1.4,            0.2,     'Iris-setosa'],
    [  89,             5.6,            3.0,             4.1,            1.3, 'Iris-versicolor'],
    [  90,             5.5,            2.5,             4.0,            1.3, 'Iris-versicolor'],
    [  91,             5.5,            2.6,             4.4,            1.2, 'Iris-versicolor'],
  ]);
  // Indices 0 and 1 select the 'Id' and 'SepalLengthCm' series.
  final sampled = dataframe.sampleFromSeries(indices: [0, 1]);

  print(sampled);
  // DataFrame (5 x 2)
  //  Id   SepalLengthCm
  //   1             5.1
  //   2             4.9
  //  89             5.6
  //  90             5.5
  //  91             5.5
}

Sample a new dataframe from series names of an existing dataframe #

import 'package:ml_dataframe/ml_dataframe.dart';

// Creates a new dataframe from the series selected by their names.
void main() {
  final dataframe = DataFrame([
    ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',         'Species'],
    [   1,             5.1,            3.5,             1.4,            0.2,     'Iris-setosa'],
    [   2,             4.9,            3.0,             1.4,            0.2,     'Iris-setosa'],
    [  89,             5.6,            3.0,             4.1,            1.3, 'Iris-versicolor'],
    [  90,             5.5,            2.5,             4.0,            1.3, 'Iris-versicolor'],
    [  91,             5.5,            2.6,             4.4,            1.2, 'Iris-versicolor'],
  ]);
  final sampled = dataframe.sampleFromSeries(names: ['Id', 'SepalLengthCm']);

  // Only the two requested series are present in the result.
  print(sampled);
  // DataFrame (5 x 2)
  //  Id   SepalLengthCm
  //   1             5.1
  //   2             4.9
  //  89             5.6
  //  90             5.5
  //  91             5.5
}

Save a dataframe to a JSON file #

import 'package:ml_dataframe/ml_dataframe.dart';

// Persists a dataframe to a JSON file at the given path.
void main() async {
  final dataframe = DataFrame([
    ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',         'Species'],
    [   1,             5.1,            3.5,             1.4,            0.2,     'Iris-setosa'],
    [   2,             4.9,            3.0,             1.4,            0.2,     'Iris-setosa'],
    [  89,             5.6,            3.0,             4.1,            1.3, 'Iris-versicolor'],
    [  90,             5.5,            2.5,             4.0,            1.3, 'Iris-versicolor'],
    [  91,             5.5,            2.6,             4.4,            1.2, 'Iris-versicolor'],
  ]);
  
  // The call is awaited, which is why `main` is declared `async`.
  await dataframe.saveAsJson('path/to/json/file.json');
}

Shuffle rows in a dataframe #

import 'package:ml_dataframe/ml_dataframe.dart';

// Shuffles the rows of a dataframe and prints it before and after shuffling.
void main() {
  final dataframe = DataFrame([
    ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',         'Species'],
    [   1,             5.1,            3.5,             1.4,            0.2,     'Iris-setosa'],
    [   2,             4.9,            3.0,             1.4,            0.2,     'Iris-setosa'],
    [  89,             5.6,            3.0,             4.1,            1.3, 'Iris-versicolor'],
    [  90,             5.5,            2.5,             4.0,            1.3, 'Iris-versicolor'],
    [  91,             5.5,            2.6,             4.4,            1.2, 'Iris-versicolor'],
  ]);
  
  print(dataframe);
  // DataFrame (5 x 6)
  //  Id   SepalLengthCm   SepalWidthCm   PetalLengthCm   PetalWidthCm           Species
  //   1             5.1            3.5             1.4            0.2       Iris-setosa
  //   2             4.9            3.0             1.4            0.2       Iris-setosa
  //  89             5.6            3.0             4.1            1.3   Iris-versicolor
  //  90             5.5            2.5             4.0            1.3   Iris-versicolor
  //  91             5.5            2.6             4.4            1.2   Iris-versicolor

  final shuffled = dataframe.shuffle(); // keep in mind that `shuffle`, like other methods, returns a new dataframe; it doesn't mutate the source dataframe

  // The row order below is one possible result of the shuffle.
  print(shuffled);
  // DataFrame (5 x 6)
  //  Id   SepalLengthCm   SepalWidthCm   PetalLengthCm   PetalWidthCm           Species
  //  89             5.6            3.0             4.1            1.3   Iris-versicolor
  //   1             5.1            3.5             1.4            0.2       Iris-setosa
  //  91             5.5            2.6             4.4            1.2   Iris-versicolor
  //   2             4.9            3.0             1.4            0.2       Iris-setosa
  //  90             5.5            2.5             4.0            1.3   Iris-versicolor
}

One can use the seed parameter to get the same order of rows regardless of the number of shuffle calls:

// With a fixed seed, repeated shuffle calls produce the same row order.
dataframe.shuffle(seed: 10);

Get a json-serializable representation #

import 'package:ml_dataframe/ml_dataframe.dart';

// Converts a dataframe to a JSON-serializable representation.
void main() {
  final dataframe = DataFrame([
    ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',         'Species'],
    [   1,             5.1,            3.5,             1.4,            0.2,     'Iris-setosa'],
    [   2,             4.9,            3.0,             1.4,            0.2,     'Iris-setosa'],
    [  89,             5.6,            3.0,             4.1,            1.3, 'Iris-versicolor'],
    [  90,             5.5,            2.5,             4.0,            1.3, 'Iris-versicolor'],
    [  91,             5.5,            2.6,             4.4,            1.2, 'Iris-versicolor'],
  ]);
  final json = dataframe.toJson(); // json contains a serializable map
}

Convert a dataframe to a matrix: #

import 'package:ml_dataframe/ml_dataframe.dart';

// Converts a dataframe to a matrix.
// All columns are numeric here: unlike the other examples, the data has no
// 'Species' column.
void main() {
  final dataframe = DataFrame([
    ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm'],
    [   1,             5.1,            3.5,             1.4,            0.2],
    [   2,             4.9,            3.0,             1.4,            0.2],
    [  89,             5.6,            3.0,             4.1,            1.3],
    [  90,             5.5,            2.5,             4.0,            1.3],
    [  91,             5.5,            2.6,             4.4,            1.2],
  ]);
  
  final matrix = dataframe.toMatrix();
  
  print(matrix); // because of internal representation of Float32 numbers there are some round-off errors in the output
  // Matrix 5 x 5:
  // (1.0, 5.099999904632568, 3.5, 1.399999976158142, 0.20000000298023224)
  // (2.0, 4.900000095367432, 3.0, 1.399999976158142, 0.20000000298023224)
  // (89.0, 5.599999904632568, 3.0, 4.099999904632568, 1.2999999523162842)
  // (90.0, 5.5, 2.5, 4.0, 1.2999999523162842)
  // (91.0, 5.5, 2.5999999046325684, 4.400000095367432, 1.2000000476837158)
}

The method throws an error if the dataframe contains values that cannot be converted to a number.

Get a series by its index #

import 'package:ml_dataframe/ml_dataframe.dart';

// Gets a single series of a dataframe by its index using the `[]` operator.
void main() {
  final dataframe = DataFrame([
    ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',         'Species'],
    [   1,             5.1,            3.5,             1.4,            0.2,     'Iris-setosa'],
    [   2,             4.9,            3.0,             1.4,            0.2,     'Iris-setosa'],
    [  89,             5.6,            3.0,             4.1,            1.3, 'Iris-versicolor'],
    [  90,             5.5,            2.5,             4.0,            1.3, 'Iris-versicolor'],
    [  91,             5.5,            2.6,             4.4,            1.2, 'Iris-versicolor'],
  ]);
  // Index 0 refers to the first series, 'Id'.
  final series = dataframe[0];

  print(series);
  // Id: [1, 2, 89, 90, 91]
}

Get a series by its name #

import 'package:ml_dataframe/ml_dataframe.dart';

// Gets a single series of a dataframe by its name using the `[]` operator.
void main() {
  final dataframe = DataFrame([
    ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',         'Species'],
    [   1,             5.1,            3.5,             1.4,            0.2,     'Iris-setosa'],
    [   2,             4.9,            3.0,             1.4,            0.2,     'Iris-setosa'],
    [  89,             5.6,            3.0,             4.1,            1.3, 'Iris-versicolor'],
    [  90,             5.5,            2.5,             4.0,            1.3, 'Iris-versicolor'],
    [  91,             5.5,            2.6,             4.4,            1.2, 'Iris-versicolor'],
  ]);
  final series = dataframe['Id'];

  print(series);
  // Id: [1, 2, 89, 90, 91]
}

Map values of a dataframe #

import 'package:ml_dataframe/ml_dataframe.dart';

// Applies a mapping function to every value of a dataframe.
void main() {
  final data = DataFrame([
    ['col_1', 'col_2', 'col_3'],
    [      2,      20,     200],
    [      3,      30,     300],
    [      4,      40,     400],
  ]);
  // The first generic type is the type of the source value, the second generic
  // type is the type of the mapped value.
  final modifiedData = data.map<num, num>((value) => value * 2);

  // Every value is doubled.
  print(modifiedData);
  // DataFrame (3 x 3)
  // col_1 col_2 col_3
  //     4    40   400
  //     6    60   600
  //     8    80   800
}

Map values of a specific dataframe series #

import 'package:ml_dataframe/ml_dataframe.dart';

// Applies a mapping function to the values of a single series of a dataframe.
void main() {
  final data = DataFrame([
    ['col_1', 'col_2', 'col_3'],
    [      2,      20,     200],
    [      3,      30,     300],
    [      4,      40,     400],
  ]);
  // The first generic type is the type of the source value, the second generic
  // type is the type of the mapped value.
  final modifiedData = data.mapSeries<num, num>((value) => value * 2, name: 'col_2');

  // Only the values of 'col_2' are doubled; the other series are unchanged.
  print(modifiedData);
  // DataFrame (3 x 3)
  // col_1 col_2 col_3
  //     2    40   200
  //     3    60   300
  //     4    80   400
}

Ways to create a dataframe #

DataFrame constructor #

import 'package:ml_dataframe/ml_dataframe.dart';

// Creates a dataframe with the default constructor.
// The first row of the data is used as the header.
void main() {
  final data = [
    ['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',         'Species'],
    [   1,             5.1,            3.5,             1.4,            0.2,     'Iris-setosa'],
    [   2,             4.9,            3.0,             1.4,            0.2,     'Iris-setosa'],
    [  89,             5.6,            3.0,             4.1,            1.3, 'Iris-versicolor'],
    [  90,             5.5,            2.5,             4.0,            1.3, 'Iris-versicolor'],
    [  91,             5.5,            2.6,             4.4,            1.2, 'Iris-versicolor'],
  ];

  final dataframe = DataFrame(data);
}

By default, the very first row is considered a header. If the data does not have a header, one can use an autogenerated header by passing headerExists: false to the constructor:

import 'package:ml_dataframe/ml_dataframe.dart';

// Creates a dataframe from data without a header row; the header is
// autogenerated.
void main() {
  final data = [
    [1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'],
    [2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'],
    [89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'],
    [90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'],
    [91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'],
  ];

  // `headerExists: false` means that the first row is data, not column names.
  final dataframe = DataFrame(data, headerExists: false);

  print(dataframe.header);
  // ['col_1', 'col_2', 'col_3', 'col_4', 'col_5', 'col_6']
}

It outputs ['col_1', 'col_2', 'col_3', 'col_4', 'col_5', 'col_6']. col_ is a default prefix for the autogenerated columns.

Also, if there is no header row in the data, one can use a predefined header:

import 'package:ml_dataframe/ml_dataframe.dart';

// Creates a dataframe from data without a header row, using column names
// supplied through the `header` argument.
void main() {
  final data = [
    [1, 5.1, 3.5, 1.4, 0.2, 'Iris-setosa'],
    [2, 4.9, 3.0, 1.4, 0.2, 'Iris-setosa'],
    [89, 5.6, 3.0, 4.1, 1.3, 'Iris-versicolor'],
    [90, 5.5, 2.5, 4.0, 1.3, 'Iris-versicolor'],
    [91, 5.5, 2.6, 4.4, 1.2, 'Iris-versicolor'],
  ];

  // Six names are passed - one per column of the data.
  final dataframe = DataFrame(data, header: ['feature_1', 'feature_2', 'feature_3', 'feature_4', 'feature_5', 'feature_6']);
}

fromCsv function #

import 'package:ml_dataframe/ml_dataframe.dart';

// Creates a dataframe from a CSV file; the call is awaited, so `main` is async.
void main() async {
  final data = await fromCsv('path/to/csv/file.csv');
}

If the CSV file does not have a header row, one needs to provide the corresponding flag:

import 'package:ml_dataframe/ml_dataframe.dart';

// Creates a dataframe from a CSV file that has no header row.
void main() async {
  final data = await fromCsv('path/to/csv/file.csv', headerExists: false);
}

Restore a dataframe previously persisted as a json file - fromJson function #

import 'package:ml_dataframe/ml_dataframe.dart';

// Restores a dataframe from a JSON file; the call is awaited, so `main` is async.
void main() async {
  final data = await fromJson('path/to/json/file.json');
}

This function works in conjunction with DataFrame saveAsJson method.

Dataframes with prefilled data #

In order to test data processing algorithms, one can use "toy" datasets. The library exposes several of them:

Iris dataset - function getIrisDataFrame #

One can create a dataframe filled with Iris data:

import 'package:ml_dataframe/ml_dataframe.dart';

// Creates a dataframe prefilled with the Iris dataset and prints it.
void main() {
  final data = getIrisDataFrame();

  print(data);
  // DataFrame (150 x 6)
  // Id SepalLengthCm SepalWidthCm PetalLengthCm PetalWidthCm Species
  // ...
}

Pima Indians diabetes dataset - function getPimaIndiansDiabetesDataFrame #

One can create a dataframe filled with Pima Indians diabetes data:

import 'package:ml_dataframe/ml_dataframe.dart';

// Creates a dataframe prefilled with the Pima Indians diabetes dataset and
// prints it.
void main() {
  final data = getPimaIndiansDiabetesDataFrame();

  print(data);
  // DataFrame (768 x 9)
  // Pregnancies Glucose BloodPressure SkinThickness Insulin BMI DiabetesPedigreeFunction Age Outcome
  // ...
}

Red wine quality dataset - function getWineQualityDataframe #

One can create a dataframe filled with Red wine quality data:

import 'package:ml_dataframe/ml_dataframe.dart';

// Creates a dataframe prefilled with the Red wine quality dataset and prints it.
void main() {
  final data = getWineQualityDataframe();

  print(data);
  // DataFrame (1599 x 12)
  // fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
  // ...
}

Boston housing dataset - function getHousingDataframe #

One can create a dataframe filled with Boston housing data:

import 'package:ml_dataframe/ml_dataframe.dart';

// Creates a dataframe prefilled with the Boston housing dataset and prints it.
void main() {
  final data = getHousingDataframe();

  // The printed output is abridged: `...` stands for omitted rows and columns.
  print(data);
  // DataFrame (506 x 14)
  //    CRIM     ZN   INDUS   CHAS     NOX      RM   ...   MEDV
  // 0.00632   18.0    2.31      0   0.538   6.575   ...   24.0
  // 0.02731    0.0    7.07      0   0.469   6.421   ...   21.6
  // 0.02729    0.0    7.07      0   0.469   7.185   ...   34.7
  // 0.03237    0.0    2.18      0   0.458   6.998   ...   33.4
  // 0.06905    0.0    2.18      0   0.458   7.147   ...   36.2
  //     ...    ...     ...    ...     ...     ...   ...    ...
  // 0.06263    0.0   11.93      0   0.573   6.593   ...   22.4
  // 0.04527    0.0   11.93      0   0.573    6.12   ...   20.6
  // 0.06076    0.0   11.93      0   0.573   6.976   ...   23.9
  // 0.10959    0.0   11.93      0   0.573   6.794   ...   22.0
  // 0.04741    0.0   11.93      0   0.573    6.03   ...   11.9
}

Contacts #

If you have questions, feel free to text me on

17
likes
135
points
1.38k
downloads

Publisher

verified publisher ml-algo.com

Weekly Downloads

An in-memory untyped data storage with the possibility to query and modify it

Repository (GitHub)
View/report issues

Documentation

API reference

License

BSD-2-Clause (license)

Dependencies

csv, json_annotation, ml_linalg, quiver

More

Packages that depend on ml_dataframe