menu

fromCsv function

dark_modelight_mode

fromCsv function

Future<DataFrame> fromCsv(

String fileName, {
String columnDelimiter = defaultFieldDelimiter,
String eol = '\n',
bool headerExists = true,
Iterable<String> header = const [],
String autoHeaderPrefix = defaultHeaderPrefix,
Iterable<int> columns = const [],
Iterable<String> columnNames = const [],

})

Creates a DataFrame instance from a CSV file

fileName a path to the CSV file

columnDelimiter a delimiter that divides columns in the CSV file. Default value is ,

eol End of line character. Default value is \n

headerExists Whether a header row exists in the CSV file or not. Default value is true. If false, an autogenerated header row will be used

header A custom header row. Ignored if headerExists is true, since the original header row from the file takes priority

autoHeaderPrefix A prefix that will be used in autogenerated header row column names. Default value is col_

columns A collection of indices to pick from the CSV file

columnNames A collection of column names to pick from the CSV file. columns has precedence over columnNames if both of them are specified

Usage examples:

Case 1. A header row doesn't exist

Suppose, one has a CSV file path/to/file.csv with the following content:

10,20,30
11,24,33
12,25,36
13,26,37

and one creates a DataFrame instance from it:

import 'package:ml_dataframe/ml_dataframe.dart';

void main() async {
  // The file has no header row, so column names are autogenerated using the
  // default `col_` prefix.
  final data = await fromCsv('path/to/file.csv', headerExists: false);

  print(data);
  // DataFrame (4x3)
  // col_1 col_2 col_3
  //    10    20    30
  //    11    24    33
  //    12    25    36
  //    13    26    37
}

Case 2. A header row exists

Suppose, one has a CSV file path/to/file.csv with the following content:

feature_1,feature_2,feature_3
10,20,30
11,24,33
12,25,36
13,26,37

and one creates a DataFrame instance from it:

import 'package:ml_dataframe/ml_dataframe.dart';

void main() async {
  // The first row of the file is taken as the header row.
  final data = await fromCsv('path/to/file.csv'); // default value for `headerExists` is true

  print(data);
  // DataFrame (4x3)
  // feature_1 feature_2 feature_3
  //        10        20        30
  //        11        24        33
  //        12        25        36
  //        13        26        37
}

Case 3. A header row doesn't exist, a custom header is used

Suppose, one has a CSV file path/to/file.csv with the following content:

10,20,30
11,24,33
12,25,36
13,26,37

and one creates a DataFrame instance from it:

import 'package:ml_dataframe/ml_dataframe.dart';

void main() async {
  // The file has no header row, so the names passed via `header` become the
  // column names instead of autogenerated ones.
  final data = await fromCsv('path/to/file.csv',
    headerExists: false,
    header: ['custom_col_1', 'custom_col_2', 'custom_col_3']);

  print(data);
  // DataFrame (4x3)
  // custom_col_1 custom_col_2 custom_col_3
  //           10           20           30
  //           11           24           33
  //           12           25           36
  //           13           26           37
}

Case 4. Pick specific columns by indices

Suppose, one has a CSV file path/to/file.csv with the following content:

10,20,30
11,24,33
12,25,36
13,26,37

and one creates a DataFrame instance from it:

import 'package:ml_dataframe/ml_dataframe.dart';

void main() async {
  // Only the columns at indices 0 and 2 are picked from the file; the file
  // has no header row, so their names are autogenerated.
  final data = await fromCsv('path/to/file.csv',
    headerExists: false,
    columns: [0, 2]);

  print(data);
  // DataFrame (4x2)
  // col_1 col_3
  //    10    30
  //    11    33
  //    12    36
  //    13    37
}

Case 5. Pick specific columns by names

Suppose, one has a CSV file path/to/file.csv with the following content:

feature_1,feature_2,feature_3
10,20,30
11,24,33
12,25,36
13,26,37

and one creates a DataFrame instance from it:

import 'package:ml_dataframe/ml_dataframe.dart';

void main() async {
  // Only the columns named `feature_1` and `feature_3` are picked; the names
  // come from the header row of the file.
  final data = await fromCsv('path/to/file.csv',
    columnNames: ['feature_1', 'feature_3']);

  print(data);
  // DataFrame (4x2)
  // feature_1 feature_3
  //        10        30
  //        11        33
  //        12        36
  //        13        37
}

Implementation

/// Creates a [DataFrame] instance from the CSV file located at [fileName].
///
/// [columnDelimiter] and [eol] are passed to the CSV reader together with
/// [fileName]. The remaining options are forwarded unchanged to
/// [fromRawData]:
///
/// * [headerExists] - whether the file contains a header row.
/// * [header] - a custom header row, forwarded as `predefinedHeader`.
/// * [autoHeaderPrefix] - a prefix for autogenerated column names.
/// * [columns] - indices of the columns to pick.
/// * [columnNames] - names of the columns to pick.
Future<DataFrame> fromCsv(
  String fileName, {
  String columnDelimiter = defaultFieldDelimiter,
  String eol = '\n',
  bool headerExists = true,
  Iterable<String> header = const [],
  String autoHeaderPrefix = defaultHeaderPrefix,
  Iterable<int> columns = const [],
  Iterable<String> columnNames = const [],
}) async {
  // Read the raw file content first; header handling and column selection
  // are delegated to [fromRawData].
  final rawData =
      await DataReader.csv(fileName, columnDelimiter, eol).extractData();

  return fromRawData(
    rawData,
    columns: columns,
    columnNames: columnNames,
    headerExists: headerExists,
    predefinedHeader: header,
    autoHeaderPrefix: autoHeaderPrefix,
  );
}