DataFrame.fromCsv constructor

DataFrame.fromCsv(
  String text, {
  String separator = ",",
  String quote = '"',
  Map<String, ColumnType>? types,
})

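Creates a data frame from a CSV string. The first non-blank line supplies the column names, and each remaining non-blank line is read as one row of values.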

Example:

final d = DataFrame.fromCsv("""
 a,b,c
 1,5.1,"red"
 3,2.7,"green"
 5,-0.9,"red"
""");
print(d);

.-.----.-------.
|a|b   |c      |
:-+----+-------:
|1|5.1 |red    |
|3|2.7 |green  |
|5|-0.9|red    |
'-'----'-------'

Implementation

/// Builds a data frame from the csv formatted [text].
///
/// The text is split into lines on `\n` or `\r\n`; blank lines are skipped
/// and the remaining lines are trimmed. The first line supplies the column
/// names and every following line supplies one row of values.
///
/// Cells are delimited by [separator] and may optionally be wrapped in
/// [quote]. A cell cannot itself contain the quote or separator characters,
/// even when quoted.
///
/// A column listed in [types] is built with that type. Any other column is
/// numeric if its first value parses as a number and categoric otherwise; a
/// column with no values at all is treated as categoric. In numeric columns,
/// values that do not parse as a number become `double.nan`.
///
/// Throws [PackhorseError.badStructure] if there is no header line, if a line
/// cannot be split into cells, or if a row's cell count differs from the
/// header's. Throws [PackhorseError.badColumnName] if any column name is
/// rejected by `_columnNameOkay`.
factory DataFrame.fromCsv(
  String text, {
  String separator = ",",
  String quote = '"',
  Map<String, ColumnType>? types,
}) {
  final columnTypes = types ?? const <String, ColumnType>{};

  // Escape the delimiters so that regex metacharacters in them are matched
  // literally rather than altering the pattern.
  final quotePattern = RegExp.escape(quote);
  final separatorPattern = RegExp.escape(separator);

  final splitPattern = RegExp(r"\r?\n");

  // One cell: optional leading spaces, optional opening quote, the content
  // (group 1), optional closing quote, then an optional trailing separator.
  // The trailing delimiter must be [separator] itself rather than a literal
  // comma, otherwise other separators are never consumed.
  final itemPattern = RegExp("^ *(?:$quotePattern)?"
      "([^$quotePattern$separatorPattern]*)"
      "(?:$quotePattern)?(?:$separatorPattern)?");

  final lines = [
    for (final line in text.split(splitPattern))
      if (line.trim().isNotEmpty) line.trim()
  ];

  if (lines.isEmpty) {
    throw PackhorseError.badStructure("Csv text contains no header row.");
  }

  /// Splits [line] into its trimmed cell values.
  List<String> getCells(String line) {
    final result = <String>[];
    var remainder = line;
    while (remainder.isNotEmpty) {
      final match = itemPattern.firstMatch(remainder);
      // Every part of the pattern is optional, so it can match without
      // consuming anything; looping on that would never terminate.
      if (match == null || match.end == 0) {
        throw PackhorseError.badStructure(
            "Csv row could not be parsed:\n  $line");
      }
      result.add(match.group(1)!.trim());
      remainder = remainder.substring(match.end);
    }
    return result;
  }

  final columnNames = getCells(lines.first),
      stringValues = {for (final k in columnNames) k: <String>[]};

  if (columnNames.any((name) => !_columnNameOkay(name))) {
    throw PackhorseError.badColumnName([
      for (final columnName in columnNames)
        "  '${columnName.padRight(20, ".")}' "
            "${_columnNameOkay(columnName) ? "okay" : "not okay"}"
    ].join("\n"));
  }

  for (final row in lines.sublist(1)) {
    final values = getCells(row);
    if (values.length != columnNames.length) {
      throw PackhorseError.badStructure(
          "Csv row with incorrect length:\n  $row");
    }
    for (var i = 0; i < columnNames.length; i++) {
      stringValues[columnNames[i]]!.add(values[i]);
    }
  }

  final numericColumns = <String, NumericColumn>{},
      categoricColumns = <String, CategoricColumn>{};

  for (final columnName in columnNames) {
    final values = stringValues[columnName]!;

    // Without an explicit type, infer it from the first value. A header-only
    // csv has no first value, so such columns fall back to categoric.
    final inferredType = values.isNotEmpty && num.tryParse(values.first) != null
        ? ColumnType.numeric
        : ColumnType.categoric;

    switch (columnTypes[columnName] ?? inferredType) {
      case ColumnType.categoric:
        categoricColumns[columnName] = CategoricColumn(values);
      case ColumnType.numeric:
        numericColumns[columnName] =
            NumericColumn(values.map((x) => num.tryParse(x) ?? double.nan));
    }
  }

  return DataFrame(
    numericColumns: numericColumns,
    categoricColumns: categoricColumns,
  );
}