resample method

DataFrame resample(
  String frequency, {
  String? dateColumn,
  String aggFunc = 'mean',
  String label = 'left',
  String closed = 'left',
})

Resample time series data to a different frequency.

This method provides functionality similar to pandas' resample() method, allowing you to change the frequency of time series data and apply aggregation functions.

Parameters:

  • frequency: Target frequency string ('D', 'H', 'M', 'Y')
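  • dateColumn: Name of the column containing DateTime values (optional; if omitted, the columns are scanned in order and the first one detected as holding DateTime values is used, and an ArgumentError is thrown if none is found)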
  • aggFunc: Aggregation function to apply ('mean', 'sum', 'min', 'max', 'count', 'first', 'last')
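  • label: Which bin edge to use as the label for each bin ('left' or 'right'). Note: the implementation shown below accepts this parameter but does not currently apply it; each output row is labeled with its bin timestamp from the target index.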
  • closed: Which side of bin interval is closed ('left' or 'right')

Returns: A new DataFrame with resampled data

Example:

// Resample daily data to monthly, taking the mean
var monthlyData = df.resample('M', dateColumn: 'date', aggFunc: 'mean');

// Resample hourly data to daily, taking the sum
var dailyData = df.resample('D', dateColumn: 'timestamp', aggFunc: 'sum');

Implementation

/// Resamples time series data to a different frequency.
///
/// Rows are assigned to bins generated from [frequency] between the earliest
/// and latest timestamps found in the date column. Every other column is then
/// reduced per bin with [aggFunc]. Bins that receive no rows are omitted from
/// the result.
///
/// Parameters:
/// - [frequency]: Target frequency string; must be accepted by
///   `FrequencyUtils.isValidFrequency`.
/// - [dateColumn]: Name of the column holding `DateTime` values. When
///   omitted, the columns are scanned in order and the first one whose first
///   non-null value is a `DateTime` is used.
/// - [aggFunc]: Name of the aggregation, forwarded to `_applyAggregation`.
/// - [label]: Currently not applied by this implementation; each output row
///   is labeled with its bin timestamp from the target index.
/// - [closed]: Forwarded to `_findBin` when assigning rows to bins.
///
/// Returns a new [DataFrame] with the same columns, one row per non-empty
/// bin, indexed by the bin timestamps. The date column holds the bin
/// timestamp.
///
/// Throws an [ArgumentError] if [frequency] is invalid, if no date column can
/// be resolved, if the date column is not present, if it contains a non-null
/// value that is not a `DateTime`, or if it contains no `DateTime` at all.
DataFrame resample(
  String frequency, {
  String? dateColumn,
  String aggFunc = 'mean',
  String label = 'left',
  String closed = 'left',
}) {
  if (!FrequencyUtils.isValidFrequency(frequency)) {
    throw ArgumentError('Invalid frequency: $frequency');
  }

  // Resolve the date column, auto-detecting it when the caller gave none.
  String? actualDateColumn = dateColumn;
  if (actualDateColumn == null) {
    for (final col in columns) {
      // Judge the column by its first non-null value: null entries are
      // tolerated in the date column below, so a leading null must not hide
      // an otherwise valid DateTime column.
      var holdsDates = false;
      for (final value in this[col].data) {
        if (value == null) continue;
        holdsDates = value is DateTime;
        break;
      }
      if (holdsDates) {
        actualDateColumn = col;
        break;
      }
    }
  }

  if (actualDateColumn == null) {
    throw ArgumentError(
        'No date column specified and no DateTime column found automatically');
  }

  if (!columns.contains(actualDateColumn)) {
    throw ArgumentError(
        'Date column "$actualDateColumn" not found in DataFrame');
  }

  final dateSeriesData = this[actualDateColumn].data;

  // Collect the DateTime entries; nulls are skipped, anything else is an
  // error.
  final dateTimes = <DateTime>[];
  for (final value in dateSeriesData) {
    if (value is DateTime) {
      dateTimes.add(value);
    } else if (value != null) {
      throw ArgumentError(
          'Date column "$actualDateColumn" contains non-DateTime values');
    }
  }

  if (dateTimes.isEmpty) {
    throw ArgumentError('No valid DateTime values found in date column');
  }

  // The observed time span determines the range of target bins.
  final sortedDates = List<DateTime>.from(dateTimes)..sort();
  final timeIndex = TimeSeriesIndex(sortedDates);

  final targetIndex = TimeSeriesIndex.dateRange(
    start: timeIndex.first,
    end: timeIndex.last,
    frequency: frequency,
  );

  // Map each bin timestamp to the indices of the rows that fall into it.
  final groups = <DateTime, List<int>>{};
  for (var i = 0; i < dateSeriesData.length; i++) {
    final dateValue = dateSeriesData[i];
    if (dateValue is! DateTime) continue;

    final binDate = _findBin(dateValue, targetIndex.timestamps, closed);
    if (binDate != null) {
      groups.putIfAbsent(binDate, () => []).add(i);
    }
  }

  // Look every column's data up once instead of once per row per bin.
  final columnData = [for (final col in columns) this[col].data];

  // Aggregate each non-empty bin into one output row, in bin order.
  final resultData = <List<dynamic>>[];
  final resultIndex = <DateTime>[];

  for (final binDate in targetIndex.timestamps) {
    final rowIndices = groups[binDate];
    if (rowIndices == null || rowIndices.isEmpty) continue;

    final aggregatedRow = <dynamic>[];
    for (var c = 0; c < columns.length; c++) {
      if (columns[c] == actualDateColumn) {
        // The date column carries the bin timestamp itself.
        aggregatedRow.add(binDate);
        continue;
      }

      // Drop missing entries before aggregating.
      final data = columnData[c];
      final values = rowIndices
          .map((idx) => data[idx])
          .where((val) => val != null && val != replaceMissingValueWith)
          .toList();

      aggregatedRow.add(_applyAggregation(values, aggFunc));
    }

    resultData.add(aggregatedRow);
    resultIndex.add(binDate);
  }

  return DataFrame(
    resultData,
    columns: columns,
    index: resultIndex,
    allowFlexibleColumns: allowFlexibleColumns,
    replaceMissingValueWith: replaceMissingValueWith,
  );
}