resample method
Resample time series data to a different frequency.
This method provides functionality similar to pandas' resample() method, allowing you to change the frequency of time series data and apply aggregation functions.
Parameters:
- frequency: Target frequency string ('D', 'H', 'M', 'Y')
- dateColumn: Name of the column containing DateTime values (optional if DataFrame has DateTimeIndex)
- aggFunc: Aggregation function to apply ('mean', 'sum', 'min', 'max', 'count', 'first', 'last')
- label: Which bin edge label to use ('left' or 'right')
- closed: Which side of bin interval is closed ('left' or 'right')
Returns: A new DataFrame with resampled data
Example:
// Resample daily data to monthly, taking the mean
var monthlyData = df.resample('M', dateColumn: 'date', aggFunc: 'mean');
// Resample hourly data to daily, taking the sum
var dailyData = df.resample('D', dateColumn: 'timestamp', aggFunc: 'sum');
Implementation
/// Resamples the rows of this DataFrame into time bins of [frequency].
///
/// Each row is assigned to the bin that `_findBin` returns for its value in
/// the date column. Every other column is then reduced per bin with
/// `_applyAggregation`. Bins that receive no rows are left out of the result.
///
/// Parameters:
/// - [frequency]: Target frequency string; must be accepted by
///   `FrequencyUtils.isValidFrequency`.
/// - [dateColumn]: Name of the column holding [DateTime] values. When
///   omitted, the first column whose first non-null value is a [DateTime]
///   is used.
/// - [aggFunc]: Aggregation name, forwarded unchanged to
///   `_applyAggregation`.
/// - [label]: Accepted for API compatibility, but not read by this
///   implementation; output rows always carry the bin timestamp taken from
///   the generated target index.
/// - [closed]: Forwarded unchanged to `_findBin`.
///
/// Returns a new [DataFrame] with the same columns, one row per non-empty
/// bin, the bin timestamp as the date column value, and the bin timestamps
/// as the index.
///
/// Throws an [ArgumentError] if [frequency] is invalid, if no date column is
/// given and none can be detected, if the date column does not exist, if it
/// contains a non-null value that is not a [DateTime], or if it contains no
/// [DateTime] values at all.
DataFrame resample(
  String frequency, {
  String? dateColumn,
  String aggFunc = 'mean',
  String label = 'left',
  String closed = 'left',
}) {
  // Validate frequency
  if (!FrequencyUtils.isValidFrequency(frequency)) {
    throw ArgumentError('Invalid frequency: $frequency');
  }

  // Find the date column
  String? actualDateColumn = dateColumn;
  if (actualDateColumn == null) {
    // Try to find a DateTime column automatically. Leading nulls are skipped
    // so that a date column starting with a missing value is still detected.
    for (final col in columns) {
      final data = this[col].data;
      var firstNonNull = 0;
      while (firstNonNull < data.length && data[firstNonNull] == null) {
        firstNonNull++;
      }
      if (firstNonNull < data.length && data[firstNonNull] is DateTime) {
        actualDateColumn = col;
        break;
      }
    }
  }
  if (actualDateColumn == null) {
    throw ArgumentError(
        'No date column specified and no DateTime column found automatically');
  }
  if (!columns.contains(actualDateColumn)) {
    throw ArgumentError(
        'Date column "$actualDateColumn" not found in DataFrame');
  }

  final dateSeriesData = this[actualDateColumn].data;

  // Validate that the date column contains DateTime objects. Nulls are
  // tolerated here and simply skipped when grouping below.
  final dateTimes = <DateTime>[];
  for (int i = 0; i < dateSeriesData.length; i++) {
    final value = dateSeriesData[i];
    if (value is DateTime) {
      dateTimes.add(value);
    } else if (value != null) {
      throw ArgumentError(
          'Date column "$actualDateColumn" contains non-DateTime values');
    }
  }
  if (dateTimes.isEmpty) {
    throw ArgumentError('No valid DateTime values found in date column');
  }

  // Create time series index from the date column
  final sortedDates = List<DateTime>.from(dateTimes)..sort();
  final timeIndex = TimeSeriesIndex(sortedDates);

  // Determine the resampling range
  final startDate = timeIndex.first;
  final endDate = timeIndex.last;

  // Create the target frequency index
  final targetIndex = TimeSeriesIndex.dateRange(
    start: startDate,
    end: endDate,
    frequency: frequency,
  );

  // Group original row positions by the bin their date falls into
  final groups = <DateTime, List<int>>{};
  for (int i = 0; i < dateSeriesData.length; i++) {
    final dateValue = dateSeriesData[i];
    if (dateValue is DateTime) {
      final binDate = _findBin(dateValue, targetIndex.timestamps, closed);
      if (binDate != null) {
        groups.putIfAbsent(binDate, () => []).add(i);
      }
    }
  }

  // Look up each column's data once instead of once per aggregated cell.
  // The date column slot is left null because its output is the bin date.
  final columnData = [
    for (final col in columns)
      if (col == actualDateColumn) null else this[col].data,
  ];

  // Apply aggregation function to each group
  final resultData = <List<dynamic>>[];
  final resultIndex = <DateTime>[];
  for (final binDate in targetIndex.timestamps) {
    final rowIndices = groups[binDate] ?? [];
    if (rowIndices.isEmpty) {
      // Empty bins produce no output row.
      continue;
    }

    final aggregatedRow = <dynamic>[];
    for (var c = 0; c < columns.length; c++) {
      if (columns[c] == actualDateColumn) {
        // For the date column, use the bin date
        aggregatedRow.add(binDate);
        continue;
      }

      // Collect this bin's values, dropping nulls and missing-value markers
      final data = columnData[c];
      final values = <dynamic>[];
      for (final idx in rowIndices) {
        final val = data[idx];
        if (val != null && val != replaceMissingValueWith) {
          values.add(val);
        }
      }
      aggregatedRow.add(_applyAggregation(values, aggFunc));
    }
    resultData.add(aggregatedRow);
    resultIndex.add(binDate);
  }

  return DataFrame(
    resultData,
    columns: columns,
    index: resultIndex,
    allowFlexibleColumns: allowFlexibleColumns,
    replaceMissingValueWith: replaceMissingValueWith,
  );
}