truncateHeadForPtlRetry method - CompactionService class - compaction_service library

truncateHeadForPtlRetry static method

List<Message>? truncateHeadForPtlRetry(

List<Message> messages,
int? tokenGap

)

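Drops the oldest API-round groups from messages until their rough token count covers tokenGap. When tokenGap is null or not positive, drops the oldest 20% of groups (at least one) instead. At least one group is always kept; returns null when nothing can be dropped without leaving nothing to summarize.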

This is the last-resort escape hatch when the compact request itself hits prompt-too-long — dropping the oldest context is lossy but unblocks the user.

Implementation

/// Trims the oldest API-round groups off [messages] so a compaction request
/// can be retried.
///
/// When [tokenGap] is a positive number, whole groups are dropped from the
/// front until their rough token count reaches [tokenGap]. When [tokenGap] is
/// null or not positive, the oldest 20% of groups (rounded down, but never
/// fewer than one) are dropped instead.
///
/// The newest group is always retained. If the surviving messages start with
/// an assistant message, a user message carrying [ptlRetryMarker] is placed
/// in front of them.
///
/// Returns null when [messages] has fewer than two entries, when it forms
/// fewer than two groups, or when no group can be dropped.
static List<Message>? truncateHeadForPtlRetry(
  List<Message> messages,
  int? tokenGap,
) {
  // A single message cannot be split into a dropped part and a kept part.
  if (messages.length < 2) return null;

  // Dropping happens at API-round granularity, never mid-round.
  final rounds = _groupMessagesByApiRound(messages);
  if (rounds.length < 2) return null;

  final gap = tokenGap;
  var roundsToDrop = 0;
  if (gap == null || gap <= 0) {
    // No usable gap: fall back to shedding a fifth of the rounds.
    roundsToDrop = max(1, (rounds.length * 0.2).floor());
  } else {
    // Walk from the oldest round forward until enough tokens are shed.
    var shedTokens = 0;
    for (var i = 0; i < rounds.length; i++) {
      shedTokens += _roughTokenCountForMessages(rounds[i]);
      roundsToDrop = i + 1;
      if (shedTokens >= gap) break;
    }
  }

  // The newest round must survive so there is still something to summarize.
  final maxDroppable = rounds.length - 1;
  if (roundsToDrop > maxDroppable) roundsToDrop = maxDroppable;
  if (roundsToDrop < 1) return null;

  final remaining = <Message>[
    for (final round in rounds.skip(roundsToDrop)) ...round,
  ];

  // Common case: the remainder is empty or does not start with an assistant
  // message, so it can be returned untouched.
  if (remaining.isEmpty || remaining.first.role != MessageRole.assistant) {
    return remaining;
  }

  // A leading assistant message would be rejected by the API, so put a
  // synthetic user marker in front of it.
  return [Message.user(ptlRetryMarker), ...remaining];
}