truncateHeadForPtlRetry static method
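Drops the oldest API-round groups from messages until tokenGap
is covered. When tokenGap is null or not positive, drops roughly the
oldest 20% of groups instead (always at least one). Returns null when
nothing can be dropped without leaving nothing to summarize.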
This is the last-resort escape hatch when the compact request itself hits prompt-too-long — dropping the oldest context is lossy but unblocks the user.
Implementation
/// Trims the oldest API rounds off [messages] so a compact request that hit
/// prompt-too-long can be retried with a smaller prompt.
///
/// [messages] is split into rounds by `_groupMessagesByApiRound`, and whole
/// rounds are removed from the front:
///
/// * When [tokenGap] is positive, rounds are removed until the sum of their
///   `_roughTokenCountForMessages` estimates reaches [tokenGap].
/// * Otherwise (null, zero or negative) 20% of the rounds, rounded down but
///   never fewer than one, are removed.
///
/// At least one round is always kept so there is still something to
/// summarize.
///
/// Returns the remaining messages flattened into a single list, preceded by
/// a synthetic user message carrying `ptlRetryMarker` when the first
/// remaining message has the assistant role. Returns `null` when [messages]
/// has fewer than two entries, when it forms fewer than two rounds, or when
/// no round can be removed.
static List<Message>? truncateHeadForPtlRetry(
  List<Message> messages,
  int? tokenGap,
) {
  if (messages.length < 2) return null;

  // NOTE(review): a marker prepended by an earlier call is not stripped
  // before regrouping; confirm that repeated retries still make progress.
  final rounds = _groupMessagesByApiRound(messages);
  if (rounds.length < 2) return null;

  int roundsToDrop = 0;
  if (tokenGap == null || tokenGap <= 0) {
    // No usable gap figure: fall back to shedding a fixed share of rounds.
    roundsToDrop = max(1, (rounds.length * 0.2).floor());
  } else {
    // Walk from the oldest round, stopping as soon as the estimated size of
    // the dropped rounds covers the gap.
    int coveredTokens = 0;
    while (roundsToDrop < rounds.length && coveredTokens < tokenGap) {
      coveredTokens += _roughTokenCountForMessages(rounds[roundsToDrop]);
      roundsToDrop++;
    }
  }

  // Never drop everything: the newest round must survive.
  roundsToDrop = min(roundsToDrop, rounds.length - 1);
  if (roundsToDrop < 1) return null;

  final remaining =
      rounds.skip(roundsToDrop).expand((round) => round).toList();

  // The API rejects a conversation that opens with an assistant message, so
  // a synthetic user marker is placed in front when that would happen.
  final opensWithAssistant =
      remaining.isNotEmpty && remaining.first.role == MessageRole.assistant;
  return [
    if (opensWithAssistant) Message.user(ptlRetryMarker),
    ...remaining,
  ];
}