forward method
Forward pass for the object detection head.
Takes a single ValueVector representing the aggregated image feature
(e.g., the CLS token output from the ViT backbone).
Returns a Map containing:
- 'boxes': List of `numQueries` ValueVectors, each of 4 bounding box coordinates
- 'logits': List of `numQueries` ValueVectors, each of (numClasses + 1) class logits
- 'embeddings': List of `numQueries` ValueVectors, each of `embeddingDim` dimensions
Implementation
/// Runs the detection head on a single backbone feature vector.
///
/// [backboneFeature] is passed unchanged to the box-regression,
/// class-prediction and face-embedding heads. Each head's flat output is then
/// cut into [numQueries] consecutive slices of equal width.
///
/// Returns a map with three entries, each a list of [numQueries] vectors:
/// - `'boxes'`: slices of length [numBoxCoords].
/// - `'logits'`: slices of length `numClasses + 1`.
/// - `'embeddings'`: slices of length [embeddingDim].
Map<String, List<ValueVector>> forward(ValueVector backboneFeature) {
  // Run all three heads up front; each yields one flat vector that covers
  // every query back to back.
  final boxesFlat = bboxRegressionHead.forward(backboneFeature);
  final logitsFlat = classPredictionHead.forward(backboneFeature);
  final embeddingsFlat = faceEmbeddingHead.forward(backboneFeature);

  // Cuts a flat head output into one vector of `width` values per query.
  List<ValueVector> splitPerQuery(ValueVector flat, int width) => [
        for (var q = 0; q < numQueries; q++)
          ValueVector(flat.values.sublist(q * width, (q + 1) * width)),
      ];

  return {
    'boxes': splitPerQuery(boxesFlat, numBoxCoords),
    'logits': splitPerQuery(logitsFlat, numClasses + 1),
    'embeddings': splitPerQuery(embeddingsFlat, embeddingDim),
  };
}