computeCostMatrix method
Implementation
/// Builds a `[numQueries, numGT]` cost matrix between the boxes in this
/// tensor and the boxes in [gtBoxes].
///
/// Both this tensor and [gtBoxes] must be 2-D with exactly 4 columns.
/// `numQueries` is this tensor's row count and `numGT` is the row count of
/// [gtBoxes].
///
/// The result tensor is created zero-filled and then handed to
/// `engine.computeCostMatrix` together with both input handles. The cost
/// values themselves are produced by the engine, whose implementation is not
/// part of this method.
///
/// Throws an [ArgumentError] if either tensor is not 2-D or does not have
/// 4 columns.
Tensor computeCostMatrix(Tensor gtBoxes) {
  // 1. Validation.
  // Check the rank before indexing shape[1]: a 1-D tensor would otherwise
  // fail with an opaque index RangeError, and a tensor of rank > 2 whose
  // second dimension happens to be 4 would be passed to the engine as if it
  // were [N, 4].
  if (shape.length != 2 || gtBoxes.shape.length != 2) {
    throw ArgumentError("Both tensors must be 2-D with shape [N, 4]");
  }
  if (shape[1] != 4 || gtBoxes.shape[1] != 4) {
    throw ArgumentError("Both tensors must have 4 columns (x, y, w, h)");
  }

  // 2. Prepare the zero-filled destination tensor, one row per query box and
  // one column per ground-truth box.
  final numQueries = shape[0];
  final numGT = gtBoxes.shape[0];
  final costMatrix = Tensor.fill([numQueries, numGT], 0.0);

  // 3. Call the engine using the internal handles.
  // NOTE(review): `engine` is presumably the CudaEngine singleton/instance
  // and is expected to write its results into `costMatrix` - confirm.
  engine.computeCostMatrix(this.handle, gtBoxes.handle, costMatrix.handle);
  return costMatrix;
}