getFaceEmbedding method

Tensor getFaceEmbedding(
  1. Tensor patchifiedImage,
  2. List<Tensor> tracker
)

Input: numPatches, patchPixels Output: 1, outputDim L2-normalized vector Input: numPatches, patchPixels Output: 1, outputDim L2-normalized vector

Implementation

Tensor getFaceEmbedding(Tensor patchifiedImage, List<Tensor> tracker) {
  // 1. Get sequence output from ViT backbone
  final encoded = backbone.forward(patchifiedImage, tracker);
  tracker.add(encoded);

  // 2. Extract the [CLS] token (assumed to be the first row)
  final clsOutput = encoded.slice(0, 1);
  tracker.add(clsOutput);

  Tensor faceVec = clsOutput;

  // 3. Project to the embedding dimension (e.g., 512)
  if (finalProjection != null) {
    faceVec = finalProjection!.forward(clsOutput, tracker);
    tracker.add(faceVec);
  }

  // 4. Native L2 Normalization
  // Replaces the manual math with a single optimized GPU kernel call
  final normVec = faceVec.normalize(eps: 1e-10);
  tracker.add(normVec);

  return normVec;
}