forward method
Forward pass performing 'Intermediate Fusion' on GPU
Implementation
/// Runs the fused audio/video forward pass and returns the output of
/// [fusionHead].
///
/// Each modality is embedded, offset by its positional embeddings, passed
/// through its own transformer encoder, and pooled with `mean()`. The two
/// pooled tensors are then concatenated (audio first, video second) and
/// handed to [fusionHead].
///
/// [audioIn] and [videoIn] are the raw per-modality inputs. The positional
/// embeddings are sliced to `shape[0]` of each input, so dimension 0 is
/// presumably the sequence/frame axis (TODO confirm).
///
/// Intermediate tensors created here are appended to [tracker], and the same
/// list is passed to every sub-module call.
Tensor forward(Tensor audioIn, Tensor videoIn, List<Tensor> tracker) {
  // --- Audio branch ---
  final audioFeatures = audioModel.featureProjection.forward(audioIn, tracker);
  final audioPositions = audioModel.posEmbeddings.slice(0, audioIn.shape[0]);
  final audioWithPositions = audioFeatures + audioPositions;
  tracker.add(audioWithPositions);

  final audioEncoded = audioModel.transformerEncoder.forwardEmbeddings(
    audioWithPositions,
    tracker,
  );
  // Original note gives the pooled shape as [1, 64] (not verifiable here).
  final audioPooled = audioEncoded.mean();
  tracker.add(audioPooled);

  // --- Video branch ---
  // The frame projection is optional; without it the raw frames are used.
  final frameProjection = videoModel.frameProjection;
  final Tensor videoFeatures;
  if (frameProjection == null) {
    videoFeatures = videoIn;
  } else {
    videoFeatures = frameProjection.forward(videoIn, tracker);
  }
  final videoPositions = videoModel.posEmbeddings.slice(0, videoIn.shape[0]);
  final videoWithPositions = videoFeatures + videoPositions;
  tracker.add(videoWithPositions);

  final videoEncoded = videoModel.transformerEncoder.forwardEmbeddings(
    videoWithPositions,
    tracker,
  );
  // Original note gives the pooled shape as [1, 128] (not verifiable here).
  final videoPooled = videoEncoded.mean();
  tracker.add(videoPooled);

  // --- Fusion ---
  // Concatenate audio then video; original note gives the result as [1, 192].
  final fusedFeatures = Tensor.concat([audioPooled, videoPooled]);
  tracker.add(fusedFeatures);

  // --- Classification ---
  return fusionHead.forward(fusedFeatures, tracker);
}