forward method
Implementation
/// Produces the fused prediction for one audio / video / text input triple.
///
/// Each input is passed through its own encoder ([audioModel], [videoModel],
/// [textModel]), the encoder output is reduced with `mean()`, and the three
/// reduced tensors are joined with [Tensor.concat]. The joined tensor is
/// appended to [tracker] and then handed to [fusionLayer], whose output is
/// returned.
///
/// [tracker] is forwarded to every sub-model call; this method itself adds
/// only the concatenated tensor to it (the `mean()` results are not added).
/// NOTE(review): presumably [tracker] collects intermediate tensors for
/// later cleanup or backprop - confirm against the caller.
Tensor forward(
  Tensor audio,
  Tensor video,
  List<int> textTokens,
  List<Tensor> tracker,
) {
  // Encode and pool each modality, in audio -> video -> text order.
  // NOTE(review): `mean()` is called with no arguments; the intent is a
  // single pooled vector per modality (pooling over the sequence dimension).
  // Confirm that the Tensor API reduces over that axis rather than over all
  // elements.
  final audioEmbedding = audioModel.forward(audio, tracker).mean();
  final videoEmbedding = videoModel.forward(video, tracker).mean();
  final textEmbedding = textModel.forward(textTokens, tracker).mean();

  // Fuse the modalities by concatenation and register the fused tensor.
  final fused = Tensor.concat([audioEmbedding, videoEmbedding, textEmbedding]);
  tracker.add(fused);

  // Classification head over the fused representation.
  return fusionLayer.forward(fused, tracker);
}