forward method

Tensor forward(
  Tensor audio,
  Tensor video,
  List<int> textTokens,
  List<Tensor> tracker,
)

Implementation

/// Runs the audio, video, and text sub-models and fuses their outputs.
///
/// Each sub-model's `forward` result is reduced with `mean()`. The three
/// reduced tensors are joined with `Tensor.concat` in audio, video, text
/// order, the joined tensor is appended to [tracker], and the result of
/// passing it to `fusionLayer.forward` is returned.
///
/// The same [tracker] list is handed to every sub-model call and to the
/// fusion layer, so the order of the calls below is preserved deliberately.
Tensor forward(
  Tensor audio,
  Tensor video,
  List<int> textTokens,
  List<Tensor> tracker,
) {
  // Per-modality embeddings, each collapsed with mean().
  // NOTE(review): presumably this pools over the sequence dimension to give
  // one vector per modality, but mean() is called without an axis argument
  // here; confirm against the Tensor.mean implementation.
  final audioEmbedding = audioModel.forward(audio, tracker).mean();
  final videoEmbedding = videoModel.forward(video, tracker).mean();
  final textEmbedding = textModel.forward(textTokens, tracker).mean();

  // Fuse the modalities by concatenation and record the fused tensor.
  final fused = Tensor.concat([audioEmbedding, videoEmbedding, textEmbedding]);
  tracker.add(fused);

  // The fusion layer produces the final output from the fused tensor.
  return fusionLayer.forward(fused, tracker);
}