MultimodalTransformer constructor
MultimodalTransformer({
  required AudioTransformer audioModel,
  required VideoTransformer videoModel,
  required TextTransformer textModel,
  required int numClasses,
})
Implementation
/// Creates a [MultimodalTransformer] from three per-modality models.
///
/// Stores [audioModel], [videoModel] and [textModel] on the instance and
/// builds [fusionLayer] as a [Layer] whose first positional argument is the
/// sum of the three models' `embedSize` values and whose second positional
/// argument is [numClasses], with `useGelu` set to `false`.
MultimodalTransformer({
  required this.audioModel,
  required this.videoModel,
  required this.textModel,
  required int numClasses,
}) : fusionLayer = Layer(
        // Combined embedding width across all three modalities. The original
        // note gives Audio(64) + Video(128) + Text(128) = 320 as the expected
        // total; the value is read from the models, not hard-coded here.
        audioModel.embedSize +
            videoModel.embedSize +
            textModel.embedSize,
        // NOTE(review): presumably the layer's output width (one unit per
        // class) — confirm against Layer's constructor signature.
        numClasses,
        useGelu: false,
      );