MultimodalTransformer constructor
MultimodalTransformer({
  required AudioTransformer audioModel,
  required VideoTransformer videoModel,
  required int numClasses,
})
Implementation
/// Creates a [MultimodalTransformer] from an audio model, a video model and
/// a class count.
///
/// All three named arguments are required and are stored directly on the
/// instance. [fusionHead] is built in the initializer list as a [Layer]
/// whose first argument is the sum of the two models' `embedSize` values,
/// whose second argument is [numClasses], and which is given
/// `useGelu: false`.
MultimodalTransformer({
  required this.audioModel,
  required this.videoModel,
  required this.numClasses,
}) : fusionHead = Layer(
        // Combined embedding width of both models. The original note gives
        // 64 + 128 = 192 — assumes those embed sizes; confirm against the
        // AudioTransformer / VideoTransformer configuration.
        audioModel.embedSize + videoModel.embedSize,
        numClasses,
        useGelu: false,
      );