VideoTransformer constructor

VideoTransformer({
  required int frameEmbedDim,
  required int embedSize,
  required int maxVideoSequenceLength,
  required int numClasses,
  int numLayers = 2,
  int numHeads = 4,
})

Implementation

/// Creates a [VideoTransformer] and builds its sub-modules.
///
/// * [frameEmbedDim] - size of each incoming frame embedding.
/// * [embedSize] - embedding size handed to the encoder and the output head.
/// * [maxVideoSequenceLength] - used as the first entry of the positional
///   embedding dimensions and as the encoder's `blockSize`.
/// * [numClasses] - output size of the final [Layer] (`mlpHead`).
/// * [numLayers] - forwarded to the encoder; defaults to 2.
/// * [numHeads] - forwarded to the encoder; defaults to 4.
///
/// NOTE(review): whether [embedSize] must be divisible by [numHeads] depends
/// on `TransformerEncoder`, which is not visible here - confirm.
VideoTransformer({
  required this.frameEmbedDim,
  required this.embedSize,
  required this.maxVideoSequenceLength,
  required this.numClasses,
  int numLayers = 2,
  int numHeads = 4,
})  :
      // A projection layer (no GELU) is only created when the frame
      // embedding size differs from embedSize; otherwise it stays null.
      frameProjection = (frameEmbedDim == embedSize)
          ? null
          : Layer(frameEmbedDim, embedSize, useGelu: false),
      // Randomly initialised tensor; presumably the list is its shape,
      // i.e. one embedSize-wide row per sequence position - TODO confirm.
      posEmbeddings = Tensor.random([maxVideoSequenceLength, embedSize]),
      transformerEncoder = TransformerEncoder(
        // Passed as 0; per the original note this argument exists only to
        // match the encoder's standard signature.
        vocabSize: 0,
        embedSize: embedSize,
        blockSize: maxVideoSequenceLength,
        numLayers: numLayers,
        numHeads: numHeads,
      ),
      // Output head mapping embedSize to numClasses, without GELU.
      mlpHead = Layer(embedSize, numClasses, useGelu: false);