AudioTransformer constructor

AudioTransformer({
  required int featureDim,
  required int maxSequenceLength,
  required int embedSize,
  required int numClasses,
  int numLayers = 4,
  int numHeads = 4,
})

Implementation

/// Creates an [AudioTransformer].
///
/// [featureDim], [maxSequenceLength], [embedSize] and [numClasses] are
/// required and are stored directly on the instance. [numLayers] and
/// [numHeads] (both default to 4) are not stored by this constructor; they
/// are only forwarded to the [TransformerEncoder].
///
/// Construction order:
///  * the initializer list builds `transformerEncoder`;
///  * the body then creates `featureProjection`, `posEmbeddings` and
///    `classificationHead`, in that order.
///
/// No argument validation is performed here.
AudioTransformer({
  required this.featureDim,
  required this.maxSequenceLength,
  required this.embedSize,
  required this.numClasses,
  int numLayers = 4,
  int numHeads = 4,
}) : transformerEncoder = TransformerEncoder(
       // NOTE(review): vocabSize is 0, presumably because the inputs are
       // continuous audio features rather than token ids. Confirm that
       // TransformerEncoder accepts a zero-sized vocabulary.
       vocabSize: 0,
       embedSize: embedSize,
       // The encoder's block size is set to the maximum sequence length.
       blockSize: maxSequenceLength,
       numLayers: numLayers,
       numHeads: numHeads,
     ) {
  // 1. Audio feature projection (similar to patchProjection): a Layer built
  //    from (featureDim, embedSize) with useGelu enabled.
  //    Presumably maps each input frame from featureDim to embedSize; verify
  //    against Layer's constructor.
  featureProjection = Layer(featureDim, embedSize, useGelu: true);

  // 2. Positional embeddings for the audio timeline: a randomly initialised
  //    tensor created from [maxSequenceLength, embedSize] with scale 0.02.
  //    Assumes the list is the tensor shape (one row per position) — TODO
  //    confirm against Tensor.random.
  posEmbeddings = Tensor.random([maxSequenceLength, embedSize], scale: 0.02);

  // 3. Output head: a Layer built from (embedSize, numClasses) with useGelu
  //    disabled.
  classificationHead = Layer(embedSize, numClasses, useGelu: false);
}