TransformerEncoder constructor

TransformerEncoder({
  required int vocabSize,
  required int embedSize,
  required int blockSize,
  int numLayers = 6,
  int numHeads = 8,
})

Implementation

/// Creates a [TransformerEncoder] and allocates its parameters.
///
/// - [vocabSize]: number of rows in the `wte` table.
/// - [embedSize]: width of both the `wte` and `wpe` tables; also passed to
///   every [TransformerEncoderBlock] and to the final [LayerNorm].
/// - [blockSize]: number of rows in the `wpe` table; also passed to every
///   [TransformerEncoderBlock].
/// - [numLayers]: number of [TransformerEncoderBlock]s to build (default 6).
/// - [numHeads]: head count passed to every block (default 8). Must be
///   positive and must divide [embedSize] evenly.
///
/// NOTE(review): `wte` / `wpe` are presumably the token and position
/// embedding tables — confirm against the class documentation.
///
/// The argument checks are `assert`s, so they only run when assertions are
/// enabled (e.g. debug builds).
TransformerEncoder({
  required this.vocabSize,
  required this.embedSize,
  required this.blockSize,
  this.numLayers = 6,
  this.numHeads = 8,
}) : // Check numHeads first: the divisibility test below would otherwise
     // evaluate `embedSize % 0` and surface a division-by-zero error (or a
     // misleading "divisible" failure) instead of a clear message.
     assert(numHeads > 0, "numHeads must be positive"),
     assert(
       embedSize % numHeads == 0,
       "embedSize must be divisible by numHeads",
     ),
     // Original note: initialize directly on GPU with scaling.
     // Both tables are drawn via Tensor.random with scale 0.02; wte is created
     // before wpe, so keep this order if random draws must stay reproducible.
     wte = Tensor.random([vocabSize, embedSize], scale: 0.02),
     wpe = Tensor.random([blockSize, embedSize], scale: 0.02),
     // One independently constructed block per layer; the index is unused.
     blocks = List.generate(
       numLayers,
       (_) => TransformerEncoderBlock(embedSize, numHeads, blockSize),
     ),
     finalLayerNorm = LayerNorm(embedSize);