TransformerEncoder constructor

TransformerEncoder({
  int vocabSize = 100,
  int embedSize = 64,
  int blockSize = 128,
  int numLayers = 6,
  int numHeads = 8,
})
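All parameters are optional named arguments: vocabSize is the vocabulary size, embedSize the embedding dimension, blockSize the maximum sequence length, numLayers the number of stacked encoder blocks, and numHeads the number of attention heads. embedSize must be divisible by numHeads, as enforced by the assert in the implementation below.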

Implementation

TransformerEncoder({
  this.vocabSize = 100, // Default vocabulary size
  this.embedSize = 64, // Default embedding dimension
  this.blockSize = 128, // Default maximum sequence length
  this.numLayers = 6, // Default number of encoder layers
  this.numHeads = 8, // Default number of attention heads
}) : assert(embedSize % numHeads == 0,
          "embedSize must be divisible by numHeads"),
      // Initialize token embeddings with small random values
      tokenEmbeddings = List.generate(
          vocabSize,
          (i) => ValueVector.fromDoubleList(List.generate(
              embedSize, (j) => math.Random().nextDouble() * 0.02 - 0.01))),
      // Initialize position embeddings (also learned)
      positionEmbeddings = List.generate(
          blockSize,
          (i) => ValueVector.fromDoubleList(List.generate(
              embedSize, (j) => math.Random().nextDouble() * 0.02 - 0.01))),
      // Create a stack of encoder blocks
      blocks = List.generate(
          numLayers, (i) => TransformerEncoderBlock(embedSize, numHeads)),
      // Final layer normalization applied after all blocks
      finalLayerNorm = LayerNorm(embedSize);
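
Below is a minimal usage sketch showing how the constructor might be called; the argument values are illustrative and assume the class is imported from the surrounding library.

void main() {
  // All defaults: 100-token vocabulary, 64-dim embeddings,
  // 128-token context, 6 encoder blocks, 8 attention heads
  // (64 / 8 = 8 dimensions per head).
  final encoder = TransformerEncoder();

  // Custom configuration; embedSize must remain divisible by numHeads
  // (here 128 / 4 = 32 dimensions per head), otherwise the assert in
  // the initializer list fails in debug mode.
  final custom = TransformerEncoder(
    vocabSize: 5000,
    embedSize: 128,
    blockSize: 256,
    numLayers: 4,
    numHeads: 4,
  );
}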