build method

@override
void build(Tensor input)
override

Initializes all parameter tensors for the four gates.

This method infers the inputSize from the data and creates the weight matrices, each with a shape of [hiddenSize, hiddenSize + inputSize] to handle the concatenated [h_prev, x_t] input. It uses Glorot/Xavier initialization, a standard practice for LSTMs.

Implementation

/// Initializes the weight and bias tensors for the four gates.
///
/// The input width is taken from the length of the first row of [input]'s
/// matrix value, or 0 when that matrix is empty. Each weight matrix has
/// `hiddenSize` rows and `hiddenSize + inputSize` columns so it can be
/// applied to the concatenated `[h_prev, x_t]` input.
///
/// Weights use Glorot/Xavier uniform initialization: every entry is drawn
/// uniformly from `[-limit, limit)` with
/// `limit = sqrt(6 / (fanIn + fanOut))`. Biases start at zero.
///
/// Calls `super.build` with the same [input] once all parameters are set.
@override
void build(Tensor<dynamic> input) {
  final inputMatrix = input.value as Matrix;
  // An empty matrix yields an input width of 0 rather than an error.
  final inputSize = inputMatrix.isNotEmpty ? inputMatrix[0].length : 0;
  final combinedSize = hiddenSize + inputSize;
  final random = Random();

  // Builds a [fanOut x fanIn] matrix with Glorot/Xavier uniform entries.
  Tensor<Matrix> initWeights(int fanIn, int fanOut) {
    // Glorot uniform bound; depends on both fan-in and fan-out.
    final limit = sqrt(6.0 / (fanIn + fanOut));
    final Matrix values = [
      for (var row = 0; row < fanOut; row++)
        [
          for (var col = 0; col < fanIn; col++)
            // nextDouble() is in [0, 1); rescale to [-limit, limit).
            (random.nextDouble() * 2 - 1) * limit,
        ],
    ];
    return Tensor<Matrix>(values);
  }

  // Builds a zero-filled bias vector with one entry per hidden unit.
  Tensor<Vector> initBias() =>
      Tensor<Vector>(List<double>.filled(hiddenSize, 0.0));

  W_f = initWeights(combinedSize, hiddenSize);
  W_i = initWeights(combinedSize, hiddenSize);
  W_c = initWeights(combinedSize, hiddenSize);
  W_o = initWeights(combinedSize, hiddenSize);

  b_f = initBias();
  b_i = initBias();
  b_c = initBias();
  b_o = initBias();

  super.build(input);
}