build method
Initializes all parameter tensors (weight matrices and zero-filled bias vectors) for the four gates: forget, input, cell candidate, and output.
This method infers the inputSize from the data and creates the weight
matrices, each with a shape of [hiddenSize, hiddenSize + inputSize] to
handle the concatenated [h_prev, x_t] input. It uses Glorot/Xavier
initialization, a standard practice for LSTMs.
Implementation
/// Creates the weight matrices and bias vectors for the four gates.
///
/// The input width is taken from the length of the first row of [input]'s
/// matrix value (0 when that matrix is empty). Each weight matrix has
/// `hiddenSize` rows and `hiddenSize + inputSize` columns and is filled with
/// Glorot/Xavier uniform samples; each bias is a zero vector of length
/// `hiddenSize`. `super.build(input)` is called last.
@override
void build(Tensor<dynamic> input) {
  final Matrix inputMatrix = input.value as Matrix;
  // Width of one input row; falls back to 0 for an empty matrix.
  final int inputSize = inputMatrix.isNotEmpty ? inputMatrix[0].length : 0;
  final int combinedSize = hiddenSize + inputSize;
  final Random random = Random();

  // Builds a [fanOut x fanIn] matrix of Glorot/Xavier uniform samples:
  // U(-limit, limit) with limit = sqrt(6 / (fanIn + fanOut)).
  Tensor<Matrix> initWeights(int fanIn, int fanOut) {
    // The previous bound, sqrt(1 / fanIn), ignored fanOut and therefore was
    // not the Glorot bound this method documents.
    final double limit = sqrt(6.0 / (fanIn + fanOut));
    final Matrix values = [];
    for (int i = 0; i < fanOut; i++) {
      final Vector row = [];
      for (int j = 0; j < fanIn; j++) {
        // nextDouble() is in [0, 1); map it to [-limit, limit).
        row.add((random.nextDouble() * 2 - 1) * limit);
      }
      values.add(row);
    }
    return Tensor<Matrix>(values);
  }

  // One weight matrix per gate.
  W_f = initWeights(combinedSize, hiddenSize);
  W_i = initWeights(combinedSize, hiddenSize);
  W_c = initWeights(combinedSize, hiddenSize);
  W_o = initWeights(combinedSize, hiddenSize);

  // One zero-initialized bias vector per gate.
  b_f = Tensor<Vector>(List<double>.filled(hiddenSize, 0.0));
  b_i = Tensor<Vector>(List<double>.filled(hiddenSize, 0.0));
  b_c = Tensor<Vector>(List<double>.filled(hiddenSize, 0.0));
  b_o = Tensor<Vector>(List<double>.filled(hiddenSize, 0.0));

  super.build(input);
}