build method
Initializes the W_xh, W_hh, and b_h parameter tensors.
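This method infers inputSize from the length of the first row of the input
matrix (0 if the input is empty) and creates the weight matrices with the
matching shapes: W_xh is hiddenSize x inputSize, W_hh is hiddenSize x hiddenSize,
and b_h is a zero vector of length hiddenSize. It uses Xavier/Glorot
initialization, which is a good practice for layers with Tanh activations.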
Implementation
/// Creates the `W_xh`, `W_hh`, and `b_h` parameter tensors for this layer.
///
/// `inputSize` is read from the length of the first row of [input]'s matrix
/// and falls back to 0 when that matrix is empty. The resulting shapes are:
///
/// * `W_xh`: `hiddenSize` rows by `inputSize` columns
/// * `W_hh`: `hiddenSize` rows by `hiddenSize` columns
/// * `b_h`: `hiddenSize` zeros
///
/// Weights are sampled from the Xavier/Glorot *uniform* distribution
/// `U(-a, a)` with `a = sqrt(6 / (fanIn + fanOut))`, which gives each weight a
/// variance of `2 / (fanIn + fanOut)`.
///
/// Finishes by delegating to `super.build(input)`.
@override
void build(Tensor<dynamic> input) {
  final Matrix inputMatrix = input.value as Matrix;
  final int inputSize = inputMatrix.isNotEmpty ? inputMatrix[0].length : 0;
  final Random random = Random();

  // Glorot uniform bound. Scaling a uniform [-1, 1) draw by the Glorot
  // *normal* std-dev sqrt(2 / (fanIn + fanOut)) would yield only a third of
  // the intended variance, because Var(U(-a, a)) = a^2 / 3; sqrt(6 / ...)
  // compensates for that factor.
  double xavierLimit(int fanIn, int fanOut) => sqrt(6.0 / (fanIn + fanOut));

  // Builds a rows x cols matrix of Glorot-uniform samples, where cols is the
  // fan-in and rows is the fan-out of the weight matrix.
  Matrix xavierUniform(int rows, int cols) {
    final double limit = xavierLimit(cols, rows);
    final Matrix values = [];
    for (int i = 0; i < rows; i++) {
      final Vector row = [];
      for (int j = 0; j < cols; j++) {
        // nextDouble() is in [0, 1); map it to [-limit, limit).
        row.add((random.nextDouble() * 2 - 1) * limit);
      }
      values.add(row);
    }
    return values;
  }

  W_xh = Tensor<Matrix>(xavierUniform(hiddenSize, inputSize));
  W_hh = Tensor<Matrix>(xavierUniform(hiddenSize, hiddenSize));
  // Biases start at zero.
  b_h = Tensor<Vector>(List<double>.filled(hiddenSize, 0.0));
  super.build(input);
}