step method

  1. @override
void step()
override

Implementation

/// Applies one optimisation step to every tensor in `parameters`.
///
/// Increments the step counter `_t`, then for each element of each
/// parameter:
///
/// 1. updates the running first/second moment estimates held in `_m` and
///    `_v` (keyed by the parameter's `id`) from the current gradient,
/// 2. bias-corrects both moments using the step counter,
/// 3. shrinks the weight by `learningRate * weightDecay` times its value
///    (decay is applied to the weight itself, not folded into the gradient),
/// 4. subtracts the Adam-style update `learningRate * mHat / (sqrt(vHat) + epsilon)`.
///
/// Throws if `_m` or `_v` has no entry for a parameter's `id`, because the
/// lookups use the null-assertion operator.
// NOTE(review): assumes `param.grad` and the moment lists have at least
// `param.data.length` entries — confirm against where they are allocated.
@override
void step() {
  _t = _t + 1;

  // The bias-correction denominators depend only on the step counter, so
  // they are computed once per step instead of once per element.
  final biasCorrection1 = 1.0 - pow(beta1, _t);
  final biasCorrection2 = 1.0 - pow(beta2, _t);

  for (int p = 0; p < parameters.length; p++) {
    Tensor<dynamic> param = parameters[p];
    List<double> mList = _m[param.id]!;
    List<double> vList = _v[param.id]!;

    for (int i = 0; i < param.data.length; i++) {
      double grad = param.grad[i];

      // Exponential moving averages of the gradient and squared gradient.
      mList[i] = beta1 * mList[i] + (1.0 - beta1) * grad;
      vList[i] = beta2 * vList[i] + (1.0 - beta2) * (grad * grad);

      // Bias correction.
      double mHat = mList[i] / biasCorrection1;
      double vHat = vList[i] / biasCorrection2;

      // Weight decay (decoupled from gradient update): applied first, so the
      // gradient update below operates on the already-decayed weight.
      param.data[i] = param.data[i] - (learningRate * weightDecay * param.data[i]);

      // Gradient update.
      param.data[i] = param.data[i] - (learningRate * mHat) / (sqrt(vHat) + epsilon);
    }
  }
}