update method

void update(
  int state,
  int action,
  double reward,
  int nextState,
  int nextAction,
)

Implementation

/// Applies one temporal-difference update to [qTable] for the transition
/// from ([state], [action]) to ([nextState], [nextAction]) with [reward].
///
/// The TD error is
/// `reward + gamma * qTable[nextState][nextAction] - qTable[state][action]`.
///
/// When `lambda` is positive and a trace table is present, the trace of the
/// visited pair is incremented by one, every table entry is moved by
/// `alpha * error * trace`, and every trace is then multiplied by
/// `gamma * lambda`. Otherwise only the visited entry is moved by
/// `alpha * error`.
///
/// In both cases the step counter is incremented and `_applySchedules` is
/// called afterwards.
void update(
  int state,
  int action,
  double reward,
  int nextState,
  int nextAction,
) {
  final current = qTable[state][action];
  final bootstrap = qTable[nextState][nextAction];
  final tdError = reward + gamma * bootstrap - current;

  // Learning-rate-scaled error, shared by both update paths.
  final step = alpha * tdError;

  // Copy the nullable field to a local so the null check promotes it.
  final traces = _eTrace;
  if (lambda > 0.0 && traces != null) {
    // Accumulating traces: bump the visited pair before the sweep so it
    // receives the current error at full weight.
    traces[state][action] += 1.0;

    final decay = gamma * lambda;
    for (var si = 0; si < nStates; si++) {
      for (var ai = 0; ai < nActions; ai++) {
        qTable[si][ai] += step * traces[si][ai];
        traces[si][ai] *= decay;
      }
    }
  } else {
    // No traces in use: one-step update of the visited pair only.
    qTable[state][action] = current + step;
  }

  _steps += 1;
  _applySchedules();
}