update method
Implementation
/// Applies one temporal-difference update for the transition
/// (`state`, `action`) -> (`nextState`, `nextAction`).
///
/// The TD error is
/// `reward + gamma * Q(nextState, nextAction) - Q(state, action)`.
///
/// When `lambda` is positive and an eligibility-trace table exists, the
/// visited pair's trace is incremented by one (accumulating traces), every
/// table entry is moved by `alpha * error * trace`, and every trace is then
/// decayed by `gamma * lambda`. Otherwise only `Q(state, action)` is moved
/// by `alpha * error`.
///
/// In both cases the step counter is incremented and `_applySchedules()` is
/// called afterwards.
void update(
  int state,
  int action,
  double reward,
  int nextState,
  int nextAction,
) {
  final current = qTable[state][action];
  final successor = qTable[nextState][nextAction];
  final tdError = reward + gamma * successor - current;

  // Copy the nullable field to a local so the null check promotes it.
  final traces = _eTrace;
  if (lambda > 0.0 && traces != null) {
    // Accumulating traces: bump the visited pair before the sweep so it
    // receives the full update on this step.
    traces[state][action] += 1.0;

    // Both factors are constant across the sweep.
    final stepSize = alpha * tdError;
    final decay = gamma * lambda;
    for (var s = 0; s < nStates; s++) {
      final qRow = qTable[s];
      final traceRow = traces[s];
      for (var a = 0; a < nActions; a++) {
        qRow[a] += stepSize * traceRow[a];
        traceRow[a] *= decay;
      }
    }
  } else {
    // One-step update: only the visited pair changes.
    qTable[state][action] = current + alpha * tdError;
  }

  _steps += 1;
  _applySchedules();
}