update method
void
update(List<double> state, int action, double reward, List<double> nextState, {…})
Implementation
/// Performs a single one-step temporal-difference update of both networks.
///
/// Evaluates [critic] on [state] and [nextState], forms the bootstrapped
/// target `reward + gamma * V(nextState)` and the TD error
/// `target - V(state)`, then:
///
/// * fits [critic] on [state] towards the bootstrapped target;
/// * fits [actor] on [state] towards a vector of length [nActions] that is
///   zero everywhere except at index [action], where it holds the TD error.
///
/// [gamma] is the discount applied to the next-state value. Its default of
/// `0.99` is the value that used to be hard-coded here.
///
/// This method always bootstraps from [nextState]; it has no notion of a
/// terminal transition.
///
/// [actorLr] and [criticLr] are kept for interface compatibility but are not
/// read by this method.
/// NOTE(review): presumably the learning rates are configured on the networks
/// themselves or were meant to be forwarded to `fit` — confirm.
///
/// Throws a [RangeError] if [action] is outside `0..nActions - 1`. The check
/// runs before either network is fitted, so a bad index cannot leave the
/// critic updated while the actor is not.
void update(
  List<double> state,
  int action,
  double reward,
  List<double> nextState, {
  double actorLr = 0.01,
  double criticLr = 0.01,
  double gamma = 0.99,
}) {
  // Fail fast: without this, an invalid action would only surface when
  // writing into the actor target, i.e. after the critic was already fitted.
  RangeError.checkValueInInterval(action, 0, nActions - 1, 'action');

  final v = critic.predict([state])[0][0];
  final vNext = critic.predict([nextState])[0][0];

  // Bootstrapped one-step return and the resulting TD error.
  final tdTarget = reward + gamma * vNext;
  final td = tdTarget - v;

  // Critic target: `v + td` is algebraically the bootstrapped return, so it
  // is passed directly instead of adding and subtracting `v`.
  critic.fit(
    [state],
    [
      [tdTarget],
    ],
  );

  // Actor target: only the taken action's slot is non-zero, scaled by the
  // TD error.
  final target = List<double>.filled(nActions, 0.0)..[action] = td;
  actor.fit([state], [target]);
}