valueIteration method
Value iteration: returns a map with the keys 'values' (the per-state value estimates) and 'policy' (the greedy action index for each state).
Implementation
/// Runs value iteration and returns the value estimates and a greedy policy.
///
/// Starting from all-zero values, performs up to [maxIter] sweeps over the
/// states. A sweep replaces each state's value with the largest one-step
/// lookahead over all actions, discounting successor values by [gamma].
/// Sweeping stops early once the largest absolute change seen in a sweep is
/// below [tol].
///
/// The result is a map with two entries:
///  * `'values'`: a `List<double>` holding one value per state.
///  * `'policy'`: a `List<int>` holding, per state, the index of the action
///    with the highest lookahead (the lowest index wins ties).
///
/// NOTE(review): presumably `P[s][a][sp]` is a transition probability and
/// `R[s][a][sp]` the matching reward; confirm against the enclosing class.
Map<String, dynamic> valueIteration({
  double gamma = 0.99,
  double tol = 1e-6,
  int maxIter = 10000,
}) {
  final values = List<double>.filled(nStates, 0.0);

  // One-step lookahead for taking `action` in `state`, evaluated against the
  // current contents of `values`.
  double lookahead(int state, int action) {
    var total = 0.0;
    for (var next = 0; next < nStates; next++) {
      total += P[state][action][next] *
          (R[state][action][next] + gamma * values[next]);
    }
    return total;
  }

  var converged = false;
  for (var sweep = 0; sweep < maxIter && !converged; sweep++) {
    // Largest absolute value change observed during this sweep.
    var residual = 0.0;
    for (var state = 0; state < nStates; state++) {
      var top = double.negativeInfinity;
      for (var action = 0; action < nActions; action++) {
        final candidate = lookahead(state, action);
        if (candidate > top) top = candidate;
      }
      final change = (top - values[state]).abs();
      if (change > residual) residual = change;
      // Written back immediately, so states visited later in the same sweep
      // already see this updated value.
      values[state] = top;
    }
    converged = residual < tol;
  }

  // Greedy policy extraction; the strict `>` keeps the first maximising
  // action when several actions tie.
  final policy = List<int>.generate(nStates, (state) {
    var chosen = 0;
    var chosenValue = double.negativeInfinity;
    for (var action = 0; action < nActions; action++) {
      final candidate = lookahead(state, action);
      if (candidate > chosenValue) {
        chosenValue = candidate;
        chosen = action;
      }
    }
    return chosen;
  }, growable: false);

  return {'values': values, 'policy': policy};
}