valueIteration method

Map<String, dynamic> valueIteration({
  double gamma = 0.99,
  double tol = 1e-6,
  int maxIter = 10000,
})

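Runs value iteration and returns a map with two entries: 'values' (the list of state values) and 'policy' (the list of greedy action indices, one per state).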

Implementation

/// Runs value iteration and derives the greedy policy from the result.
///
/// Each sweep updates every state's value in place to the largest one-step
/// lookahead value over all actions, discounting successor values by
/// [gamma]. Sweeping stops as soon as the largest absolute change seen in a
/// sweep is below [tol], or after [maxIter] sweeps, whichever comes first.
///
/// Returns a map with two entries: `'values'`, the list of state values, and
/// `'policy'`, the list holding the index of the greedy action for each
/// state. Ties between actions resolve to the lowest action index.
Map<String, dynamic> valueIteration({
  double gamma = 0.99,
  double tol = 1e-6,
  int maxIter = 10000,
}) {
  final values = List<double>.filled(nStates, 0.0);

  // One-step lookahead: sum over successor states of
  // P * (R + gamma * value), using the current value estimates.
  double lookahead(int state, int action) {
    var total = 0.0;
    for (var next = 0; next < nStates; next++) {
      total += P[state][action][next] *
          (R[state][action][next] + gamma * values[next]);
    }
    return total;
  }

  // Index of the first action that attains the best lookahead value.
  int greedyAction(int state) {
    var chosen = 0;
    var chosenValue = double.negativeInfinity;
    for (var action = 0; action < nActions; action++) {
      final candidate = lookahead(state, action);
      if (candidate > chosenValue) {
        chosenValue = candidate;
        chosen = action;
      }
    }
    return chosen;
  }

  for (var sweep = 0; sweep < maxIter; sweep++) {
    var largestChange = 0.0;
    for (var state = 0; state < nStates; state++) {
      var top = double.negativeInfinity;
      for (var action = 0; action < nActions; action++) {
        final candidate = lookahead(state, action);
        if (candidate > top) top = candidate;
      }
      final change = (top - values[state]).abs();
      if (change > largestChange) largestChange = change;
      // Written back immediately, so later states in this same sweep
      // already see the updated value.
      values[state] = top;
    }
    if (largestChange < tol) break;
  }

  // Fixed-length list, one greedy action per state.
  final policy = List<int>.generate(nStates, greedyAction, growable: false);

  return {'values': values, 'policy': policy};
}