selectAction method
Implementation
/// Samples an action index for [state] from the policy's softmax distribution.
///
/// The logits come from the first row of `policy.predict([state])`. They are
/// turned into probabilities with a max-shifted softmax, and one index is
/// drawn by walking the cumulative distribution against a single uniform
/// draw from `_rand`.
///
/// Returns an index in `0 .. logits.length - 1`.
int selectAction(List<double> state) {
  final scores = policy.predict([state])[0];

  // Softmax: subtract the largest logit before exponentiating so the biggest
  // exponent is exp(0).
  final peak = scores.reduce(max);
  final weights = scores.map((s) => exp(s - peak)).toList();
  final total = weights.reduce((acc, w) => acc + w);
  final distribution = weights.map((w) => w / total).toList();

  // Inverse-CDF sampling: pick the first index whose cumulative probability
  // exceeds the uniform draw.
  final threshold = _rand.nextDouble();
  var running = 0.0;
  var index = 0;
  while (index < distribution.length) {
    running += distribution[index];
    if (threshold < running) {
      return index;
    }
    index++;
  }

  // Reached only when the cumulative sum never exceeded the draw (for
  // example because of floating-point rounding); fall back to the last index.
  return distribution.length - 1;
}