generateMuZeroPure function
void
generateMuZeroPure()
Pure MuZero generation: calls the representation function only once (on the prompt), then advances the latent state using the dynamics function alone.
Implementation
/// Greedily generates tokens from [prompt] using only the latent dynamics.
///
/// [agent]'s `representation` is called exactly once, on [prompt], to obtain
/// the initial latent state. Each of the (at most [maxLength]) steps then:
///
/// 1. calls `predictPolicy` on the current state,
/// 2. picks the token with `argMax` over the fetched policy data,
/// 3. prints the step index and the token's string from [itos], and
/// 4. calls `dynamics` with the state, the chosen token and `i + 1` to get
///    the next state.
///
/// Generation stops early when the chosen token is the stop token (id 6).
/// The full sequence (prompt plus generated tokens) is printed at the end,
/// mapped through [itos]; ids missing from [itos] print as `null`.
///
/// Every tensor registered in a tracker list, and the current state, is
/// disposed on every exit path, including the early stop and exceptions.
void generateMuZeroPure(
  MuZeroGreedyAgent agent,
  List<int> prompt,
  int maxLength,
  Map<int, String> itos,
) {
  // "." is the stop token.
  const stopToken = 6;

  final generated = List<int>.from(prompt);
  print("--- Running Pure Dynamics Generation (True MuZero Test) ---");

  // 1. Initial Representation (h)
  final initTracker = <Tensor>[];
  Tensor currentState;
  try {
    // detach() hands us a state we own; everything the tracker collected
    // while building it is released right after.
    currentState = agent.representation(prompt, initTracker).detach();
  } finally {
    for (final t in initTracker) {
      t.dispose();
    }
  }

  try {
    for (var i = 0; i < maxLength; i++) {
      final stepTracker = <Tensor>[];
      try {
        // 2. Predict: p = f(s)
        final logits = agent.predictPolicy(currentState, stepTracker);

        // 3. Select Action
        final bestToken = argMax(logits.fetchData());
        generated.add(bestToken);
        print("Step ${i.toString().padLeft(2)} -> ${itos[bestToken]}");

        // Leaving here used to skip the per-step cleanup; the `finally`
        // below now releases this step's tensors on this path as well.
        if (bestToken == stopToken) break;

        // 4. Transition: s = g(s, a)
        final nextState = agent
            .dynamics(currentState, bestToken, i + 1, stepTracker)
            .detach();

        // 5. Swap states: release the old one before adopting the new one.
        currentState.dispose();
        currentState = nextState;
      } finally {
        for (final t in stepTracker) {
          t.dispose();
        }
      }
    }
  } finally {
    currentState.dispose();
  }

  print("\nFinal Result: ${generated.map((id) => itos[id]).join(" ")}");
}