layerNormMatrixGPU function
Implementation
/// Records a layer-norm forward op for [m] on [tape] and returns the tensor
/// that the op writes into.
///
/// The output tensor is created with `GPUTensor<Matrix>.empty` using the
/// first two entries of `m.shape`. The forward record consists of the
/// `OP_LAYER_NORM_FORWARD` opcode, the ids of [m], [gamma], [beta], the
/// output, [meanCache] and [rstdCache] (in that order), and finally
/// [epsilon].
///
/// The returned tensor's `creator` is set to a `GPUNode` whose inputs are
/// `[m, gamma, beta]` and whose callback records the matching
/// `OP_LAYER_NORM_BACKWARD` op on the command buffer it is handed.
///
/// NOTE(review): [meanCache] and [rstdCache] are presumably filled by the
/// forward kernel with per-row mean / reciprocal std-dev and read back by the
/// backward kernel; only their ids are used here, so confirm against the
/// kernel implementation.
GPUTensor<Matrix> layerNormMatrixGPU(
  GPUTensor<Matrix> m,
  GPUTensor<Vector> gamma,
  GPUTensor<Vector> beta,
  GPUTensor<Vector> meanCache,
  GPUTensor<Vector> rstdCache,
  double epsilon,
  CommandBuffer tape,
) {
  final int rows = m.shape[0];
  final int cols = m.shape[1];
  final GPUTensor<Matrix> result = GPUTensor<Matrix>.empty(<int>[rows, cols]);

  // Cost figure handed to the graph node: 8 units per matrix element.
  final int nodeCost = rows * cols * 8;

  // Forward record. The operand order is part of the tape format and must
  // stay exactly as written.
  tape
    ..putInt(OP_LAYER_NORM_FORWARD)
    ..putString(m.id)
    ..putString(gamma.id)
    ..putString(beta.id)
    ..putString(result.id)
    ..putString(meanCache.id)
    ..putString(rstdCache.id)
    ..putFloat(epsilon);

  result.creator = GPUNode(
    [m, gamma, beta],
    (CommandBuffer gradTape) {
      // Backward record: upstream gradient first, then the forward operands
      // and caches, then the three gradient destinations.
      gradTape
        ..putInt(OP_LAYER_NORM_BACKWARD)
        ..putString('${result.id}_grad')
        ..putString(m.id)
        ..putString(gamma.id)
        ..putString(meanCache.id)
        ..putString(rstdCache.id)
        ..putString('${m.id}_grad')
        ..putString('${gamma.id}_grad')
        ..putString('${beta.id}_grad');
    },
    opName: 'layerNormMatrixGPU',
    cost: nodeCost,
  );

  return result;
}