layerNormMatrixGPU function

GPUTensor<Matrix> layerNormMatrixGPU(
  1. GPUTensor<Matrix> m,
  2. GPUTensor<Vector> gamma,
  3. GPUTensor<Vector> beta,
  4. GPUTensor<Vector> meanCache,
  5. GPUTensor<Vector> rstdCache,
  6. double epsilon,
  7. CommandBuffer tape,
)

Implementation

/// Records a layer-normalisation forward op for the matrix [m] on [tape] and
/// returns the tensor that will receive the result.
///
/// The returned tensor is allocated with the same two dimensions as [m]
/// (`m.shape[0]` x `m.shape[1]`). The forward record written to [tape]
/// consists of, in order: the `OP_LAYER_NORM_FORWARD` opcode, the ids of
/// [m], [gamma], [beta], the output tensor, [meanCache], [rstdCache], and
/// finally [epsilon] as a float.
///
/// The output's `creator` is set to a [GPUNode] whose inputs are
/// `[m, gamma, beta]` and whose callback writes the matching
/// `OP_LAYER_NORM_BACKWARD` record (output gradient id, [m], [gamma], the two
/// cache ids, then the gradient ids of [m], [gamma] and [beta]) onto the
/// command buffer it is handed.
///
/// NOTE(review): [meanCache] and [rstdCache] are only referenced by id here;
/// presumably the forward kernel fills them with per-row mean and reciprocal
/// standard deviation for reuse in the backward pass — confirm against the
/// kernel implementation.
GPUTensor<Matrix> layerNormMatrixGPU(
  GPUTensor<Matrix> m,
  GPUTensor<Vector> gamma,
  GPUTensor<Vector> beta,
  GPUTensor<Vector> meanCache,
  GPUTensor<Vector> rstdCache,
  double epsilon,
  CommandBuffer tape,
) {
  final int rows = m.shape[0];
  final int cols = m.shape[1];

  // Output has the same shape as the input matrix.
  final GPUTensor<Matrix> out = GPUTensor<Matrix>.empty(<int>[rows, cols]);

  // Forward record: opcode, then operand ids, then the epsilon scalar.
  tape
    ..putInt(OP_LAYER_NORM_FORWARD)
    ..putString(m.id)
    ..putString(gamma.id)
    ..putString(beta.id)
    ..putString(out.id)
    ..putString(meanCache.id)
    ..putString(rstdCache.id)
    ..putFloat(epsilon);

  out.creator = GPUNode(
    [m, gamma, beta],
    (CommandBuffer bTape) {
      // Backward record: incoming gradient, saved forward operands and
      // caches, then the three gradient destinations.
      bTape
        ..putInt(OP_LAYER_NORM_BACKWARD)
        ..putString('${out.id}_grad')
        ..putString(m.id)
        ..putString(gamma.id)
        ..putString(meanCache.id)
        ..putString(rstdCache.id)
        ..putString('${m.id}_grad')
        ..putString('${gamma.id}_grad')
        ..putString('${beta.id}_grad');
    },
    opName: 'layerNormMatrixGPU',
    cost: rows * cols * 8,
  );

  return out;
}