addMatrixGPU function

Implementation

/// Records a matrix addition of [a] and [b] on [tape] and returns the tensor
/// that will hold the result.
///
/// The returned tensor is created empty with the shape `[a.shape[0], a.shape[1]]`.
/// The forward command written to [tape] is `OP_ADD` followed by the ids of
/// [a], [b] and the result, in that order.
///
/// The result's `creator` is a [GPUNode] with inputs `[a, b]`, op name
/// `'add_matrixGPU'` and a cost of rows * columns. Its backward callback writes
/// one `OP_ADD_INTO` command per input, pairing the result's `<id>_grad` with
/// that input's `<id>_grad`.
///
/// NOTE(review): only `a.shape` is read here; `b` is presumably expected to
/// have the same shape — confirm against callers / the tape consumer.
GPUTensor<Matrix> addMatrixGPU(GPUTensor<Matrix> a, GPUTensor<Matrix> b, CommandBuffer tape) {
  final int rows = a.shape[0];
  final int cols = a.shape[1];

  // Allocate the result up front so it has a shape and an id to reference.
  final GPUTensor<Matrix> out = GPUTensor<Matrix>.empty(<int>[rows, cols]);

  // Forward pass: opcode, then both operand ids, then the result id.
  tape
    ..putInt(OP_ADD)
    ..putString(a.id)
    ..putString(b.id)
    ..putString(out.id);

  out.creator = GPUNode(
    [a, b],
    (CommandBuffer gradTape) {
      // Backward pass: one OP_ADD_INTO per input, `a` first and then `b`,
      // each naming the result's gradient followed by the input's gradient.
      for (final GPUTensor<Matrix> operand in <GPUTensor<Matrix>>[a, b]) {
        gradTape
          ..putInt(OP_ADD_INTO)
          ..putString('${out.id}_grad')
          ..putString('${operand.id}_grad');
      }
    },
    opName: 'add_matrixGPU',
    cost: rows * cols,
  );

  return out;
}