sumMatrixGPU function
Implementation
/// Records a sum-reduction of the matrix tensor [m] onto [tape] and returns
/// the scalar tensor that is registered as the result.
///
/// Forward pass: writes `OP_SUM_REDUCE`, then the id of [m], then the id of
/// the freshly created output tensor to [tape], in that order.
///
/// Backward pass: the returned tensor's `creator` is a [GPUNode] whose input
/// list is `[m]` and whose callback writes `OP_SUM_REDUCE_BACKWARD`, followed
/// by `'<out.id>_grad'` and `'<m.id>_grad'`, to the command buffer it is
/// handed. The node is labelled `'sum_matrixGPU'` and its `cost` is the
/// product of the first two entries of `m.shape`.
///
/// NOTE(review): this function only encodes commands; how and when the
/// commands are executed is not visible here — confirm against the
/// `CommandBuffer` consumer.
GPUTensor<Scalar> sumMatrixGPU(GPUTensor<Matrix> m, CommandBuffer tape) {
  // Both dimensions are read up front, before anything is written to the
  // tape; they only feed the node's cost below.
  final int rowCount = m.shape[0];
  final int colCount = m.shape[1];

  // Output tensor, constructed with 0.0.
  final GPUTensor<Scalar> out = GPUTensor<Scalar>(0.0);

  // Forward command layout: opcode, input id, output id.
  tape
    ..putInt(OP_SUM_REDUCE)
    ..putString(m.id)
    ..putString(out.id);

  out.creator = GPUNode(
    [m],
    (CommandBuffer backwardTape) {
      // Ids are interpolated when the callback runs, not when it is created.
      backwardTape
        ..putInt(OP_SUM_REDUCE_BACKWARD)
        ..putString('${out.id}_grad')
        ..putString('${m.id}_grad');
    },
    opName: 'sum_matrixGPU',
    cost: rowCount * colCount,
  );

  return out;
}