sumReduceColumnsGPU function
Implementation
/// Records a column sum-reduction of [m] on [tape] and returns the result
/// tensor.
///
/// The result is a newly created vector whose single dimension is
/// `m.shape[1]`. The forward command written to [tape] consists of the
/// `OP_SUM_REDUCE_COLUMNS` opcode followed by the id of [m] and the id of the
/// result.
///
/// The result's `creator` is set to a [GPUNode] that lists [m] as its only
/// input and carries a backward callback which writes an `OP_BROADCAST_ADD`
/// command to the tape it receives.
GPUTensor<Vector> sumReduceColumnsGPU(GPUTensor<Matrix> m, CommandBuffer tape) {
  // One output element per column of the input matrix.
  final GPUTensor<Vector> reduced = GPUTensor<Vector>.empty(<int>[m.shape[1]]);

  // Forward command: opcode, then the input id, then the output id.
  tape
    ..putInt(OP_SUM_REDUCE_COLUMNS)
    ..putString(m.id)
    ..putString(reduced.id);

  reduced.creator = GPUNode(
    <GPUTensor>[m],
    (CommandBuffer bTape) {
      // Backward of a column reduction: the result's gradient is broadcast
      // back onto the input's gradient buffer.
      // NOTE(review): operand order looks like (lhs, rhs, destination), i.e.
      // m_grad = m_grad + broadcast(out_grad) — confirm against the
      // OP_BROADCAST_ADD kernel.
      bTape
        ..putInt(OP_BROADCAST_ADD)
        ..putString('${m.id}_grad')
        ..putString('${reduced.id}_grad')
        ..putString('${m.id}_grad');
    },
    opName: 'sumReduceColumnsGPU',
  );

  return reduced;
}