sumReduceColumnsGPU function

GPUTensor<Vector> sumReduceColumnsGPU(
  GPUTensor<Matrix> m,
  CommandBuffer tape
)

Implementation

/// Records a column sum-reduction of [m] on [tape] and returns the tensor
/// that the recorded command names as its output.
///
/// The returned tensor is created with `GPUTensor<Vector>.empty` using a
/// one-element shape whose single entry is `m.shape[1]`. Three values are
/// written to [tape], in order: the `OP_SUM_REDUCE_COLUMNS` opcode, the id of
/// [m], and the id of the returned tensor.
///
/// The returned tensor's `creator` is set to a [GPUNode] that lists [m] as its
/// only input and carries a backward callback. When invoked with a command
/// buffer, that callback writes `OP_BROADCAST_ADD` followed by the ids
/// `'<m.id>_grad'`, `'<out.id>_grad'`, `'<m.id>_grad'`.
GPUTensor<Vector> sumReduceColumnsGPU(GPUTensor<Matrix> m, CommandBuffer tape) {
  // Second entry of the matrix shape becomes the length of the result vector.
  final int columnCount = m.shape[1];
  final GPUTensor<Vector> reduced =
      GPUTensor<Vector>.empty(<int>[columnCount]);

  // Forward command: opcode, then source id, then destination id.
  tape
    ..putInt(OP_SUM_REDUCE_COLUMNS)
    ..putString(m.id)
    ..putString(reduced.id);

  reduced.creator = GPUNode(
    <GPUTensor>[m],
    (CommandBuffer bTape) {
      // Backward of the column reduction: broadcast the result's gradient
      // back onto the gradient of `m`.
      // NOTE(review): the operand order (m_grad, out_grad, m_grad) presumably
      // means "lhs, rhs, destination" — confirm against the OP_BROADCAST_ADD
      // kernel contract.
      bTape
        ..putInt(OP_BROADCAST_ADD)
        ..putString('${m.id}_grad')
        ..putString('${reduced.id}_grad')
        ..putString('${m.id}_grad');
    },
    opName: 'sumReduceColumnsGPU',
  );

  return reduced;
}