conv2dSimpleGPU function

GPUTensor<Matrix> conv2dSimpleGPU(
  GPUTensor<Matrix> input,
  GPUTensor<Matrix> kernel,
  CommandBuffer tape
)

Implementation

/// Records a 2-D convolution of [input] with [kernel] on [tape] and returns
/// the tensor that will hold the result.
///
/// The first two entries of each tensor's `shape` are read as height and
/// width. The output is allocated with shape
/// `[inH - kH + 1, inW - kW + 1]`, i.e. the size produced by sliding the
/// kernel over every position where it fits entirely inside the input.
/// NOTE(review): this presumably means stride 1 with no padding; the actual
/// arithmetic lives in the handler for `OP_CONV2D_FORWARD`, which is not
/// visible here — confirm.
///
/// Nothing is computed by this function itself: it appends
/// `OP_CONV2D_FORWARD` followed by the ids of [input], [kernel] and the
/// output tensor to [tape]. It also attaches a [GPUNode] to the output whose
/// backward callback appends two further ops to the tape it is handed:
///
/// * `OP_CONV2D_BACKWARD_INPUT` with `<out>_grad`, the kernel id and
///   `<input>_grad`;
/// * `OP_CONV2D_BACKWARD_KERNEL` with the input id, `<out>_grad` and
///   `<kernel>_grad`.
///
/// The node's `cost` is `outH * outW * kH * kW`.
///
/// Throws an [ArgumentError] if the kernel has a non-positive dimension or is
/// larger than the input in either dimension, because the output shape would
/// then be zero or negative. The check runs before anything is allocated or
/// written to [tape].
GPUTensor<Matrix> conv2dSimpleGPU(
    GPUTensor<Matrix> input,
    GPUTensor<Matrix> kernel,
    CommandBuffer tape) {
  final int inH = input.shape[0];
  final int inW = input.shape[1];
  final int kH = kernel.shape[0];
  final int kW = kernel.shape[1];

  if (kH <= 0 || kW <= 0) {
    throw ArgumentError(
        'conv2dSimpleGPU: kernel dimensions must be positive, got ${kH}x$kW');
  }

  final int outH = inH - kH + 1;
  final int outW = inW - kW + 1;

  // Reject a kernel that does not fit inside the input up front, so no
  // tensor with a non-positive dimension is allocated and no forward op with
  // an invalid output is left on the tape.
  if (outH <= 0 || outW <= 0) {
    throw ArgumentError(
        'conv2dSimpleGPU: kernel (${kH}x$kW) must not be larger than '
        'input (${inH}x$inW)');
  }

  final GPUTensor<Matrix> out = GPUTensor<Matrix>.empty(<int>[outH, outW]);

  // Forward op record: opcode, then input, kernel and output tensor ids.
  tape.putInt(OP_CONV2D_FORWARD);
  tape.putString(input.id);
  tape.putString(kernel.id);
  tape.putString(out.id);

  out.creator = GPUNode(
    <GPUTensor>[input, kernel],
    (CommandBuffer bTape) {
      // Gradient w.r.t. the input: reads out's gradient and the kernel,
      // targets '<input>_grad'.
      bTape.putInt(OP_CONV2D_BACKWARD_INPUT);
      bTape.putString('${out.id}_grad');
      bTape.putString(kernel.id);
      bTape.putString('${input.id}_grad');

      // Gradient w.r.t. the kernel: reads the input and out's gradient,
      // targets '<kernel>_grad'.
      bTape.putInt(OP_CONV2D_BACKWARD_KERNEL);
      bTape.putString(input.id);
      bTape.putString('${out.id}_grad');
      bTape.putString('${kernel.id}_grad');
    },
    opName: 'conv2dSimpleGPU',
    cost: outH * outW * kH * kW,
  );

  return out;
}