conv2dSimpleGPU function
GPUTensor<Matrix> conv2dSimpleGPU(
  GPUTensor<Matrix> input,
  GPUTensor<Matrix> kernel,
  CommandBuffer tape
)
Implementation
/// Records a simple 2-D convolution of [input] with [kernel] onto [tape] and
/// returns the tensor that will hold the result.
///
/// The first two entries of each tensor's `shape` are read as height and
/// width. The output is allocated with shape
/// `[inH - kH + 1, inW - kW + 1]`; no padding or stride is applied here.
///
/// Forward recording: writes `OP_CONV2D_FORWARD` followed by the ids of
/// [input], [kernel] and the newly allocated output tensor.
///
/// Backward recording: the returned tensor's `creator` is a [GPUNode] over
/// `[input, kernel]` whose callback writes two ops to the tape it is given:
///  * `OP_CONV2D_BACKWARD_INPUT` with `<out>_grad`, the kernel id and
///    `<input>_grad`;
///  * `OP_CONV2D_BACKWARD_KERNEL` with the input id, `<out>_grad` and
///    `<kernel>_grad`.
///
/// Throws an [ArgumentError] when the kernel is larger than the input along
/// either axis, since the output would then have a non-positive dimension.
GPUTensor<Matrix> conv2dSimpleGPU(
    GPUTensor<Matrix> input,
    GPUTensor<Matrix> kernel,
    CommandBuffer tape) {
  final int inH = input.shape[0];
  final int inW = input.shape[1];
  final int kH = kernel.shape[0];
  final int kW = kernel.shape[1];

  final int outH = inH - kH + 1;
  final int outW = inW - kW + 1;

  // Fail before allocating or touching the tape: a kernel that does not fit
  // inside the input would otherwise produce a zero/negative output shape.
  if (outH < 1 || outW < 1) {
    throw ArgumentError(
        'conv2dSimpleGPU: kernel (${kH}x$kW) does not fit inside input '
        '(${inH}x$inW)');
  }

  final GPUTensor<Matrix> out = GPUTensor<Matrix>.empty(<int>[outH, outW]);

  // Forward op: opcode, then operand ids in (input, kernel, output) order.
  tape
    ..putInt(OP_CONV2D_FORWARD)
    ..putString(input.id)
    ..putString(kernel.id)
    ..putString(out.id);

  out.creator = GPUNode(
    <GPUTensor>[input, kernel],
    (CommandBuffer bTape) {
      // Gradient w.r.t. the input: (out grad, kernel) -> input grad.
      bTape
        ..putInt(OP_CONV2D_BACKWARD_INPUT)
        ..putString('${out.id}_grad')
        ..putString(kernel.id)
        ..putString('${input.id}_grad');
      // Gradient w.r.t. the kernel: (input, out grad) -> kernel grad.
      bTape
        ..putInt(OP_CONV2D_BACKWARD_KERNEL)
        ..putString(input.id)
        ..putString('${out.id}_grad')
        ..putString('${kernel.id}_grad');
    },
    opName: 'conv2dSimpleGPU',
    // One unit per (output element, kernel element) pair.
    cost: outH * outW * kH * kW,
  );
  return out;
}