im2colGPU function
Implementation
/// Records an im2col operation for [input] onto [tape] and returns the tensor
/// that will hold the resulting column matrix.
///
/// The input shape is interpreted as `[height, width]` when it has exactly two
/// dimensions (treated as a single channel), and as
/// `[channels, height, width]` otherwise. The returned tensor is created with
/// shape `[channels * kH * kW, outH * outW]`, where `outH = height - kH + 1`
/// and `outW = width - kW + 1`.
///
/// The command written to [tape] is `OP_IM2COL`, followed by the input id, the
/// output id, [kH] and [kW]. The returned tensor's `creator` is a [GPUNode]
/// whose callback writes the matching `OP_COL2IM` command, addressed to the
/// `<id>_grad` buffers of the output and the input.
///
/// Throws an [ArgumentError] if [kH] or [kW] is less than 1, or if the kernel
/// is larger than the input in either spatial dimension. The check happens
/// before anything is allocated or written to [tape], so a rejected call
/// leaves the tape untouched.
GPUTensor<Matrix> im2colGPU(GPUTensor<dynamic> input, int kH, int kW, CommandBuffer tape) {
  if (kH < 1 || kW < 1) {
    throw ArgumentError('im2colGPU: kernel size must be positive, got ${kH}x$kW');
  }

  // A rank-2 shape carries no channel axis, so it counts as one channel.
  final bool singleChannel = input.shape.length == 2;
  final int inChannels = singleChannel ? 1 : input.shape[0];
  final int inH = singleChannel ? input.shape[0] : input.shape[1];
  final int inW = singleChannel ? input.shape[1] : input.shape[2];

  // Without this guard outH/outW would be zero or negative, and a command
  // describing an impossible output would be pushed to the tape.
  if (kH > inH || kW > inW) {
    throw ArgumentError(
      'im2colGPU: kernel ${kH}x$kW does not fit inside input ${inH}x$inW',
    );
  }

  final int outH = inH - kH + 1;
  final int outW = inW - kW + 1;
  final int rows = inChannels * kH * kW;
  final int cols = outH * outW;

  GPUTensor<Matrix> out = GPUTensor<Matrix>.empty(<int>[rows, cols]);

  // Forward command: opcode, source buffer, destination buffer, kernel extents.
  tape.putInt(OP_IM2COL);
  tape.putString(input.id);
  tape.putString(out.id);
  tape.putInt(kH);
  tape.putInt(kW);

  out.creator = GPUNode(
    <GPUTensor>[input],
    (CommandBuffer bTape) {
      // Backward command: col2im from the output's gradient buffer into the
      // input's gradient buffer, using the same kernel extents.
      bTape.putInt(OP_COL2IM);
      bTape.putString('${out.id}_grad');
      bTape.putString('${input.id}_grad');
      bTape.putInt(kH);
      bTape.putInt(kW);
    },
    opName: 'im2colGPU',
    // Same value as inChannels * kH * kW * outH * outW.
    cost: rows * cols,
  );
  return out;
}