concatenate3DGPU function

Implementation

/// Records a concatenation of two 3-D GPU tensors on [tape] and returns the
/// tensor that will hold the result.
///
/// The result is allocated with shape
/// `[a.shape[0] + b.shape[0], a.shape[1], a.shape[2]]`, i.e. the inputs are
/// joined along their first dimension. No data is moved here; this function
/// only appends an `OP_CONCATENATE` record (op code, the ids of [a], [b] and
/// the output, then a trailing `0`) to [tape].
///
/// The returned tensor's `creator` is a [GPUNode] with parents `[a, b]` whose
/// callback appends the matching `OP_CONCATENATE_BACKWARD` record to the
/// command buffer it is given.
///
/// Throws an [ArgumentError] if [a] and [b] differ in `shape[1]` or
/// `shape[2]`, because the output buffer is sized from [a] alone and could
/// not hold a differently shaped [b].
GPUTensor<Tensor3D> concatenate3DGPU(GPUTensor<Tensor3D> a, GPUTensor<Tensor3D> b, CommandBuffer tape) {
  int aDepth = a.shape[0];
  int bDepth = b.shape[0];
  int totalDepth = aDepth + bDepth;
  int height = a.shape[1];
  int width = a.shape[2];

  // Fail fast, before anything is allocated or written to the tape: the
  // trailing dimensions of the output are taken from `a` only, so a `b` with
  // a different height/width would not fit the buffer allocated below.
  if (b.shape[1] != height || b.shape[2] != width) {
    throw ArgumentError(
      'concatenate3DGPU: trailing dimensions must match, '
      'got a.shape=${a.shape} and b.shape=${b.shape}',
    );
  }

  GPUTensor<Tensor3D> out = GPUTensor<Tensor3D>.empty([totalDepth, height, width]);

  // Forward record: op code, both input ids, output id, then a literal 0.
  tape.putInt(OP_CONCATENATE);
  tape.putString(a.id);
  tape.putString(b.id);
  tape.putString(out.id);
  // NOTE(review): presumably the concatenation axis (first dimension);
  // confirm against the consumer of OP_CONCATENATE.
  tape.putInt(0);

  out.creator = GPUNode(
    [a, b],
        (CommandBuffer bTape) {
      // Backward record: op code, the output's gradient id, then the two
      // input gradient ids, followed by two integers.
      bTape.putInt(OP_CONCATENATE_BACKWARD);
      bTape.putString('${out.id}_grad');
      bTape.putString('${a.id}_grad');
      bTape.putString('${b.id}_grad');
      // NOTE(review): the 0 mirrors the forward record (presumably the axis)
      // and aDepth is presumably the index at which the output gradient is
      // split between a and b; confirm against the backward kernel.
      bTape.putInt(0);
      bTape.putInt(aDepth);
    },
    opName: 'concat_3dGPU',
    cost: 0,
  );

  return out;
}