matVecMulGPU function

GPUTensor<Vector> matVecMulGPU(
  GPUTensor<Matrix> mMat,
  GPUTensor<Vector> v,
  CommandBuffer tape
)

Implementation

/// Records an `OP_MATMUL` command for [mMat] and [v] on [tape] and returns the
/// tensor whose id is written as the command's output operand.
///
/// The returned tensor is created from a zero-filled list with one entry per
/// row of [mMat] (`mMat.shape[0]`). Its `creator` is set to a [GPUNode] whose
/// parents are `[mMat, v]` and whose callback writes two further `OP_MATMUL`
/// commands targeting `'<mMat.id>_grad'` and `'<v.id>_grad'`.
///
/// NOTE(review): each command is serialized as
/// `int op, 3 x string id, bool, bool, float, float, bool`. The two leading
/// bools are presumably transpose flags for the first and second operand and
/// the floats presumably alpha/beta scaling factors (beta = 1.0 meaning
/// "accumulate into the destination") — confirm against the OP_MATMUL decoder.
/// The meaning of the trailing bool is not visible from here.
GPUTensor<Vector> matVecMulGPU(GPUTensor<Matrix> mMat, GPUTensor<Vector> v, CommandBuffer tape) {
  final int rows = mMat.shape[0];
  final int cols = mMat.shape[1];

  // Destination tensor: `rows` entries, all initialised to 0.0.
  final GPUTensor<Vector> out = GPUTensor<Vector>(List<double>.filled(rows, 0.0));

  // Forward command: operands mMat, v -> out.
  tape
    ..putInt(OP_MATMUL)
    ..putString(mMat.id)
    ..putString(v.id)
    ..putString(out.id)
    ..putBool(false)
    ..putBool(false)
    ..putFloat(1.0)
    ..putFloat(0.0)
    ..putBool(false);

  out.creator = GPUNode(
    [mMat, v],
    (CommandBuffer backward) {
      // Name of the gradient buffer associated with this op's output.
      final upstream = '${out.id}_grad';

      // First backward command: operands upstream, v -> '<mMat.id>_grad'.
      backward
        ..putInt(OP_MATMUL)
        ..putString(upstream)
        ..putString(v.id)
        ..putString('${mMat.id}_grad')
        ..putBool(false)
        ..putBool(true)
        ..putFloat(1.0)
        ..putFloat(1.0)
        ..putBool(false);

      // Second backward command: operands mMat, upstream -> '<v.id>_grad'.
      backward
        ..putInt(OP_MATMUL)
        ..putString(mMat.id)
        ..putString(upstream)
        ..putString('${v.id}_grad')
        ..putBool(true)
        ..putBool(false)
        ..putFloat(1.0)
        ..putFloat(1.0)
        ..putBool(false);
    },
    opName: 'matVecMulGPU',
    cost: 2 * rows * cols,
  );

  return out;
}