matVecMulGPU function
Implementation
/// Records a matrix-vector multiply of [mMat] and [v] onto [tape] and returns
/// the tensor that the recorded op names as its output.
///
/// The returned tensor is created from a list of `mMat.shape[0]` zeros. Nothing
/// is computed here: the function only appends one `OP_MATMUL` record to
/// [tape] and attaches a [GPUNode] to the result whose callback appends two
/// further `OP_MATMUL` records (one targeting `'<mMat.id>_grad'`, one targeting
/// `'<v.id>_grad'`) to whatever [CommandBuffer] it is later invoked with.
///
/// NOTE(review): the record layout after the opcode looks like a GEMM call --
/// (A id, B id, C id, transposeA, transposeB, alpha, beta, <flag>) -- but the
/// encoding is defined outside this block; confirm against the OP_MATMUL
/// decoder. The meaning of the trailing `false` is not visible from here.
///
/// NOTE(review): no check is made that the length of [v] matches
/// `mMat.shape[1]`; presumably the caller or the executor validates shapes.
GPUTensor<Vector> matVecMulGPU(GPUTensor<Matrix> mMat, GPUTensor<Vector> v, CommandBuffer tape) {
  final int rows = mMat.shape[0];
  final int cols = mMat.shape[1];

  // Output buffer: one zero-initialised entry per matrix row.
  final GPUTensor<Vector> result = GPUTensor<Vector>(List<double>.filled(rows, 0.0));

  // Forward record: operands mMat, v -> result, both bool flags off,
  // scalars 1.0 and 0.0 (presumably alpha = 1, beta = 0, i.e. overwrite).
  tape
    ..putInt(OP_MATMUL)
    ..putString(mMat.id)
    ..putString(v.id)
    ..putString(result.id)
    ..putBool(false)
    ..putBool(false)
    ..putFloat(1.0)
    ..putFloat(0.0)
    ..putBool(false);

  result.creator = GPUNode(
    [mMat, v],
    (CommandBuffer backTape) {
      // Record targeting mMat's gradient: operands are result's gradient and
      // v, with the second bool flag set (presumably "transpose B", giving an
      // outer product) and second scalar 1.0 (presumably beta = 1, i.e.
      // accumulate into the existing gradient).
      backTape
        ..putInt(OP_MATMUL)
        ..putString('${result.id}_grad')
        ..putString(v.id)
        ..putString('${mMat.id}_grad')
        ..putBool(false)
        ..putBool(true)
        ..putFloat(1.0)
        ..putFloat(1.0)
        ..putBool(false);

      // Record targeting v's gradient: operands are mMat and result's
      // gradient, with the first bool flag set (presumably "transpose A") and
      // second scalar 1.0 again.
      backTape
        ..putInt(OP_MATMUL)
        ..putString(mMat.id)
        ..putString('${result.id}_grad')
        ..putString('${v.id}_grad')
        ..putBool(true)
        ..putBool(false)
        ..putFloat(1.0)
        ..putFloat(1.0)
        ..putBool(false);
    },
    opName: 'matVecMulGPU',
    // Cost estimate passed to the node: 2 * rows * cols.
    cost: 2 * rows * cols,
  );

  return result;
}