multiplyGPU<T> function
Implementation
/// Records a GPU multiply of [a] and [b] on [tape] and returns the tensor
/// that is named as the destination of that record.
///
/// The forward record written to [tape] is `OP_MULTIPLY` followed by the ids
/// of [a], [b] and the output tensor, in that order. The output tensor is
/// created from the shape of [a]; the shape of [b] is not read here.
///
/// The returned tensor's `creator` is set to a [GPUNode] that is given the
/// list `[a, b]` and a callback. When invoked with a [CommandBuffer], the
/// callback writes two `OP_MULTIPLY_BACKWARD` records:
///
/// 1. `<out.id>_grad`, `b.id`, `<a.id>_grad`
/// 2. `<out.id>_grad`, `a.id`, `<b.id>_grad`
GPUTensor<T> multiplyGPU<T>(GPUTensor<T> a, GPUTensor<T> b, CommandBuffer tape) {
  // The output takes its shape from `a` (presumably so device memory can be
  // reserved up front — confirm against GPUTensor.empty).
  final result = GPUTensor<T>.empty(a.shape);

  // Forward record: opcode, then first operand id, second operand id,
  // destination id.
  tape
    ..putInt(OP_MULTIPLY)
    ..putString(a.id)
    ..putString(b.id)
    ..putString(result.id);

  result.creator = GPUNode(
    <GPUTensor>[a, b],
    (CommandBuffer backwardTape) {
      // Buffer names are derived from the tensor ids at the time the callback
      // runs, not when the forward op was recorded.
      final upstreamGrad = '${result.id}_grad';

      // First record pairs the upstream gradient with `b` and targets
      // `a`'s gradient buffer.
      backwardTape
        ..putInt(OP_MULTIPLY_BACKWARD)
        ..putString(upstreamGrad)
        ..putString(b.id)
        ..putString('${a.id}_grad');

      // Second record pairs the upstream gradient with `a` and targets
      // `b`'s gradient buffer.
      backwardTape
        ..putInt(OP_MULTIPLY_BACKWARD)
        ..putString(upstreamGrad)
        ..putString(a.id)
        ..putString('${b.id}_grad');
    },
    opName: 'multiplyGPU',
    cost: 1,
  );

  return result;
}