multiplyGPU<T> function

GPUTensor<T> multiplyGPU<T>(
  1. GPUTensor<T> a,
  2. GPUTensor<T> b,
  3. CommandBuffer tape
)

Implementation

/// Records a multiply of [a] and [b] on [tape] and returns the output tensor.
///
/// The output is created with `GPUTensor<T>.empty` from the shape of [a]; the
/// shape of [b] is not inspected here. The forward record written to [tape] is
/// `OP_MULTIPLY` followed by the ids of [a], [b] and the output, in that order.
///
/// The returned tensor's `creator` is set to a `GPUNode` over `[a, b]` whose
/// callback writes two `OP_MULTIPLY_BACKWARD` records to the buffer it is
/// given. Each record carries three string operands:
///
/// 1. `'<output id>_grad'`, the id of [b], `'<a id>_grad'`
/// 2. `'<output id>_grad'`, the id of [a], `'<b id>_grad'`
GPUTensor<T> multiplyGPU<T>(GPUTensor<T> a, GPUTensor<T> b, CommandBuffer tape) {
  // The result tensor takes its shape from the first operand.
  final GPUTensor<T> result = GPUTensor<T>.empty(a.shape);

  // Forward record: opcode, then left operand, right operand, destination.
  tape
    ..putInt(OP_MULTIPLY)
    ..putString(a.id)
    ..putString(b.id)
    ..putString(result.id);

  result.creator = GPUNode(
    <GPUTensor>[a, b],
    (CommandBuffer gradTape) {
      // Operand layout looks like (incoming grad, other input, destination
      // grad) — NOTE(review): confirm against the OP_MULTIPLY_BACKWARD handler.
      gradTape
        // Record targeting a's gradient id, paired with b.
        ..putInt(OP_MULTIPLY_BACKWARD)
        ..putString('${result.id}_grad')
        ..putString(b.id)
        ..putString('${a.id}_grad')
        // Record targeting b's gradient id, paired with a.
        ..putInt(OP_MULTIPLY_BACKWARD)
        ..putString('${result.id}_grad')
        ..putString(a.id)
        ..putString('${b.id}_grad');
    },
    opName: 'multiplyGPU',
    cost: 1,
  );

  return result;
}