matMul function

Tensor<Matrix> matMul(
  Tensor<Matrix> a,
  Tensor<Matrix> b
)

Implementation

/// Multiplies the matrix held by [a] with the matrix held by [b] and returns
/// the product wrapped in a new tensor.
///
/// Dimensions are taken from the operands' `shape`: [a] is read as `M x N`
/// (`a.shape[0]` by `a.shape[1]`) and [b] as `N x P`, so the result holds an
/// `M x P` matrix.
///
/// The returned tensor's `creator` is a [Node] over `[a, b]` whose closure
/// accumulates into the operands' `grad` buffers from `out.grad`:
///
/// * `a.grad[i, k] += sum over j of out.grad[i, j] * b[k, j]`
/// * `b.grad[k, j] += a[i, k] * out.grad[i, j]` for every `i`
///
/// Throws an [ArgumentError] when `b.shape[0]` differs from `a.shape[1]`;
/// without this guard a taller [b] would silently yield a wrong product.
Tensor<Matrix> matMul(Tensor<Matrix> a, Tensor<Matrix> b) {
  final int M = a.shape[0];
  final int N = a.shape[1];
  final int P = b.shape[1];

  // The shared (inner) dimension must agree, otherwise the loops below would
  // either read past the end of b or ignore some of its rows.
  if (b.shape[0] != N) {
    throw ArgumentError(
      'matMul: inner dimensions must match, got a.shape[1] = $N and '
      'b.shape[0] = ${b.shape[0]}',
    );
  }

  final Matrix aMat = a.value;
  final Matrix bMat = b.value;

  // Transpose b once so that each output entry below is a dot product of two
  // row vectors (a row of a with a row of bT).
  final Matrix bT = [];
  for (int col = 0; col < P; col++) {
    final Vector column = [];
    for (int row = 0; row < N; row++) {
      column.add(bMat[row][col]);
    }
    bT.add(column);
  }

  // Forward pass: outValue[i][j] = sum over k of a[i][k] * b[k][j].
  final Matrix outValue = [];
  for (int i = 0; i < M; i++) {
    final Vector rowA = aMat[i];
    final Vector outRow = [];
    for (int j = 0; j < P; j++) {
      final Vector rowBT = bT[j];
      double sum = 0.0;
      for (int k = 0; k < N; k++) {
        sum = sum + rowA[k] * rowBT[k];
      }
      outRow.add(sum);
    }
    outValue.add(outRow);
  }

  final Tensor<Matrix> out = Tensor<Matrix>(outValue);

  // Cost reported to the Node; presumably one multiply plus one add per
  // inner-product term (TODO confirm the unit the Node expects).
  final int cost = 2 * M * N * P;

  out.creator = Node(
    [a, b],
    () {
      // Gradient accumulation. `data` and `grad` are indexed here as flat
      // buffers using row-major offsets (row * columnCount + column).
      for (int i = 0; i < M; i++) {
        for (int k = 0; k < N; k++) {
          final int aIdx = i * N + k;
          // Collect a's gradient for this entry locally and write it once
          // after the j loop.
          double aGradSum = 0.0;
          for (int j = 0; j < P; j++) {
            final int outIdx = i * P + j;
            final int bIdx = k * P + j;
            final double gradOut = out.grad[outIdx];
            aGradSum = aGradSum + gradOut * b.data[bIdx];
            b.grad[bIdx] = b.grad[bIdx] + a.data[aIdx] * gradOut;
          }
          a.grad[aIdx] = a.grad[aIdx] + aGradSum;
        }
      }
    },
    opName: 'matMul',
    cost: cost,
  );
  return out;
}