layerNormVector function

Tensor<Vector> layerNormVector(
  Tensor<Vector> v,
  Tensor<Vector> gamma,
  Tensor<Vector> beta, {
  double epsilon = 1e-5,
})
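
Applies layer normalization to a single feature vector: v is normalized to zero mean and unit variance across its features, then scaled by gamma and shifted by beta elementwise. A minimal usage sketch with hypothetical values, assuming Vector is a typedef for List<double> and the single-argument Tensor<Vector> constructor used in the implementation below:

// Hypothetical example values; assumes Vector = List<double>.
Tensor<Vector> v = Tensor<Vector>([1.0, 2.0, 3.0, 4.0]);
Tensor<Vector> gamma = Tensor<Vector>([1.0, 1.0, 1.0, 1.0]); // identity scale
Tensor<Vector> beta = Tensor<Vector>([0.0, 0.0, 0.0, 0.0]);  // no shift

Tensor<Vector> out = layerNormVector(v, gamma, beta);
print(out.value); // ≈ [-1.342, -0.447, 0.447, 1.342]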

Implementation
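
The forward pass computes the per-vector mean and biased variance over the N features of v, normalizes, and then applies the learned affine transform. As a sketch in standard layer-norm notation:

$$\mu = \frac{1}{N}\sum_{k=1}^{N} x_k, \qquad \sigma^2 = \frac{1}{N}\sum_{k=1}^{N}(x_k - \mu)^2$$

$$\hat{x}_c = \frac{x_c - \mu}{\sqrt{\sigma^2 + \epsilon}}, \qquad y_c = \gamma_c\,\hat{x}_c + \beta_c$$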

import 'dart:math'; // for pow and sqrt

Tensor<Vector> layerNormVector(Tensor<Vector> v, Tensor<Vector> gamma, Tensor<Vector> beta, {double epsilon = 1e-5}) {
  int numFeatures = v.value.length;

  // Mean of the input features.
  double mean = 0;
  for (double val in v.value) { mean += val; }
  mean /= numFeatures;

  // Biased (population) variance of the input features.
  double variance = 0;
  for (double val in v.value) { variance += pow(val - mean, 2); }
  variance /= numFeatures;

  // Normalize: xHat = (x - mean) / sqrt(variance + epsilon).
  Vector normalizedVector = [];
  for (double val in v.value) {
    normalizedVector.add((val - mean) / sqrt(variance + epsilon));
  }

  // Affine transform: out = gamma * xHat + beta, elementwise.
  Vector outValue = [];
  for (int c = 0; c < numFeatures; c++) {
    outValue.add(gamma.value[c] * normalizedVector[c] + beta.value[c]);
  }

  Tensor<Vector> out = Tensor<Vector>(outValue);
  int cost = numFeatures * 8; // approximate op count for cost bookkeeping

  out.creator = Node([v, gamma, beta], () {
    // Gradient w.r.t. the normalized values, plus accumulation of the
    // gamma (scale) and beta (shift) parameter gradients.
    Vector gradXHat = [];
    for (int c = 0; c < numFeatures; c++) {
      gradXHat.add(out.grad[c] * gamma.value[c]);
      gamma.grad[c] += out.grad[c] * normalizedVector[c];
      beta.grad[c] += out.grad[c];
    }

    // Sum of dL/dxHat over all features (gradient path through the mean).
    double sumGradXHat = 0;
    for (double val in gradXHat) { sumGradXHat += val; }

    // Dot product of dL/dxHat with xHat (gradient path through the variance).
    double dotProductTerm = 0;
    for (int c = 0; c < numFeatures; c++) {
      dotProductTerm += gradXHat[c] * normalizedVector[c];
    }

    // Combine the three terms of the layer-norm input gradient.
    for (int c = 0; c < numFeatures; c++) {
      double term1 = numFeatures * gradXHat[c];
      double term2 = sumGradXHat;
      double term3 = normalizedVector[c] * dotProductTerm;

      double totalGrad = (1.0 / (numFeatures * sqrt(variance + epsilon))) * (term1 - term2 - term3);
      v.grad[c] += totalGrad;
    }
  }, opName: 'layer_norm_vector', cost: cost);
  return out;
}
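
The backward closure applies the standard layer-norm gradient. In the notation above, the parameter gradients accumulated in the first loop are

$$\frac{\partial L}{\partial \gamma_c} = \frac{\partial L}{\partial y_c}\,\hat{x}_c, \qquad \frac{\partial L}{\partial \beta_c} = \frac{\partial L}{\partial y_c}, \qquad \frac{\partial L}{\partial \hat{x}_c} = \frac{\partial L}{\partial y_c}\,\gamma_c,$$

and the input gradient combines three terms:

$$\frac{\partial L}{\partial x_c} = \frac{1}{N\sqrt{\sigma^2 + \epsilon}}\left(N\,\frac{\partial L}{\partial \hat{x}_c} \;-\; \sum_{k=1}^{N}\frac{\partial L}{\partial \hat{x}_k} \;-\; \hat{x}_c\sum_{k=1}^{N}\frac{\partial L}{\partial \hat{x}_k}\,\hat{x}_k\right)$$

term1, term2, and term3 in the code correspond to the three bracketed terms, and the leading factor is the 1.0 / (numFeatures * sqrt(variance + epsilon)) scale.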