`layerNormVector` — layer normalization of a single feature vector, with a registered autograd backward pass.
Implementation:
/// Applies layer normalization to the feature vector held by [v]:
/// standardizes to zero mean / unit variance (population statistics),
/// then applies the learned affine transform `gamma * x_hat + beta`.
///
/// [v]      : input activations, length n.
/// [gamma]  : per-feature scale; assumed same length as [v] — TODO confirm with callers.
/// [beta]   : per-feature shift; assumed same length as [v] — TODO confirm with callers.
/// [epsilon]: numerical-stability term added to the variance before the sqrt.
///
/// Returns a new tensor whose `creator` node back-propagates gradients
/// into [v], [gamma], and [beta].
Tensor<Vector> layerNormVector(Tensor<Vector> v, Tensor<Vector> gamma, Tensor<Vector> beta, {double epsilon = 1e-5}) {
  final int n = v.value.length;

  // Population mean of the input features.
  double mu = 0;
  for (int i = 0; i < n; i++) {
    mu += v.value[i];
  }
  mu /= n;

  // Biased (population) variance.
  double sqSum = 0;
  for (int i = 0; i < n; i++) {
    final double d = v.value[i] - mu;
    sqSum += d * d;
  }
  final double variance = sqSum / n;

  // x_hat: the standardized input, cached for the backward pass.
  Vector xHat = [];
  for (int i = 0; i < n; i++) {
    xHat.add((v.value[i] - mu) / sqrt(variance + epsilon));
  }

  // Affine transform: gamma * x_hat + beta, element-wise.
  Vector scaled = [];
  for (int i = 0; i < n; i++) {
    scaled.add(gamma.value[i] * xHat[i] + beta.value[i]);
  }

  Tensor<Vector> result = Tensor<Vector>(scaled);

  // Backward pass, standard layer-norm gradient:
  //   dL/dx = (1 / (n * std)) * (n * dxhat - sum(dxhat) - xhat * dot(dxhat, xhat))
  // where dxhat = dL/dy * gamma.  gamma/beta gradients fall out directly
  // from y = gamma * xhat + beta.
  result.creator = Node([v, gamma, beta], () {
    Vector dXHat = [];
    double dXHatSum = 0;
    double dXHatDotXHat = 0;
    for (int i = 0; i < n; i++) {
      final double g = result.grad[i] * gamma.value[i];
      dXHat.add(g);
      dXHatSum += g;
      dXHatDotXHat += g * xHat[i];
      gamma.grad[i] += result.grad[i] * xHat[i];
      beta.grad[i] += result.grad[i];
    }
    for (int i = 0; i < n; i++) {
      v.grad[i] += (1.0 / (n * sqrt(variance + epsilon))) *
          (n * dXHat[i] - dXHatSum - xHat[i] * dXHatDotXHat);
    }
  }, opName: 'layer_norm_vector', cost: n * 8); // cost heuristic: 8 flops per feature, as in the original
  return result;
}