expFloat32x4 function

Float32x4 expFloat32x4(
  1. Float32x4 entry
)

SIMD Optimized Exponential function.

Author: Graciliano Monteiro Passos

Implementation

Float32x4 expFloat32x4(Float32x4 entry) {
  entry = entry.clamp(_float32x4__87, _float32x4_87);

  final x = entry.x;
  final y = entry.y;
  final z = entry.z;
  final w = entry.w;

  var xInt = _expXToInt(x);
  var yInt = _expXToInt(y);
  var zInt = _expXToInt(z);
  var wInt = _expXToInt(w);

  final xIdx = expIntTableMaxIndex + xInt;
  final yIdx = expIntTableMaxIndex + yInt;
  final zIdx = expIntTableMaxIndex + zInt;
  final wIdx = expIntTableMaxIndex + wInt;

  // Lookup exp(floor(x)).
  // intPartA will have the upper 22 bits, intPartB will have the lower
  // 52 bits.
  final intPartA = Float32x4(expIntTableA[xIdx], expIntTableA[yIdx],
      expIntTableA[zIdx], expIntTableA[wIdx]);
  final intPartB = Float32x4(expIntTableB[xIdx], expIntTableB[yIdx],
      expIntTableB[zIdx], expIntTableB[wIdx]);

  final entryInt = Float32x4(
      xInt.toDouble(), yInt.toDouble(), zInt.toDouble(), wInt.toDouble());

  // Get the fractional part of x, find the greatest multiple of 2^-10 less than
  // x and look up the exp function of it.
  // fracPartA will have the upper 22 bits, fracPartB the lower 52 bits.
  final frac = ((entry - entryInt) * _float32x4_1024);

  final fracIntX = frac.x.toInt();
  final fracIntY = frac.y.toInt();
  final fracIntZ = frac.z.toInt();
  final fracIntW = frac.w.toInt();

  final fracPartA = Float32x4(expFracTableA[fracIntX], expFracTableA[fracIntY],
      expFracTableA[fracIntZ], expFracTableA[fracIntW]);
  final fracPartB = Float32x4(expFracTableB[fracIntX], expFracTableB[fracIntY],
      expFracTableB[fracIntZ], expFracTableB[fracIntW]);

  // epsilon is the difference in x from the nearest multiple of 2^-10.  It
  // has a value in the range 0 <= epsilon < 2^-10.
  // Do the subtraction from x as the last step to avoid possible
  // loss of precision.
  final epsilon = entry -
      (entryInt +
          Float32x4(
                fracIntX.toDouble(),
                fracIntY.toDouble(),
                fracIntZ.toDouble(),
                fracIntW.toDouble(),
              ) /
              _float32x4_1024);

  // Compute z = exp(epsilon) - 1.0 via a minimax polynomial.  z has
  // full double precision (52 bits).  Since z < 2^-10, we will have
  // 62 bits of precision when combined with the contant 1.  This will be
  // used in the last addition below to get proper rounding.

  // Remez generated polynomial.  Converges on the interval [0, 2^-10], error
  // is less than 0.5 ULP
  var o = _float32x4_0_04168701738764507;
  o = o * epsilon + _float32x4_0_1666666505023083;
  o = o * epsilon + _float32x4_0_5000000000042687;
  o = o * epsilon + _float32x4_1_0;
  o = o * epsilon + _float32x4__3_940510424527919E_20;

  // Compute (intPartA+intPartB) * (fracPartA+fracPartB) by binomial
  // expansion.
  // tempA is exact since intPartA and intPartB only have 22 bits each.
  // tempB will have 52 bits of precision.
  final tempA = intPartA * fracPartA;
  final tempB =
      intPartA * fracPartB + intPartB * fracPartA + intPartB * fracPartB;

  // Compute the result.  (1+z)(tempA+tempB).  Order of operations is
  // important.  For accuracy add by increasing size.  tempA is exact and
  // much larger than the others.  If there are extra bits specified from the
  // pow() function, use them.
  final tempC = tempB + tempA;
  final result = tempC * o + tempB + tempA;

  return result;
}