16 #ifndef EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H
17 #define EIGEN_ARCH_GENERIC_PACKET_MATH_FUNCTIONS_H
33 enum { mantissa_bits = numext::numeric_limits<Scalar>::digits - 1};
34 return pcast<PacketI, Packet>(plogical_shift_right<mantissa_bits>(preinterpret<PacketI>(
pabs(
a))));
44 TotalBits =
sizeof(Scalar) * CHAR_BIT,
45 MantissaBits = numext::numeric_limits<Scalar>::digits - 1,
46 ExponentBits =
int(TotalBits) - int(MantissaBits) - 1
50 ~(((ScalarUI(1) << int(ExponentBits)) - ScalarUI(1)) <<
int(MantissaBits));
51 const Packet sign_mantissa_mask = pset1frombits<Packet>(
static_cast<ScalarUI
>(scalar_sign_mantissa_mask));
52 const Packet half = pset1<Packet>(Scalar(0.5));
58 EIGEN_CONSTEXPR ScalarUI scalar_normalization_offset = ScalarUI(
int(MantissaBits) + 1);
60 const Scalar scalar_normalization_factor = Scalar(ScalarUI(1) <<
int(scalar_normalization_offset));
61 const Packet normalization_factor = pset1<Packet>(scalar_normalization_factor);
65 const Scalar scalar_exponent_offset = -Scalar((ScalarUI(1)<<(
int(ExponentBits)-1)) - ScalarUI(2));
66 Packet exponent_offset = pset1<Packet>(scalar_exponent_offset);
67 const Packet normalization_offset = pset1<Packet>(-Scalar(scalar_normalization_offset));
68 exponent_offset =
pselect(is_denormal,
padd(exponent_offset, normalization_offset), exponent_offset);
74 const Scalar scalar_non_finite_exponent = Scalar((ScalarUI(1) <<
int(ExponentBits)) - ScalarUI(1));
75 const Packet non_finite_exponent = pset1<Packet>(scalar_non_finite_exponent);
78 exponent =
pselect(is_zero_or_not_finite, zero,
padd(exponent, exponent_offset));
112 TotalBits =
sizeof(Scalar) * CHAR_BIT,
113 MantissaBits = numext::numeric_limits<Scalar>::digits - 1,
114 ExponentBits =
int(TotalBits) - int(MantissaBits) - 1
117 const Packet max_exponent = pset1<Packet>(Scalar((ScalarI(1)<<
int(ExponentBits)) + ScalarI(
int(MantissaBits) - 1)));
118 const PacketI bias = pset1<PacketI>((ScalarI(1)<<(
int(ExponentBits)-1)) - ScalarI(1));
119 const PacketI e = pcast<Packet, PacketI>(
pmin(
pmax(exponent,
pnegate(max_exponent)), max_exponent));
120 PacketI b = parithmetic_shift_right<2>(e);
138 template<
typename Packet>
166 template <
typename Packet,
bool base2>
173 const Packet cst_1 = pset1<Packet>(1.0f);
174 const Packet cst_neg_half = pset1<Packet>(-0.5f);
176 const Packet cst_min_norm_pos = pset1frombits<Packet>( 0x00800000u);
177 const Packet cst_minus_inf = pset1frombits<Packet>( 0xff800000u);
178 const Packet cst_pos_inf = pset1frombits<Packet>( 0x7f800000u);
181 const Packet cst_cephes_SQRTHF = pset1<Packet>(0.707106781186547524f);
182 const Packet cst_cephes_log_p0 = pset1<Packet>(7.0376836292E-2f);
183 const Packet cst_cephes_log_p1 = pset1<Packet>(-1.1514610310E-1f);
184 const Packet cst_cephes_log_p2 = pset1<Packet>(1.1676998740E-1f);
185 const Packet cst_cephes_log_p3 = pset1<Packet>(-1.2420140846E-1f);
186 const Packet cst_cephes_log_p4 = pset1<Packet>(+1.4249322787E-1f);
187 const Packet cst_cephes_log_p5 = pset1<Packet>(-1.6668057665E-1f);
188 const Packet cst_cephes_log_p6 = pset1<Packet>(+2.0000714765E-1f);
189 const Packet cst_cephes_log_p7 = pset1<Packet>(-2.4999993993E-1f);
190 const Packet cst_cephes_log_p8 = pset1<Packet>(+3.3333331174E-1f);
193 x =
pmax(x, cst_min_norm_pos);
218 y =
pmadd(cst_cephes_log_p0, x, cst_cephes_log_p1);
219 y1 =
pmadd(cst_cephes_log_p3, x, cst_cephes_log_p4);
220 y2 =
pmadd(cst_cephes_log_p6, x, cst_cephes_log_p7);
221 y =
pmadd(
y, x, cst_cephes_log_p2);
222 y1 =
pmadd(y1, x, cst_cephes_log_p5);
223 y2 =
pmadd(y2, x, cst_cephes_log_p8);
228 y =
pmadd(cst_neg_half, x2,
y);
234 x =
pmadd(x, cst_log2e, e);
237 x =
pmadd(e, cst_ln2, x);
247 return pselect(iszero_mask, cst_minus_inf,
248 por(
pselect(pos_inf_mask,cst_pos_inf,x), invalid_mask));
251 template <
typename Packet>
259 template <
typename Packet>
276 template <
typename Packet,
bool base2>
283 const Packet cst_1 = pset1<Packet>(1.0);
284 const Packet cst_neg_half = pset1<Packet>(-0.5);
286 const Packet cst_min_norm_pos = pset1frombits<Packet>(
static_cast<uint64_t>(0x0010000000000000ull));
287 const Packet cst_minus_inf = pset1frombits<Packet>(
static_cast<uint64_t>(0xfff0000000000000ull));
288 const Packet cst_pos_inf = pset1frombits<Packet>(
static_cast<uint64_t>(0x7ff0000000000000ull));
293 const Packet cst_cephes_SQRTHF = pset1<Packet>(0.70710678118654752440E0);
294 const Packet cst_cephes_log_p0 = pset1<Packet>(1.01875663804580931796E-4);
295 const Packet cst_cephes_log_p1 = pset1<Packet>(4.97494994976747001425E-1);
296 const Packet cst_cephes_log_p2 = pset1<Packet>(4.70579119878881725854E0);
297 const Packet cst_cephes_log_p3 = pset1<Packet>(1.44989225341610930846E1);
298 const Packet cst_cephes_log_p4 = pset1<Packet>(1.79368678507819816313E1);
299 const Packet cst_cephes_log_p5 = pset1<Packet>(7.70838733755885391666E0);
301 const Packet cst_cephes_log_q0 = pset1<Packet>(1.0);
302 const Packet cst_cephes_log_q1 = pset1<Packet>(1.12873587189167450590E1);
303 const Packet cst_cephes_log_q2 = pset1<Packet>(4.52279145837532221105E1);
304 const Packet cst_cephes_log_q3 = pset1<Packet>(8.29875266912776603211E1);
305 const Packet cst_cephes_log_q4 = pset1<Packet>(7.11544750618563894466E1);
306 const Packet cst_cephes_log_q5 = pset1<Packet>(2.31251620126765340583E1);
309 x =
pmax(x, cst_min_norm_pos);
334 y =
pmadd(cst_cephes_log_p0, x, cst_cephes_log_p1);
335 y1 =
pmadd(cst_cephes_log_p3, x, cst_cephes_log_p4);
336 y =
pmadd(
y, x, cst_cephes_log_p2);
337 y1 =
pmadd(y1, x, cst_cephes_log_p5);
340 y =
pmadd(cst_cephes_log_q0, x, cst_cephes_log_q1);
341 y1 =
pmadd(cst_cephes_log_q3, x, cst_cephes_log_q4);
342 y =
pmadd(
y, x, cst_cephes_log_q2);
343 y1 =
pmadd(y1, x, cst_cephes_log_q5);
349 y =
pmadd(cst_neg_half, x2,
y);
355 x =
pmadd(x, cst_log2e, e);
358 x =
pmadd(e, cst_ln2, x);
368 return pselect(iszero_mask, cst_minus_inf,
369 por(
pselect(pos_inf_mask,cst_pos_inf,x), invalid_mask));
372 template <
typename Packet>
380 template <
typename Packet>
391 template<
typename Packet>
395 const Packet one = pset1<Packet>(ScalarType(1));
401 return pselect(
por(small_mask, inf_mask), x, log_large);
407 template<
typename Packet>
411 const Packet one = pset1<Packet>(ScalarType(1));
412 const Packet neg_one = pset1<Packet>(ScalarType(-1));
436 template <
typename Packet>
441 const Packet cst_1 = pset1<Packet>(1.0f);
442 const Packet cst_half = pset1<Packet>(0.5f);
443 const Packet cst_exp_hi = pset1<Packet>( 88.723f);
444 const Packet cst_exp_lo = pset1<Packet>(-88.723f);
446 const Packet cst_cephes_LOG2EF = pset1<Packet>(1.44269504088896341f);
447 const Packet cst_cephes_exp_p0 = pset1<Packet>(1.9875691500E-4f);
448 const Packet cst_cephes_exp_p1 = pset1<Packet>(1.3981999507E-3f);
449 const Packet cst_cephes_exp_p2 = pset1<Packet>(8.3334519073E-3f);
450 const Packet cst_cephes_exp_p3 = pset1<Packet>(4.1665795894E-2f);
451 const Packet cst_cephes_exp_p4 = pset1<Packet>(1.6666665459E-1f);
452 const Packet cst_cephes_exp_p5 = pset1<Packet>(5.0000001201E-1f);
464 const Packet cst_cephes_exp_C1 = pset1<Packet>(-0.693359375f);
465 const Packet cst_cephes_exp_C2 = pset1<Packet>(2.12194440e-4f);
467 r =
pmadd(m, cst_cephes_exp_C2, r);
474 y =
pmadd(cst_cephes_exp_p0, r, cst_cephes_exp_p1);
475 y1 =
pmadd(cst_cephes_exp_p3, r, cst_cephes_exp_p4);
477 y =
pmadd(
y, r, cst_cephes_exp_p2);
478 y1 =
pmadd(y1, r, cst_cephes_exp_p5);
487 template <
typename Packet>
494 const Packet cst_1 = pset1<Packet>(1.0);
495 const Packet cst_2 = pset1<Packet>(2.0);
496 const Packet cst_half = pset1<Packet>(0.5);
498 const Packet cst_exp_hi = pset1<Packet>(709.784);
499 const Packet cst_exp_lo = pset1<Packet>(-709.784);
501 const Packet cst_cephes_LOG2EF = pset1<Packet>(1.4426950408889634073599);
502 const Packet cst_cephes_exp_p0 = pset1<Packet>(1.26177193074810590878e-4);
503 const Packet cst_cephes_exp_p1 = pset1<Packet>(3.02994407707441961300e-2);
504 const Packet cst_cephes_exp_p2 = pset1<Packet>(9.99999999999999999910e-1);
505 const Packet cst_cephes_exp_q0 = pset1<Packet>(3.00198505138664455042e-6);
506 const Packet cst_cephes_exp_q1 = pset1<Packet>(2.52448340349684104192e-3);
507 const Packet cst_cephes_exp_q2 = pset1<Packet>(2.27265548208155028766e-1);
508 const Packet cst_cephes_exp_q3 = pset1<Packet>(2.00000000000000000009e0);
509 const Packet cst_cephes_exp_C1 = pset1<Packet>(0.693145751953125);
510 const Packet cst_cephes_exp_C2 = pset1<Packet>(1.42860682030941723212e-6);
515 x =
pmax(
pmin(x, cst_exp_hi), cst_exp_lo);
517 fx =
pmadd(cst_cephes_LOG2EF, x, cst_half);
525 tmp =
pmul(fx, cst_cephes_exp_C1);
533 Packet px = cst_cephes_exp_p0;
534 px =
pmadd(px, x2, cst_cephes_exp_p1);
535 px =
pmadd(px, x2, cst_cephes_exp_p2);
539 Packet qx = cst_cephes_exp_q0;
540 qx =
pmadd(qx, x2, cst_cephes_exp_q1);
541 qx =
pmadd(qx, x2, cst_cephes_exp_q2);
542 qx =
pmadd(qx, x2, cst_cephes_exp_q3);
548 x =
pmadd(cst_2, x, cst_1);
572 const double pio2_62 = 3.4061215800865545e-19;
577 static const uint32_t two_over_pi [] =
579 0x00000028, 0x000028be, 0x0028be60, 0x28be60db,
580 0xbe60db93, 0x60db9391, 0xdb939105, 0x9391054a,
581 0x91054a7f, 0x054a7f09, 0x4a7f09d5, 0x7f09d5f4,
582 0x09d5f47d, 0xd5f47d4d, 0xf47d4d37, 0x7d4d3770,
583 0x4d377036, 0x377036d8, 0x7036d8a5, 0x36d8a566,
584 0xd8a5664f, 0xa5664f10, 0x664f10e4, 0x4f10e410,
585 0x10e41000, 0xe4100000
588 uint32_t xi = numext::bit_cast<uint32_t>(xf);
595 xi = ((xi & 0x007fffffu)| 0x00800000u) << (e & 0x7);
598 uint32_t twoopi_1 = two_over_pi[i-1];
599 uint32_t twoopi_2 = two_over_pi[i+3];
600 uint32_t twoopi_3 = two_over_pi[i+7];
605 p =
uint64_t(xi) * twoopi_2 + (p >> 32);
606 p = (
uint64_t(xi * twoopi_1) << 32) + p;
609 uint64_t q = (p + zero_dot_five) >> 62;
616 return float(
double(
int64_t(p)) * pio2_62);
619 template<
bool ComputeSine,
typename Packet>
622 #if EIGEN_GNUC_AT_LEAST(4,4) && EIGEN_COMP_GNUC_STRICT
629 const Packet cst_2oPI = pset1<Packet>(0.636619746685028076171875f);
630 const Packet cst_rounding_magic = pset1<Packet>(12582912);
631 const PacketI csti_1 = pset1<PacketI>(1);
632 const Packet cst_sign_mask = pset1frombits<Packet>(0x80000000u);
642 PacketI y_int = preinterpret<PacketI>(y_round);
643 y =
psub(y_round, cst_rounding_magic);
647 #if defined(EIGEN_HAS_SINGLE_INSTRUCTION_MADD)
650 const float huge_th = ComputeSine ? 117435.992f : 71476.0625f;
651 x =
pmadd(
y, pset1<Packet>(-1.57079601287841796875f), x);
652 x =
pmadd(
y, pset1<Packet>(-3.1391647326017846353352069854736328125e-07f), x);
653 x =
pmadd(
y, pset1<Packet>(-5.390302529957764765544681040410068817436695098876953125e-15f), x);
661 const float huge_th = ComputeSine ? 25966.f : 18838.f;
662 x =
pmadd(
y, pset1<Packet>(-1.5703125), x);
664 x =
pmadd(
y, pset1<Packet>(-0.000483989715576171875), x);
666 x =
pmadd(
y, pset1<Packet>(1.62865035235881805419921875e-07), x);
667 x =
pmadd(
y, pset1<Packet>(5.5644315544167710640977020375430583953857421875e-11), x);
686 EIGEN_ALIGN_TO_BOUNDARY(
sizeof(
Packet))
float vals[PacketSize];
687 EIGEN_ALIGN_TO_BOUNDARY(
sizeof(
Packet))
float x_cpy[PacketSize];
688 EIGEN_ALIGN_TO_BOUNDARY(
sizeof(
Packet))
int y_int2[PacketSize];
692 for(
int k=0; k<PacketSize;++k)
698 x = ploadu<Packet>(x_cpy);
699 y_int = ploadu<PacketI>(y_int2);
705 Packet sign_bit = ComputeSine ?
pxor(_x, preinterpret<Packet>(plogical_shift_left<30>(y_int)))
706 : preinterpret<Packet>(plogical_shift_left<30>(
padd(y_int,csti_1)));
707 sign_bit =
pand(sign_bit, cst_sign_mask);
716 Packet y1 = pset1<Packet>(2.4372266125283204019069671630859375e-05f);
717 y1 =
pmadd(y1, x2, pset1<Packet>(-0.00138865201734006404876708984375f ));
718 y1 =
pmadd(y1, x2, pset1<Packet>(0.041666619479656219482421875f ));
719 y1 =
pmadd(y1, x2, pset1<Packet>(-0.5f));
720 y1 =
pmadd(y1, x2, pset1<Packet>(1.f));
730 Packet y2 = pset1<Packet>(-0.0001959234114083702898469196984621021329076029360294342041015625f);
731 y2 =
pmadd(y2, x2, pset1<Packet>( 0.0083326873655616851693794799871284340042620897293090820312500000f));
732 y2 =
pmadd(y2, x2, pset1<Packet>(-0.1666666203982298255503735617821803316473960876464843750000000000f));
734 y2 =
pmadd(y2, x, x);
737 y = ComputeSine ?
pselect(poly_mask,y2,y1)
741 return pxor(
y, sign_bit);
744 template<
typename Packet>
749 return psincos_float<true>(x);
752 template<
typename Packet>
757 return psincos_float<false>(x);
761 template<
typename Packet>
766 typedef typename Scalar::value_type RealScalar;
805 RealPacket a_abs =
pabs(
a.v);
807 RealPacket a_max =
pmax(a_abs, a_abs_flip);
808 RealPacket a_min =
pmin(a_abs, a_abs_flip);
809 RealPacket a_min_zero_mask =
pcmp_eq(a_min,
pzero(a_min));
810 RealPacket a_max_zero_mask =
pcmp_eq(a_max,
pzero(a_max));
811 RealPacket r =
pdiv(a_min, a_max);
812 const RealPacket cst_one = pset1<RealPacket>(RealScalar(1));
815 l =
pselect(a_min_zero_mask, a_max, l);
820 const RealPacket cst_half = pset1<RealPacket>(RealScalar(0.5));
829 Packet positive_real_result;
831 positive_real_result.v =
pselect(real_mask, rho.v, eta);
835 const RealScalar neg_zero = RealScalar(numext::bit_cast<float>(0x80000000u));
836 const RealPacket cst_imag_sign_mask = pset1<Packet>(Scalar(RealScalar(0.0), neg_zero)).v;
837 RealPacket imag_signs =
pand(
a.v, cst_imag_sign_mask);
838 Packet negative_real_result;
840 negative_real_result.v =
por(
pabs(
pcplxflip(positive_real_result).v), imag_signs);
843 Packet negative_real_mask;
845 negative_real_mask.v =
por(negative_real_mask.v,
pcplxflip(negative_real_mask).v);
846 Packet result =
pselect(negative_real_mask, negative_real_result, positive_real_result);
855 is_inf.v =
pcmp_eq(a_abs, cst_pos_inf);
857 is_real_inf.v =
pand(is_inf.v, real_mask);
861 real_inf_result.v =
pmul(a_abs, pset1<Packet>(Scalar(RealScalar(1.0), RealScalar(0.0))).v);
862 real_inf_result.v =
pselect(negative_real_mask.v,
pcplxflip(real_inf_result).v, real_inf_result.v);
865 is_imag_inf.v =
pandnot(is_inf.v, real_mask);
868 imag_inf_result.v =
por(
pand(cst_pos_inf, real_mask),
pandnot(
a.v, real_mask));
870 return pselect(is_imag_inf, imag_inf_result,
871 pselect(is_real_inf, real_inf_result,result));
881 template<
typename Packet>
890 template<
typename Packet>
898 #ifdef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
903 template<
typename Packet>
905 void twoprod(
const Packet& x,
const Packet&
y,
906 Packet& p_hi, Packet& p_lo) {
918 template<
typename Packet>
923 const Scalar shift_scale = Scalar(
uint64_t(1) << shift);
924 const Packet gamma =
pmul(pset1<Packet>(shift_scale + Scalar(1)), x);
926 x_hi =
padd(rho, gamma);
927 x_lo =
psub(x, x_hi);
934 template<
typename Packet>
938 Packet x_hi, x_lo, y_hi, y_lo;
944 p_lo =
pmadd(x_hi, y_lo, p_lo);
945 p_lo =
pmadd(x_lo, y_hi, p_lo);
946 p_lo =
pmadd(x_lo, y_lo, p_lo);
958 template<
typename Packet>
979 template<
typename Packet>
993 template<
typename Packet>
1012 template<
typename Packet>
1031 template<
typename Packet>
1037 twoprod(x_hi, x_lo, y_hi, p_hi_hi, p_hi_lo);
1039 twoprod(x_hi, x_lo, y_lo, p_lo_hi, p_lo_lo);
1040 fast_twosum(p_hi_hi, p_hi_lo, p_lo_hi, p_lo_lo, p_hi, p_lo);
1045 template <
typename Packet>
1050 approx_recip =
pmul(approx_recip, approx_recip);
1061 fast_twosum(pset1<Packet>(Scalar(2)), t1_hi, t2_hi, t2_lo);
1065 twoprod(t3_hi, t3_lo, approx_recip, recip_hi, recip_lo);
1070 template <
typename Scalar>
1072 template <
typename Packet>
1075 log2_x_hi =
plog2(x);
1076 log2_x_lo =
pzero(x);
1088 template <
typename Packet>
1106 const Packet p6 = pset1<Packet>( 9.703654795885e-2f);
1107 const Packet p5 = pset1<Packet>(-0.1690667718648f);
1108 const Packet p4 = pset1<Packet>( 0.1720575392246f);
1109 const Packet p3 = pset1<Packet>(-0.1789081543684f);
1110 const Packet p2 = pset1<Packet>( 0.2050433009862f);
1111 const Packet p1 = pset1<Packet>(-0.2404672354459f);
1112 const Packet p0 = pset1<Packet>( 0.2885761857032f);
1114 const Packet C3_hi = pset1<Packet>(-0.360674142838f);
1115 const Packet C3_lo = pset1<Packet>(-6.13283912543e-09f);
1116 const Packet C2_hi = pset1<Packet>(0.480897903442f);
1117 const Packet C2_lo = pset1<Packet>(-1.44861207474e-08f);
1118 const Packet C1_hi = pset1<Packet>(-0.721347510815f);
1119 const Packet C1_lo = pset1<Packet>(-4.84483164698e-09f);
1120 const Packet C0_hi = pset1<Packet>(1.44269502163f);
1121 const Packet C0_lo = pset1<Packet>(2.01711713999e-08f);
1122 const Packet one = pset1<Packet>(1.0f);
1130 p_even =
pmadd(p_even, x2, p2);
1131 p_even =
pmadd(p_even, x2, p0);
1133 p_odd =
pmadd(p_odd, x2, p1);
1144 fast_twosum(C3_hi, C3_lo, t_hi, t_lo, q_hi, q_lo);
1146 twoprod(q_hi, q_lo, x, t_hi, t_lo);
1147 fast_twosum(C2_hi, C2_lo, t_hi, t_lo, q_hi, q_lo);
1149 twoprod(q_hi, q_lo, x, t_hi, t_lo);
1150 fast_twosum(C1_hi, C1_lo, t_hi, t_lo, q_hi, q_lo);
1152 twoprod(q_hi, q_lo, x, t_hi, t_lo);
1153 fast_twosum(C0_hi, C0_lo, t_hi, t_lo, q_hi, q_lo);
1156 twoprod(q_hi, q_lo, x, log2_x_hi, log2_x_lo);
1169 template <
typename Packet>
1193 const Packet q12 = pset1<Packet>(2.87074255468000586e-9);
1194 const Packet q10 = pset1<Packet>(2.38957980901884082e-8);
1195 const Packet q8 = pset1<Packet>(2.31032094540014656e-7);
1196 const Packet q6 = pset1<Packet>(2.27279857398537278e-6);
1197 const Packet q4 = pset1<Packet>(2.31271023278625638e-5);
1198 const Packet q2 = pset1<Packet>(2.47556738444535513e-4);
1199 const Packet q0 = pset1<Packet>(2.88543873228900172e-3);
1200 const Packet C_hi = pset1<Packet>(0.0400377511598501157);
1201 const Packet C_lo = pset1<Packet>(-4.77726582251425391e-19);
1202 const Packet one = pset1<Packet>(1.0);
1204 const Packet cst_2_log2e_hi = pset1<Packet>(2.88539008177792677);
1205 const Packet cst_2_log2e_lo = pset1<Packet>(4.07660016854549667e-17);
1208 twoprod(cst_2_log2e_hi, cst_2_log2e_lo,
psub(x, one), num_hi, num_lo);
1212 Packet denom_hi, denom_lo;
1216 twoprod(num_hi, num_lo, denom_hi, denom_lo, r_hi, r_lo);
1219 twoprod(r_hi, r_lo, r_hi, r_lo, r2_hi, r2_lo);
1222 twoprod(r2_hi, r2_lo, r2_hi, r2_lo, r4_hi, r4_lo);
1228 q_even =
pmadd(q_even, r4_hi, q4);
1229 q_odd =
pmadd(q_odd, r4_hi, q2);
1230 q_even =
pmadd(q_even, r4_hi, q0);
1239 twoprod(r2_hi, r2_lo, q, p_hi, p_lo);
1242 fast_twosum(C_hi, C_lo, p_hi, p_lo, p1_hi, p1_lo);
1245 twoprod(r2_hi, r2_lo, p1_hi, p1_lo, p2_hi, p2_lo);
1251 twoprod(p3_hi, p3_lo, r_hi, r_lo, log2_x_hi, log2_x_lo);
1256 template <
typename Scalar>
1258 template <
typename Packet>
1272 template <
typename Packet>
1286 const Packet p4 = pset1<Packet>(1.539513905e-4f);
1287 const Packet p3 = pset1<Packet>(1.340007293e-3f);
1288 const Packet p2 = pset1<Packet>(9.618283249e-3f);
1289 const Packet p1 = pset1<Packet>(5.550328270e-2f);
1290 const Packet p0 = pset1<Packet>(0.2402264923f);
1292 const Packet C_hi = pset1<Packet>(0.6931471825f);
1293 const Packet C_lo = pset1<Packet>(2.36836577e-08f);
1294 const Packet one = pset1<Packet>(1.0f);
1302 p_even =
pmadd(p_even, x2, p0);
1312 twosum(p_hi, p_lo, C_hi, C_lo, q1_hi, q1_lo);
1315 twoprod(q1_hi, q1_lo, x, q2_hi, q2_lo);
1321 return padd(q3_hi,
padd(q2_lo, q3_lo));
1330 template <
typename Packet>
1344 const Packet p9 = pset1<Packet>(4.431642109085495276e-10);
1345 const Packet p8 = pset1<Packet>(7.073829923303358410e-9);
1346 const Packet p7 = pset1<Packet>(1.017822306737031311e-7);
1347 const Packet p6 = pset1<Packet>(1.321543498017646657e-6);
1348 const Packet p5 = pset1<Packet>(1.525273342728892877e-5);
1349 const Packet p4 = pset1<Packet>(1.540353045780084423e-4);
1350 const Packet p3 = pset1<Packet>(1.333355814685869807e-3);
1351 const Packet p2 = pset1<Packet>(9.618129107593478832e-3);
1352 const Packet p1 = pset1<Packet>(5.550410866481961247e-2);
1353 const Packet p0 = pset1<Packet>(0.240226506959101332);
1354 const Packet C_hi = pset1<Packet>(0.693147180559945286);
1355 const Packet C_lo = pset1<Packet>(4.81927865669806721e-17);
1356 const Packet one = pset1<Packet>(1.0);
1364 p_even =
pmadd(p_even, x2, p4);
1365 p_odd =
pmadd(p_odd, x2, p5);
1366 p_even =
pmadd(p_even, x2, p2);
1367 p_odd =
pmadd(p_odd, x2, p3);
1368 p_even =
pmadd(p_even, x2, p0);
1369 p_odd =
pmadd(p_odd, x2, p1);
1379 twosum(p_hi, p_lo, C_hi, C_lo, q1_hi, q1_lo);
1382 twoprod(q1_hi, q1_lo, x, q2_hi, q2_lo);
1388 return padd(q3_hi,
padd(q2_lo, q3_lo));
1397 template <
typename Packet>
1406 const Packet m_x_scale_mask =
pcmp_lt(m_x, pset1<Packet>(sqrt_half));
1407 m_x =
pselect(m_x_scale_mask,
pmul(pset1<Packet>(Scalar(2)), m_x), m_x);
1408 e_x =
pselect(m_x_scale_mask,
psub(e_x, pset1<Packet>(Scalar(1))), e_x);
1416 Packet f1_hi, f1_lo, f2_hi, f2_lo;
1418 twoprod(rx_hi, rx_lo,
y, f2_hi, f2_lo);
1426 fast_twosum(f1_hi, f1_lo, f2_hi, f2_lo, f_hi, f_lo);
1431 r_z =
padd(r_z, f_lo);
1434 n_z =
padd(n_z, n_r);
1446 template<
typename Packet>
1453 const Packet cst_zero = pset1<Packet>(Scalar(0));
1454 const Packet cst_one = pset1<Packet>(Scalar(1));
1466 const Packet x_is_neg_one =
pand(abs_x_is_one, x_is_neg);
1483 const Packet y_div_2 =
pmul(
y, pset1<Packet>(Scalar(0.5)));
1490 const Packet pow_is_one =
por(
por(x_is_one, y_is_zero),
1492 por(abs_y_is_inf,
pandnot(y_is_even, invalid_negative_x))));
1493 const Packet pow_is_nan =
por(invalid_negative_x,
por(x_is_nan, y_is_nan));
1495 pand(abs_x_is_inf, y_is_neg)),
1496 pand(
pand(abs_x_is_lt_one, abs_y_is_huge),
1498 pand(
pand(abs_x_is_gt_one, abs_y_is_huge),
1501 pand(abs_x_is_inf, y_is_pos)),
1502 pand(
pand(abs_x_is_lt_one, abs_y_is_huge),
1504 pand(
pand(abs_x_is_gt_one, abs_y_is_huge),
1513 pselect(pow_is_inf, cst_pos_inf,
1514 pselect(pow_is_zero, cst_zero,
1559 template <
typename Packet,
int N>
1567 template <
typename Packet>
1571 return pset1<Packet>(coeff[0]);
1627 template <
typename Packet,
int N>
1632 Packet b0 = pset1<Packet>(coef[0]);
1633 Packet b1 = pset1<Packet>(
static_cast<Scalar
>(0.f));
1636 for (
int i = 1; i <
N; i++) {
1639 b0 =
psub(
pmadd(x, b1, pset1<Packet>(coef[i])), b2);
1642 return pmul(pset1<Packet>(
static_cast<Scalar
>(0.5f)),
psub(b0, b2));
EIGEN_DEVICE_FUNC const Expm1ReturnType expm1() const
Definition: ArrayCwiseUnaryOps.h:111
#define EIGEN_CONSTEXPR
Definition: Macros.h:787
#define EIGEN_UNUSED_VARIABLE(var)
Definition: Macros.h:1076
#define EIGEN_UNUSED
Definition: Macros.h:1067
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:976
#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Definition: Macros.h:985
#define EIGEN_STRONG_INLINE
Definition: Macros.h:917
#define EIGEN_OPTIMIZATION_BARRIER(X)
Definition: Macros.h:1144
#define EIGEN_LOG2E
Definition: MathFunctions.h:17
#define EIGEN_LN2
Definition: MathFunctions.h:18
#define EIGEN_STATIC_ASSERT(CONDITION, MSG)
Definition: StaticAssert.h:127
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16() min(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:571
EIGEN_STRONG_INLINE Packet generic_pow_impl(const Packet &x, const Packet &y)
Definition: GenericPacketMathFunctions.h:1398
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:215
EIGEN_STRONG_INLINE Packet8f pzero(const Packet8f &)
Definition: PacketMath.h:247
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet psincos_float(const Packet &_x)
Definition: GenericPacketMathFunctions.h:625
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet plog2_float(const Packet _x)
Definition: GenericPacketMathFunctions.h:262
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet generic_pow(const Packet &x, const Packet &y)
Definition: GenericPacketMathFunctions.h:1449
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet &a)
Definition: GenericPacketMath.h:808
const Scalar & y
Definition: MathFunctions.h:821
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet &a)
Definition: GenericPacketMath.h:796
EIGEN_STRONG_INLINE bool predux_any(const Packet4f &x)
Definition: PacketMath.h:1765
EIGEN_STRONG_INLINE void veltkamp_splitting(const Packet &x, Packet &x_hi, Packet &x_lo)
Definition: GenericPacketMathFunctions.h:920
EIGEN_DEVICE_FUNC Packet pdiv(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:244
EIGEN_STRONG_INLINE void twosum(const Packet &x_hi, const Packet &x_lo, const Packet &y_hi, const Packet &y_lo, Packet &s_hi, Packet &s_lo)
Definition: GenericPacketMathFunctions.h:960
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet pexp_double(const Packet _x)
Definition: GenericPacketMathFunctions.h:490
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet &a)
Definition: GenericPacketMath.h:788
EIGEN_STRONG_INLINE Packet4i plogical_shift_left(const Packet4i &a)
Definition: PacketMath.h:1191
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet psin_float(const Packet &x)
Definition: GenericPacketMathFunctions.h:747
EIGEN_STRONG_INLINE void fast_twosum(const Packet &x, const Packet &y, Packet &s_hi, Packet &s_lo)
Definition: GenericPacketMathFunctions.h:892
EIGEN_DEVICE_FUNC Packet pmax(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:524
EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:867
Packet generic_plog1p(const Packet &x)
Definition: GenericPacketMathFunctions.h:392
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet pcos_float(const Packet &x)
Definition: GenericPacketMathFunctions.h:755
EIGEN_STRONG_INLINE Packet8h por(const Packet8h &a, const Packet8h &b)
Definition: PacketMath.h:1042
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: PacketMath.h:827
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet plog_impl_float(const Packet _x)
Definition: GenericPacketMathFunctions.h:169
EIGEN_DEVICE_FUNC Packet pmul(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:237
EIGEN_STRONG_INLINE Packet8h ptrue(const Packet8h &a)
Definition: PacketMath.h:978
EIGEN_DEVICE_FUNC Packet pmin(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:512
EIGEN_STRONG_INLINE Packet1cd pcplxflip(const Packet1cd &x)
Definition: Complex.h:620
EIGEN_STRONG_INLINE Packet8h pandnot(const Packet8h &a, const Packet8h &b)
Definition: PacketMath.h:1053
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pround(const Packet &a)
Definition: GenericPacketMath.h:826
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf &a)
Definition: Complex.h:166
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet plog2_double(const Packet _x)
Definition: GenericPacketMathFunctions.h:383
EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:868
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet psqrt_complex(const Packet &a)
Definition: GenericPacketMathFunctions.h:764
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet plog_float(const Packet _x)
Definition: GenericPacketMathFunctions.h:254
EIGEN_STRONG_INLINE Packet4d pfrexp_generic_get_biased_exponent(const Packet4d &a)
Definition: PacketMath.h:743
Packet generic_expm1(const Packet &x)
Definition: GenericPacketMathFunctions.h:408
EIGEN_STRONG_INLINE void absolute_split(const Packet &x, Packet &n, Packet &r)
Definition: GenericPacketMathFunctions.h:883
EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f &a)
Definition: PacketMath.h:1176
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet plog_double(const Packet _x)
Definition: GenericPacketMathFunctions.h:375
EIGEN_STRONG_INLINE Packet8f peven_mask(const Packet8f &)
Definition: PacketMath.h:252
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet plog_impl_double(const Packet _x)
Definition: GenericPacketMathFunctions.h:279
EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f &a)
Definition: PacketMath.h:723
EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
Definition: Complex.h:231
EIGEN_STRONG_INLINE Packet8h pldexp(const Packet8h &a, const Packet8h &exponent)
Definition: MathFunctions.h:196
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pldexp_generic(const Packet &a, const Packet &exponent)
Definition: GenericPacketMathFunctions.h:85
EIGEN_DEVICE_FUNC void pstoreu(Scalar *to, const Packet &from)
Definition: GenericPacketMath.h:700
EIGEN_STRONG_INLINE Packet8h pand(const Packet8h &a, const Packet8h &b)
Definition: PacketMath.h:1050
EIGEN_STRONG_INLINE Packet8h pxor(const Packet8h &a, const Packet8h &b)
Definition: PacketMath.h:1047
void doubleword_reciprocal(const Packet &x, Packet &recip_hi, Packet &recip_lo)
Definition: GenericPacketMathFunctions.h:1046
EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f &mask, const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:917
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pfrexp_generic(const Packet &a, Packet &exponent)
Definition: GenericPacketMathFunctions.h:40
EIGEN_DEVICE_FUNC Packet psub(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:222
EIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h &a, Packet8h &exponent)
Definition: MathFunctions.h:188
svint32_t PacketXi __attribute__((arm_sve_vector_bits(EIGEN_ARM64_SVE_VL)))
Definition: PacketMath.h:33
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet pexp_float(const Packet _x)
Definition: GenericPacketMathFunctions.h:439
EIGEN_STRONG_INLINE Packet4f prsqrt(const Packet4f &a)
Definition: PacketMath.h:730
EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:870
float trig_reduce_huge(float xf, int *quadrant)
Definition: GenericPacketMathFunctions.h:565
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pfloor(const Packet &a)
Definition: GenericPacketMath.h:830
EIGEN_STRONG_INLINE void twoprod(const Packet &x, const Packet &y, Packet &p_hi, Packet &p_lo)
Definition: GenericPacketMathFunctions.h:936
::uint64_t uint64_t
Definition: Meta.h:58
::int16_t int16_t
Definition: Meta.h:55
::uint32_t uint32_t
Definition: Meta.h:56
EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE bool() isfinite(const Eigen::bfloat16 &h)
Definition: BFloat16.h:671
::int32_t int32_t
Definition: Meta.h:57
::int64_t int64_t
Definition: Meta.h:59
Namespace containing all symbols from the Eigen library.
Definition: LDLT.h:16
type
The type the bitset is encoded with.
Definition: bitset.hpp:44
Definition: document.h:416
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition: pointer.h:1181
signed __int64 int64_t
Definition: stdint.h:135
unsigned int uint32_t
Definition: stdint.h:126
unsigned __int64 uint64_t
Definition: stdint.h:136
Holds information about the various numeric (i.e. scalar) types allowed by Eigen.
Definition: NumTraits.h:233
Definition: BFloat16.h:58
Packet2d v
Definition: Complex.h:410
EIGEN_STRONG_INLINE void operator()(const Packet &x, Packet &log2_x_hi, Packet &log2_x_lo)
Definition: GenericPacketMathFunctions.h:1171
EIGEN_STRONG_INLINE void operator()(const Packet &z, Packet &log2_x_hi, Packet &log2_x_lo)
Definition: GenericPacketMathFunctions.h:1090
Definition: GenericPacketMathFunctions.h:1071
EIGEN_STRONG_INLINE void operator()(const Packet &x, Packet &log2_x_hi, Packet &log2_x_lo)
Definition: GenericPacketMathFunctions.h:1074
EIGEN_STRONG_INLINE Packet operator()(const Packet &x)
Definition: GenericPacketMathFunctions.h:1332
EIGEN_STRONG_INLINE Packet operator()(const Packet &x)
Definition: GenericPacketMathFunctions.h:1274
Definition: GenericPacketMathFunctions.h:1257
EIGEN_STRONG_INLINE Packet operator()(const Packet &x)
Definition: GenericPacketMathFunctions.h:1260
numext::int16_t type
Definition: GenericPacketMathFunctions.h:27
numext::int64_t type
Definition: GenericPacketMathFunctions.h:25
numext::int32_t type
Definition: GenericPacketMathFunctions.h:24
numext::int16_t type
Definition: GenericPacketMathFunctions.h:26
Definition: GenericPacketMathFunctions.h:23
Definition: GenericPacketMathFunctions.h:1628
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(Packet x, const typename unpacket_traits< Packet >::type coef[])
Definition: GenericPacketMathFunctions.h:1630
Definition: GenericPacketMathFunctions.h:139
unpacket_traits< Packet >::type Scalar
Definition: GenericPacketMathFunctions.h:141
@ MantissaBits
Definition: GenericPacketMathFunctions.h:145
@ TotalBits
Definition: GenericPacketMathFunctions.h:144
@ ExponentBits
Definition: GenericPacketMathFunctions.h:146
static EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet run(const Packet &a, const Packet &exponent)
Definition: GenericPacketMathFunctions.h:150
unpacket_traits< Packet >::integer_packet PacketI
Definition: GenericPacketMathFunctions.h:140
unpacket_traits< PacketI >::type ScalarI
Definition: GenericPacketMathFunctions.h:142
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet &x, const typename unpacket_traits< Packet >::type coeff[])
Definition: GenericPacketMathFunctions.h:1569
Definition: GenericPacketMathFunctions.h:1560
static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet &x, const typename unpacket_traits< Packet >::type coeff[])
Definition: GenericPacketMathFunctions.h:1561
Definition: GenericPacketMath.h:133
T type
Definition: GenericPacketMath.h:134
Definition: PacketMath.h:47