10 #ifndef THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
11 #define THIRD_PARTY_EIGEN3_EIGEN_SRC_CORE_ARCH_AVX512_MATHFUNCTIONS_H_
18 #if EIGEN_GNUC_AT_LEAST(5, 3) || EIGEN_COMP_CLANG || EIGEN_COMP_MSVC >= 1923
// Declares a constant named `p16f_<NAME>` of type Packet16f, initialized with
// pset1<Packet16f>(X). The expansion carries no trailing semicolon; each use
// site supplies its own.
// NOTE(review): pset1 presumably replicates X into every lane -- confirm
// against PacketMath.h.
20 #define _EIGEN_DECLARE_CONST_Packet16f(NAME, X) \
21 const Packet16f p16f_##NAME = pset1<Packet16f>(X)
// Declares a constant named `p16f_<NAME>` of type Packet16f whose value is
// given as an integer X: a Packet16i is built with pset1<Packet16i>(X) and
// then passed through preinterpret<Packet16f, Packet16i>. Used in this file
// with a hexadecimal literal (0x7f800000) to declare `p16f_inf`.
// NOTE(review): preinterpret is presumably a bit-level reinterpretation, not
// a numeric int-to-float conversion -- confirm against PacketMath.h.
23 #define _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(NAME, X) \
24 const Packet16f p16f_##NAME = preinterpret<Packet16f,Packet16i>(pset1<Packet16i>(X))
// Double-precision counterpart of the Packet16f constant macro: declares a
// constant named `p8d_<NAME>` of type Packet8d, initialized with
// pset1<Packet8d>(X). No trailing semicolon is emitted by the macro.
26 #define _EIGEN_DECLARE_CONST_Packet8d(NAME, X) \
27 const Packet8d p8d_##NAME = pset1<Packet8d>(X)
// Declares a constant named `p8d_<NAME>` of type Packet8d from a 64-bit
// integer X. Unlike the other macros here it calls the AVX-512 intrinsics
// directly: _mm512_set1_epi64 fills a 512-bit integer vector with X and
// _mm512_castsi512_pd reinterprets that vector as packed doubles. Used in
// this file with 0x7ff0000000000000LL to declare `p8d_inf`.
29 #define _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(NAME, X) \
30 const Packet8d p8d_##NAME = _mm512_castsi512_pd(_mm512_set1_epi64(X))
// Declares a constant named `p16bf_<NAME>` of type Packet16bf, initialized
// with pset1<Packet16bf>(X). No trailing semicolon is emitted by the macro.
// NOTE(review): no use of this macro is visible in this excerpt -- check
// whether it is still needed.
32 #define _EIGEN_DECLARE_CONST_Packet16bf(NAME, X) \
33 const Packet16bf p16bf_##NAME = pset1<Packet16bf>(X)
// Declares a constant named `p16bf_<NAME>` of type Packet16bf from an integer
// X by building pset1<Packet16i>(X) and passing it through
// preinterpret<Packet16bf, Packet16i>.
// NOTE(review): Packet16i is a 512-bit __m512i while Packet16bf wraps a
// 256-bit __m256i; confirm that a preinterpret<Packet16bf, Packet16i>
// overload actually exists before relying on this macro.
35 #define _EIGEN_DECLARE_CONST_Packet16bf_FROM_INT(NAME, X) \
36 const Packet16bf p16bf_##NAME = preinterpret<Packet16bf,Packet16i>(pset1<Packet16i>(X))
61 plog2<Packet8d>(
const Packet8d& _x) {
74 _EIGEN_DECLARE_CONST_Packet16f(1, 1.0f);
75 _EIGEN_DECLARE_CONST_Packet16f(half, 0.5f);
76 _EIGEN_DECLARE_CONST_Packet16f(127, 127.0f);
78 _EIGEN_DECLARE_CONST_Packet16f(exp_hi, 88.3762626647950f);
79 _EIGEN_DECLARE_CONST_Packet16f(exp_lo, -88.3762626647949f);
81 _EIGEN_DECLARE_CONST_Packet16f(cephes_LOG2EF, 1.44269504088896341f);
83 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p0, 1.9875691500E-4f);
84 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p1, 1.3981999507E-3f);
85 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p2, 8.3334519073E-3f);
86 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p3, 4.1665795894E-2f);
87 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p4, 1.6666665459E-1f);
88 _EIGEN_DECLARE_CONST_Packet16f(cephes_exp_p5, 5.0000001201E-1f);
95 Packet16f m = _mm512_floor_ps(
pmadd(x, p16f_cephes_LOG2EF, p16f_half));
99 _EIGEN_DECLARE_CONST_Packet16f(nln2, -0.6931471805599453f);
100 Packet16f r = _mm512_fmadd_ps(m, p16f_nln2, x);
106 y =
pmadd(p16f_cephes_exp_p0, r, p16f_cephes_exp_p1);
107 y1 =
pmadd(p16f_cephes_exp_p3, r, p16f_cephes_exp_p4);
108 y2 =
padd(r, p16f_1);
109 y =
pmadd(
y, r, p16f_cephes_exp_p2);
110 y1 =
pmadd(y1, r, p16f_cephes_exp_p5);
116 emm0 = _mm512_slli_epi32(emm0, 23);
119 return pmax(
pmul(
y, _mm512_castsi512_ps(emm0)), _x);
124 pexp<Packet8d>(
const Packet8d& _x) {
168 __mmask16 denormal_mask = _mm512_kand(
171 _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_GE_OQ));
179 return _mm512_mask_blend_ps(denormal_mask,
pmul(_x,x), _mm512_setzero_ps());
184 psqrt<Packet8d>(
const Packet8d& _x) {
186 __mmask16 denormal_mask = _mm512_kand(
189 _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_GE_OQ));
199 return _mm512_mask_blend_pd(denormal_mask,
pmul(_x,x), _mm512_setzero_pd());
204 return _mm512_sqrt_ps(x);
209 return _mm512_sqrt_pd(x);
217 #if defined(EIGEN_VECTORIZE_AVX512ER)
221 return _mm512_rsqrt28_ps(x);
223 #elif EIGEN_FAST_MATH
228 _EIGEN_DECLARE_CONST_Packet16f_FROM_INT(inf, 0x7f800000);
229 _EIGEN_DECLARE_CONST_Packet16f(one_point_five, 1.5f);
230 _EIGEN_DECLARE_CONST_Packet16f(minus_half, -0.5f);
235 __mmask16 inf_mask = _mm512_cmp_ps_mask(_x, p16f_inf, _CMP_EQ_OQ);
236 __mmask16 not_pos_mask = _mm512_cmp_ps_mask(_x, _mm512_setzero_ps(), _CMP_LE_OQ);
237 __mmask16 not_finite_pos_mask = not_pos_mask | inf_mask;
241 Packet16f y_approx = _mm512_rsqrt14_ps(_x);
252 return _mm512_mask_blend_ps(not_finite_pos_mask, y_newton, y_approx);
258 _EIGEN_DECLARE_CONST_Packet16f(one, 1.0f);
259 return _mm512_div_ps(p16f_one, _mm512_sqrt_ps(x));
270 prsqrt<Packet8d>(
const Packet8d& _x) {
271 _EIGEN_DECLARE_CONST_Packet8d(one_point_five, 1.5);
272 _EIGEN_DECLARE_CONST_Packet8d(minus_half, -0.5);
273 _EIGEN_DECLARE_CONST_Packet8d_FROM_INT64(inf, 0x7ff0000000000000LL);
278 __mmask8 inf_mask = _mm512_cmp_pd_mask(_x, p8d_inf, _CMP_EQ_OQ);
279 __mmask8 not_pos_mask = _mm512_cmp_pd_mask(_x, _mm512_setzero_pd(), _CMP_LE_OQ);
280 __mmask8 not_finite_pos_mask = not_pos_mask | inf_mask;
284 #if defined(EIGEN_VECTORIZE_AVX512ER)
285 Packet8d y_approx = _mm512_rsqrt28_pd(_x);
287 Packet8d y_approx = _mm512_rsqrt14_pd(_x);
297 #if !defined(EIGEN_VECTORIZE_AVX512ER)
298 y_newton =
pmul(y_newton,
pmadd(y_newton,
pmul(neg_half, y_newton), p8d_one_point_five));
303 return _mm512_mask_blend_pd(not_finite_pos_mask, y_newton, y_approx);
308 _EIGEN_DECLARE_CONST_Packet8d(one, 1.0f);
309 return _mm512_div_pd(p8d_one, _mm512_sqrt_pd(x));
#define BF16_PACKET_FUNCTION(PACKET_F, PACKET_BF16, METHOD)
Definition: BFloat16.h:19
#define F16_PACKET_FUNCTION(PACKET_F, PACKET_F16, METHOD)
Definition: Half.h:53
#define EIGEN_UNUSED
Definition: Macros.h:1067
#define EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS
Definition: Macros.h:985
#define EIGEN_STRONG_INLINE
Definition: Macros.h:917
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC bfloat16() min(const bfloat16 &a, const bfloat16 &b)
Definition: BFloat16.h:571
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet &a)
Definition: GenericPacketMath.h:792
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f ptanh< Packet16f >(const Packet16f &_x)
Definition: MathFunctions.h:346
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:215
EIGEN_STRONG_INLINE Packet8h float2half(const Packet8f &a)
Definition: PacketMath.h:1007
EIGEN_STRONG_INLINE Packet8f Bf16ToF32(const Packet8bf &a)
Definition: PacketMath.h:1260
EIGEN_STRONG_INLINE Packet16f pfrexp< Packet16f >(const Packet16f &a, Packet16f &exponent)
Definition: PacketMath.h:898
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet plog2_float(const Packet _x)
Definition: GenericPacketMathFunctions.h:262
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f psin< Packet16f >(const Packet16f &_x)
Definition: MathFunctions.h:334
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet &a)
Definition: GenericPacketMath.h:808
__m512d Packet8d
Definition: PacketMath.h:33
const Scalar & y
Definition: MathFunctions.h:821
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet &a)
Definition: GenericPacketMath.h:796
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet pexp_double(const Packet _x)
Definition: GenericPacketMathFunctions.h:490
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet &a)
Definition: GenericPacketMath.h:788
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet &a)
Definition: GenericPacketMath.h:756
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet psin_float(const Packet &x)
Definition: GenericPacketMathFunctions.h:747
EIGEN_DEVICE_FUNC Packet pmax(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:524
Packet generic_plog1p(const Packet &x)
Definition: GenericPacketMathFunctions.h:392
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet &a)
Definition: GenericPacketMath.h:752
eigen_packet_wrapper< __m256i, 2 > Packet16bf
Definition: PacketMath.h:35
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet pcos_float(const Packet &x)
Definition: GenericPacketMathFunctions.h:755
EIGEN_STRONG_INLINE Packet8f half2float(const Packet8h &a)
Definition: PacketMath.h:988
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: PacketMath.h:827
EIGEN_STRONG_INLINE Packet16f pset1< Packet16f >(const float &from)
Definition: PacketMath.h:197
EIGEN_DEVICE_FUNC Packet pmul(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:237
EIGEN_DEVICE_FUNC Packet pmin(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:512
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet plog2_double(const Packet _x)
Definition: GenericPacketMathFunctions.h:383
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet &a)
Definition: GenericPacketMath.h:784
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet plog_float(const Packet _x)
Definition: GenericPacketMathFunctions.h:254
Packet generic_expm1(const Packet &x)
Definition: GenericPacketMathFunctions.h:408
EIGEN_STRONG_INLINE Packet16f pldexp< Packet16f >(const Packet16f &a, const Packet16f &exponent)
Definition: PacketMath.h:919
__m512i Packet16i
Definition: PacketMath.h:32
EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet &a)
Definition: GenericPacketMath.h:800
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet plog_double(const Packet _x)
Definition: GenericPacketMathFunctions.h:375
EIGEN_STRONG_INLINE Packet4f psqrt(const Packet4f &a)
Definition: PacketMath.h:723
T generic_fast_tanh_float(const T &a_x)
Definition: MathFunctionsImpl.h:29
EIGEN_STRONG_INLINE Packet8h pldexp(const Packet8h &a, const Packet8h &exponent)
Definition: MathFunctions.h:196
eigen_packet_wrapper< __m256i, 1 > Packet16h
Definition: PacketMath.h:34
EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet16f pcos< Packet16f >(const Packet16f &_x)
Definition: MathFunctions.h:340
EIGEN_STRONG_INLINE Packet8d pset1< Packet8d >(const double &from)
Definition: PacketMath.h:201
EIGEN_STRONG_INLINE Packet8h pfrexp(const Packet8h &a, Packet8h &exponent)
Definition: MathFunctions.h:188
EIGEN_STRONG_INLINE Packet4f prsqrt(const Packet4f &a)
Definition: PacketMath.h:730
EIGEN_STRONG_INLINE Packet8bf F32ToBf16(Packet4f p4f)
Definition: PacketMath.h:1252
__m512 Packet16f
Definition: PacketMath.h:31
Namespace containing all symbols from the Eigen library.
Definition: LDLT.h:16
Definition: document.h:416
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition: pointer.h:1181