10 #ifndef EIGEN_PACKET_MATH_SSE_H
11 #define EIGEN_PACKET_MATH_SSE_H
17 #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD
18 #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 8
21 #if !defined(EIGEN_VECTORIZE_AVX) && !defined(EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS)
24 #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS (2*sizeof(void*))
27 #ifdef EIGEN_VECTORIZE_FMA
28 #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
29 #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD
33 #if ((defined EIGEN_VECTORIZE_AVX) && (EIGEN_COMP_GNUC_STRICT || EIGEN_COMP_MINGW) && (__GXX_ABI_VERSION < 1004)) || EIGEN_OS_QNX
// 128-bit SSE packet aliases built on eigen_packet_wrapper around the raw intrinsic types.
// NOTE(review): Packet4f/Packet2d follow the ABI-version / QNX preprocessor test directly
// above; the matching #else/#endif are not visible in this listing, so presumably an
// alternative pair of definitions exists for the other branch — confirm in the full header.
39 typedef eigen_packet_wrapper<__m128>
Packet4f;
40 typedef eigen_packet_wrapper<__m128d>
Packet2d;
// Integer packet. The second template argument (0) keeps this a distinct type from
// Packet16b, which wraps the same __m128i with the value 1.
46 typedef eigen_packet_wrapper<__m128i, 0>
Packet4i;
55 template<
int p,
int q,
int r,
int s>
57 enum {
mask = (s)<<6|(r)<<4|(q)<<2|(p) };
// Single-source shuffle of a float packet: v is bit-cast to an integer vector, its 32-bit
// lanes are rearranged by _mm_shuffle_epi32 with the immediate shuffle_mask<p,q,r,s>::mask,
// and the result is bit-cast back and wrapped as Packet4f. p,q,r,s must be compile-time
// constants because they are used as template arguments.
61 #define vec4f_swizzle1(v,p,q,r,s) \
62 Packet4f(_mm_castsi128_ps(_mm_shuffle_epi32( _mm_castps_si128(v), (shuffle_mask<p,q,r,s>::mask))))
// Same shuffle for an integer packet; no casts are needed.
64 #define vec4i_swizzle1(v,p,q,r,s) \
65 Packet4i(_mm_shuffle_epi32( v, (shuffle_mask<p,q,r,s>::mask)))
// Shuffle of a double packet expressed on 32-bit lanes: double index p expands to the
// lane pair (2*p, 2*p+1), and likewise for q.
// NOTE(review): p and q are not parenthesised in the expansion, so pass plain constants.
67 #define vec2d_swizzle1(v,p,q) \
68 Packet2d(_mm_castsi128_pd(_mm_shuffle_epi32( _mm_castpd_si128(v), (shuffle_mask<2*p,2*p+1,2*q,2*q+1>::mask))))
// Two-source shuffle through _mm_shuffle_ps(a, b, mask), wrapped as Packet4f.
70 #define vec4f_swizzle2(a,b,p,q,r,s) \
71 Packet4f(_mm_shuffle_ps( (a), (b), (shuffle_mask<p,q,r,s>::mask)))
// Two-source shuffle for integer packets: both inputs are bit-cast to float vectors so that
// _mm_shuffle_ps can be used, then the result is bit-cast back to an integer vector.
73 #define vec4i_swizzle2(a,b,p,q,r,s) \
74 Packet4i(_mm_castps_si128( (_mm_shuffle_ps( _mm_castsi128_ps(a), _mm_castsi128_ps(b), (shuffle_mask<p,q,r,s>::mask)))))
// Replicates one lane of a float packet: expands to vec4f_swizzle2 with a as both sources
// and the same selector p in all four positions.
92 #define vec4f_duplane(a,p) \
93 vec4f_swizzle2(a,a,p,p,p,p)
// Thin wrapper: forwards a, b and the immediate mask to _mm_shuffle_pd and wraps the
// result as Packet2d.
95 #define vec2d_swizzle2(a,b,mask) \
96 Packet2d(_mm_shuffle_pd(a,b,mask))
// Replicates one lane of a double packet: the mask (p<<1)|p sets both selector bits to p.
// NOTE(review): p is not parenthesised in the expansion, so an argument such as `i+1`
// would be mis-grouped by operator precedence; pass a plain constant (0 or 1).
106 #define vec2d_duplane(a,p) \
107 vec2d_swizzle2(a,a,(p<<1)|p)
// Helpers that declare a const packet whose name is the type prefix (p4f_, p2d_, p4i_)
// glued to NAME, initialised from X via pset1<PacketType>(X).
// None of the expansions ends in a semicolon; the use site supplies it.
109 #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
110 const Packet4f p4f_##NAME = pset1<Packet4f>(X)
112 #define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
113 const Packet2d p2d_##NAME = pset1<Packet2d>(X)
// Variant that initialises through pset1frombits<Packet4f>(X) instead of pset1, i.e. X is
// handed over as an integer bit pattern rather than as a float value.
115 #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
116 const Packet4f p4f_##NAME = pset1frombits<Packet4f>(X)
118 #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
119 const Packet4i p4i_##NAME = pset1<Packet4i>(X)
124 #ifndef EIGEN_VECTORIZE_AVX
126 struct packet_traits<float> : default_packet_traits {
152 #ifdef EIGEN_VECTORIZE_SSE4_1
159 struct packet_traits<double> : default_packet_traits {
177 #ifdef EIGEN_VECTORIZE_SSE4_1
184 template<>
struct packet_traits<int> : default_packet_traits
222 template<>
struct unpacket_traits<
Packet4f> {
228 template<>
struct unpacket_traits<
Packet2d> {
233 template<>
struct unpacket_traits<
Packet4i> {
244 #ifndef EIGEN_VECTORIZE_AVX
// Explicit specializations of scalar_div_cost for float and double with the second
// template argument set to true; they expose the constants 7 and 8 respectively.
// They sit behind the `#ifndef EIGEN_VECTORIZE_AVX` test on the preceding line.
// NOTE(review): presumably the boolean selects the vectorized case and the values are
// relative cost estimates — confirm against the primary template, which is not visible here.
245 template<>
struct scalar_div_cost<float,true> {
enum {
value = 7 }; };
246 template<>
struct scalar_div_cost<double,true> {
enum {
value = 8 }; };
249 #if EIGEN_COMP_MSVC==1500
279 #if EIGEN_COMP_GNUC_STRICT && (!defined __AVX__)
303 #ifdef EIGEN_VECTORIZE_SSE3
304 return _mm_addsub_ps(
a,b);
306 const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x0,0x80000000,0x0));
314 #ifdef EIGEN_VECTORIZE_SSE3
315 return _mm_addsub_pd(
a,b);
317 const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x0));
324 const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x80000000,0x80000000,0x80000000,0x80000000));
325 return _mm_xor_ps(
a,mask);
329 const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0x0,0x80000000,0x0,0x80000000));
330 return _mm_xor_pd(
a,mask);
350 #ifdef EIGEN_VECTORIZE_SSE4_1
351 return _mm_mullo_epi32(
a,b);
371 #ifdef EIGEN_VECTORIZE_FMA
376 #ifdef EIGEN_VECTORIZE_SSE4_1
378 return _mm_blendv_ps(b,
a,mask);
382 return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(b),_mm_castsi128_ps(
a),_mm_castsi128_ps(mask)));
388 return _mm_blendv_epi8(b,
a,mask);
393 Packet16b b_part = _mm_andnot_si128(mask, b);
394 return _mm_or_si128(a_part, b_part);
403 return _mm_castsi128_ps(_mm_cmpeq_epi32(b, b));
408 return _mm_castsi128_pd(_mm_cmpeq_epi32(b, b));
447 #if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
452 #ifdef EIGEN_VECTORIZE_AVX
454 asm(
"vminps %[a], %[b], %[res]" : [res]
"=x" (res) : [
a]
"x" (
a), [b]
"x" (b));
457 asm(
"minps %[a], %[res]" : [res]
"+x" (res) : [
a]
"x" (
a));
462 return _mm_min_ps(b,
a);
466 #if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
471 #ifdef EIGEN_VECTORIZE_AVX
473 asm(
"vminpd %[a], %[b], %[res]" : [res]
"=x" (res) : [
a]
"x" (
a), [b]
"x" (b));
476 asm(
"minpd %[a], %[res]" : [res]
"+x" (res) : [
a]
"x" (
a));
481 return _mm_min_pd(b,
a);
486 #ifdef EIGEN_VECTORIZE_SSE4_1
487 return _mm_min_epi32(
a,b);
491 return _mm_or_si128(_mm_and_si128(mask,
a),_mm_andnot_si128(mask,b));
497 #if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
502 #ifdef EIGEN_VECTORIZE_AVX
504 asm(
"vmaxps %[a], %[b], %[res]" : [res]
"=x" (res) : [
a]
"x" (
a), [b]
"x" (b));
507 asm(
"maxps %[a], %[res]" : [res]
"+x" (res) : [
a]
"x" (
a));
512 return _mm_max_ps(b,
a);
516 #if EIGEN_COMP_GNUC && EIGEN_COMP_GNUC < 63
521 #ifdef EIGEN_VECTORIZE_AVX
523 asm(
"vmaxpd %[a], %[b], %[res]" : [res]
"=x" (res) : [
a]
"x" (
a), [b]
"x" (b));
526 asm(
"maxpd %[a], %[res]" : [res]
"+x" (res) : [
a]
"x" (
a));
531 return _mm_max_pd(b,
a);
536 #ifdef EIGEN_VECTORIZE_SSE4_1
537 return _mm_max_epi32(
a,b);
541 return _mm_or_si128(_mm_and_si128(mask,
a),_mm_andnot_si128(mask,b));
545 template <
typename Packet,
typename Op>
551 return pselect<Packet>(not_nan_mask_a, m, b);
554 template <
typename Packet,
typename Op>
560 return pselect<Packet>(not_nan_mask_a, m,
a);
603 const Packet4f mask = _mm_castsi128_ps(_mm_setr_epi32(0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF));
604 return _mm_and_ps(
a,mask);
608 const Packet2d mask = _mm_castsi128_pd(_mm_setr_epi32(0xFFFFFFFF,0x7FFFFFFF,0xFFFFFFFF,0x7FFFFFFF));
609 return _mm_and_pd(
a,mask);
613 #ifdef EIGEN_VECTORIZE_SSSE3
614 return _mm_abs_epi32(
a);
617 return _mm_sub_epi32(_mm_xor_si128(
a,aux),aux);
621 #ifdef EIGEN_VECTORIZE_SSE4_1
627 return _mm_round_ps(
padd(
por(
pand(
a, mask), prev0dot5),
a), _MM_FROUND_TO_ZERO);
632 const Packet2d mask = _mm_castsi128_pd(_mm_set_epi64x(0x8000000000000000ull, 0x8000000000000000ull));
633 const Packet2d prev0dot5 = _mm_castsi128_pd(_mm_set_epi64x(0x3FDFFFFFFFFFFFFFull, 0x3FDFFFFFFFFFFFFFull));
634 return _mm_round_pd(
padd(
por(
pand(
a, mask), prev0dot5),
a), _MM_FROUND_TO_ZERO);
680 mask =
pand(mask, cst_1);
681 return psub(tmp, mask);
690 mask =
pand(mask, cst_1);
691 return psub(tmp, mask);
700 mask =
pand(mask, cst_1);
701 return padd(tmp, mask);
710 mask =
pand(mask, cst_1);
711 return padd(tmp, mask);
723 #if (EIGEN_COMP_MSVC==1600)
726 __m128 res = _mm_loadl_pi(_mm_set1_ps(0.0f), (
const __m64*)(from));
727 res = _mm_loadh_pi(res, (
const __m64*)(from+2));
730 return _mm_loadu_ps(from);
739 return _mm_loadu_ps(from);
746 return _mm_loadu_pd(from);
751 return _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(from));
755 return _mm_loadu_si128(
reinterpret_cast<const __m128i*
>(from));
761 return vec4f_swizzle1(_mm_castpd_ps(_mm_load_sd(
reinterpret_cast<const double*
>(from))), 0, 0, 1, 1);
768 tmp = _mm_loadl_epi64(
reinterpret_cast<const __m128i*
>(from));
776 __m128i tmp = _mm_castpd_si128(pload1<Packet2d>(
reinterpret_cast<const double*
>(from)));
777 return _mm_unpacklo_epi8(tmp, tmp);
784 __m128i tmp = _mm_castps_si128(
pload1<Packet4f>(
reinterpret_cast<const float*
>(from)));
785 tmp = _mm_unpacklo_epi8(tmp, tmp);
786 return _mm_unpacklo_epi16(tmp, tmp);
801 return _mm_set_ps(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
805 return _mm_set_pd(from[1*stride], from[0*stride]);
809 return _mm_set_epi32(from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
814 return _mm_set_epi8(from[15*stride], from[14*stride], from[13*stride], from[12*stride],
815 from[11*stride], from[10*stride], from[9*stride], from[8*stride],
816 from[7*stride], from[6*stride], from[5*stride], from[4*stride],
817 from[3*stride], from[2*stride], from[1*stride], from[0*stride]);
822 to[stride*0] = _mm_cvtss_f32(from);
823 to[stride*1] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 1));
824 to[stride*2] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 2));
825 to[stride*3] = _mm_cvtss_f32(_mm_shuffle_ps(from, from, 3));
829 to[stride*0] = _mm_cvtsd_f64(from);
830 to[stride*1] = _mm_cvtsd_f64(_mm_shuffle_pd(from, from, 1));
834 to[stride*0] = _mm_cvtsi128_si32(from);
835 to[stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
836 to[stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
837 to[stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
841 to[4*stride*0] = _mm_cvtsi128_si32(from);
842 to[4*stride*1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 1));
843 to[4*stride*2] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 2));
844 to[4*stride*3] = _mm_cvtsi128_si32(_mm_shuffle_epi32(from, 3));
861 #if EIGEN_COMP_PGI && EIGEN_COMP_PGI < 1900
867 #ifndef EIGEN_VECTORIZE_AVX
873 #if EIGEN_COMP_MSVC_STRICT && EIGEN_OS_WIN64
879 #elif EIGEN_COMP_MSVC_STRICT
895 #ifdef EIGEN_VECTORIZE_SSSE3
896 __m128i mask = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
897 return _mm_shuffle_epi8(
a, mask);
899 Packet16b tmp = _mm_shuffle_epi32(
a, _MM_SHUFFLE(0, 1, 2, 3));
900 tmp = _mm_shufflehi_epi16(_mm_shufflelo_epi16(tmp, _MM_SHUFFLE(2, 3, 0, 1)), _MM_SHUFFLE(2, 3, 0, 1));
901 return _mm_or_si128(_mm_slli_epi16(tmp, 8), _mm_srli_epi16(tmp, 8));
914 __m128i a_expo = _mm_srli_epi64(_mm_castpd_si128(
pand(
a, cst_exp_mask)), 52);
937 const Packet4i bias = _mm_set_epi32(0, 1023, 0, 1023);
938 Packet4i b = parithmetic_shift_right<2>(ei);
939 Packet2d c = _mm_castsi128_pd(_mm_slli_epi64(
padd(b, bias), 52));
942 c = _mm_castsi128_pd(_mm_slli_epi64(
padd(b, bias), 52));
963 #ifdef EIGEN_VECTORIZE_SSE3
964 a0 = _mm_loaddup_pd(
a+0);
965 a1 = _mm_loaddup_pd(
a+1);
966 a2 = _mm_loaddup_pd(
a+2);
967 a3 = _mm_loaddup_pd(
a+3);
981 vecs[1] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x55));
982 vecs[2] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xAA));
983 vecs[3] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0xFF));
984 vecs[0] = _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(vecs[0]), 0x00));
1011 #ifdef EIGEN_VECTORIZE_SSSE3
1021 Packet4i tmp = _mm_add_epi32(
a, _mm_unpackhi_epi64(
a,
a));
1027 Packet4i tmp = _mm_or_si128(
a, _mm_unpackhi_epi64(
a,
a));
1051 return (aux[0] * aux[1]) * (aux[2] * aux[3]);
1055 Packet4i tmp = _mm_and_si128(
a, _mm_unpackhi_epi64(
a,
a));
1072 #ifdef EIGEN_VECTORIZE_SSE4_1
1073 Packet4i tmp = _mm_min_epi32(
a, _mm_shuffle_epi32(
a, _MM_SHUFFLE(0,0,3,2)));
1080 int aux0 = aux[0]<aux[1] ? aux[0] : aux[1];
1081 int aux2 = aux[2]<aux[3] ? aux[2] : aux[3];
1082 return aux0<aux2 ? aux0 : aux2;
1098 #ifdef EIGEN_VECTORIZE_SSE4_1
1099 Packet4i tmp = _mm_max_epi32(
a, _mm_shuffle_epi32(
a, _MM_SHUFFLE(0,0,3,2)));
1106 int aux0 = aux[0]>aux[1] ? aux[0] : aux[1];
1107 int aux2 = aux[2]>aux[3] ? aux[2] : aux[3];
1108 return aux0>aux2 ? aux0 : aux2;
1120 return _mm_movemask_ps(x) != 0x0;
1124 ptranspose(PacketBlock<Packet4f,4>& kernel) {
1125 _MM_TRANSPOSE4_PS(kernel.packet[0], kernel.packet[1], kernel.packet[2], kernel.packet[3]);
1129 ptranspose(PacketBlock<Packet2d,2>& kernel) {
1130 __m128d tmp = _mm_unpackhi_pd(kernel.packet[0], kernel.packet[1]);
1131 kernel.packet[0] = _mm_unpacklo_pd(kernel.packet[0], kernel.packet[1]);
1132 kernel.packet[1] = tmp;
1136 ptranspose(PacketBlock<Packet4i,4>& kernel) {
1137 __m128i T0 = _mm_unpacklo_epi32(kernel.packet[0], kernel.packet[1]);
1138 __m128i T1 = _mm_unpacklo_epi32(kernel.packet[2], kernel.packet[3]);
1139 __m128i T2 = _mm_unpackhi_epi32(kernel.packet[0], kernel.packet[1]);
1140 __m128i T3 = _mm_unpackhi_epi32(kernel.packet[2], kernel.packet[3]);
1142 kernel.packet[0] = _mm_unpacklo_epi64(T0, T1);
1143 kernel.packet[1] = _mm_unpackhi_epi64(T0, T1);
1144 kernel.packet[2] = _mm_unpacklo_epi64(T2, T3);
1145 kernel.packet[3] = _mm_unpackhi_epi64(T2, T3);
1150 __m128i T0 = _mm_unpacklo_epi8(kernel.
packet[0], kernel.
packet[1]);
1151 __m128i T1 = _mm_unpackhi_epi8(kernel.
packet[0], kernel.
packet[1]);
1152 __m128i T2 = _mm_unpacklo_epi8(kernel.
packet[2], kernel.
packet[3]);
1153 __m128i T3 = _mm_unpackhi_epi8(kernel.
packet[2], kernel.
packet[3]);
1154 kernel.
packet[0] = _mm_unpacklo_epi16(T0, T2);
1155 kernel.
packet[1] = _mm_unpackhi_epi16(T0, T2);
1156 kernel.
packet[2] = _mm_unpacklo_epi16(T1, T3);
1157 kernel.
packet[3] = _mm_unpackhi_epi16(T1, T3);
1173 __m128i t0 = _mm_unpacklo_epi8(kernel.
packet[0], kernel.
packet[1]);
1174 __m128i t1 = _mm_unpackhi_epi8(kernel.
packet[0], kernel.
packet[1]);
1175 __m128i t2 = _mm_unpacklo_epi8(kernel.
packet[2], kernel.
packet[3]);
1176 __m128i t3 = _mm_unpackhi_epi8(kernel.
packet[2], kernel.
packet[3]);
1177 __m128i t4 = _mm_unpacklo_epi8(kernel.
packet[4], kernel.
packet[5]);
1178 __m128i t5 = _mm_unpackhi_epi8(kernel.
packet[4], kernel.
packet[5]);
1179 __m128i t6 = _mm_unpacklo_epi8(kernel.
packet[6], kernel.
packet[7]);
1180 __m128i t7 = _mm_unpackhi_epi8(kernel.
packet[6], kernel.
packet[7]);
1181 __m128i t8 = _mm_unpacklo_epi8(kernel.
packet[8], kernel.
packet[9]);
1182 __m128i t9 = _mm_unpackhi_epi8(kernel.
packet[8], kernel.
packet[9]);
1183 __m128i ta = _mm_unpacklo_epi8(kernel.
packet[10], kernel.
packet[11]);
1184 __m128i tb = _mm_unpackhi_epi8(kernel.
packet[10], kernel.
packet[11]);
1185 __m128i tc = _mm_unpacklo_epi8(kernel.
packet[12], kernel.
packet[13]);
1186 __m128i td = _mm_unpackhi_epi8(kernel.
packet[12], kernel.
packet[13]);
1187 __m128i te = _mm_unpacklo_epi8(kernel.
packet[14], kernel.
packet[15]);
1188 __m128i tf = _mm_unpackhi_epi8(kernel.
packet[14], kernel.
packet[15]);
1190 __m128i s0 = _mm_unpacklo_epi16(t0, t2);
1191 __m128i s1 = _mm_unpackhi_epi16(t0, t2);
1192 __m128i s2 = _mm_unpacklo_epi16(t1, t3);
1193 __m128i s3 = _mm_unpackhi_epi16(t1, t3);
1194 __m128i s4 = _mm_unpacklo_epi16(t4, t6);
1195 __m128i s5 = _mm_unpackhi_epi16(t4, t6);
1196 __m128i s6 = _mm_unpacklo_epi16(t5, t7);
1197 __m128i s7 = _mm_unpackhi_epi16(t5, t7);
1198 __m128i s8 = _mm_unpacklo_epi16(t8, ta);
1199 __m128i s9 = _mm_unpackhi_epi16(t8, ta);
1200 __m128i sa = _mm_unpacklo_epi16(t9, tb);
1201 __m128i sb = _mm_unpackhi_epi16(t9, tb);
1202 __m128i sc = _mm_unpacklo_epi16(tc, te);
1203 __m128i sd = _mm_unpackhi_epi16(tc, te);
1204 __m128i se = _mm_unpacklo_epi16(td, tf);
1205 __m128i sf = _mm_unpackhi_epi16(td, tf);
1207 __m128i u0 = _mm_unpacklo_epi32(s0, s4);
1208 __m128i u1 = _mm_unpackhi_epi32(s0, s4);
1209 __m128i u2 = _mm_unpacklo_epi32(s1, s5);
1210 __m128i u3 = _mm_unpackhi_epi32(s1, s5);
1211 __m128i u4 = _mm_unpacklo_epi32(s2, s6);
1212 __m128i u5 = _mm_unpackhi_epi32(s2, s6);
1213 __m128i u6 = _mm_unpacklo_epi32(s3, s7);
1214 __m128i u7 = _mm_unpackhi_epi32(s3, s7);
1215 __m128i u8 = _mm_unpacklo_epi32(s8, sc);
1216 __m128i u9 = _mm_unpackhi_epi32(s8, sc);
1217 __m128i ua = _mm_unpacklo_epi32(s9, sd);
1218 __m128i ub = _mm_unpackhi_epi32(s9, sd);
1219 __m128i uc = _mm_unpacklo_epi32(sa, se);
1220 __m128i ud = _mm_unpackhi_epi32(sa, se);
1221 __m128i ue = _mm_unpacklo_epi32(sb, sf);
1222 __m128i uf = _mm_unpackhi_epi32(sb, sf);
1224 kernel.
packet[0] = _mm_unpacklo_epi64(u0, u8);
1225 kernel.
packet[1] = _mm_unpackhi_epi64(u0, u8);
1226 kernel.
packet[2] = _mm_unpacklo_epi64(u1, u9);
1227 kernel.
packet[3] = _mm_unpackhi_epi64(u1, u9);
1228 kernel.
packet[4] = _mm_unpacklo_epi64(u2, ua);
1229 kernel.
packet[5] = _mm_unpackhi_epi64(u2, ua);
1230 kernel.
packet[6] = _mm_unpacklo_epi64(u3, ub);
1231 kernel.
packet[7] = _mm_unpackhi_epi64(u3, ub);
1232 kernel.
packet[8] = _mm_unpacklo_epi64(u4, uc);
1233 kernel.
packet[9] = _mm_unpackhi_epi64(u4, uc);
1234 kernel.
packet[10] = _mm_unpacklo_epi64(u5, ud);
1235 kernel.
packet[11] = _mm_unpackhi_epi64(u5, ud);
1236 kernel.
packet[12] = _mm_unpacklo_epi64(u6, ue);
1237 kernel.
packet[13] = _mm_unpackhi_epi64(u6, ue);
1238 kernel.
packet[14] = _mm_unpacklo_epi64(u7, uf);
1239 kernel.
packet[15] = _mm_unpackhi_epi64(u7, uf);
1243 const __m128i zero = _mm_setzero_si128();
1244 const __m128i select = _mm_set_epi32(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
1245 __m128i false_mask = _mm_cmpeq_epi32(select, zero);
1246 #ifdef EIGEN_VECTORIZE_SSE4_1
1247 return _mm_blendv_epi8(thenPacket, elsePacket, false_mask);
1249 return _mm_or_si128(_mm_andnot_si128(false_mask, thenPacket), _mm_and_si128(false_mask, elsePacket));
1253 const __m128 zero = _mm_setzero_ps();
1254 const __m128 select = _mm_set_ps(ifPacket.select[3], ifPacket.select[2], ifPacket.select[1], ifPacket.select[0]);
1255 __m128 false_mask = _mm_cmpeq_ps(select, zero);
1256 #ifdef EIGEN_VECTORIZE_SSE4_1
1257 return _mm_blendv_ps(thenPacket, elsePacket, false_mask);
1259 return _mm_or_ps(_mm_andnot_ps(false_mask, thenPacket), _mm_and_ps(false_mask, elsePacket));
1263 const __m128d zero = _mm_setzero_pd();
1264 const __m128d select = _mm_set_pd(ifPacket.select[1], ifPacket.select[0]);
1265 __m128d false_mask = _mm_cmpeq_pd(select, zero);
1266 #ifdef EIGEN_VECTORIZE_SSE4_1
1267 return _mm_blendv_pd(thenPacket, elsePacket, false_mask);
1269 return _mm_or_pd(_mm_andnot_pd(false_mask, thenPacket), _mm_and_pd(false_mask, elsePacket));
1274 #ifdef EIGEN_VECTORIZE_FMA
1276 return ::fmaf(
a,b,c);
1279 return ::fma(
a,b,c);
// Explicit specialization of the is_arithmetic trait for Packet4h: value is true.
1294 template<>
struct is_arithmetic<Packet4h> {
enum {
value =
true }; };
1297 struct packet_traits<
Eigen::half> : default_packet_traits {
1298 typedef Packet4h
type;
1300 typedef Packet4h
half;
1330 result.x = _mm_set1_pi16(from.
x);
1341 __int64_t a64 = _mm_cvtm64_si64(
a.x);
1342 __int64_t b64 = _mm_cvtm64_si64(b.x);
1359 result.
x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x);
1364 __int64_t a64 = _mm_cvtm64_si64(
a.x);
1365 __int64_t b64 = _mm_cvtm64_si64(b.x);
1382 result.
x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x);
1387 __int64_t a64 = _mm_cvtm64_si64(
a.x);
1388 __int64_t b64 = _mm_cvtm64_si64(b.x);
1405 result.
x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x);
1410 __int64_t a64 = _mm_cvtm64_si64(
a.x);
1411 __int64_t b64 = _mm_cvtm64_si64(b.x);
1428 result.
x = _mm_set_pi16(h[3].x, h[2].x, h[1].x, h[0].x);
1434 result.x = _mm_cvtsi64_m64(*
reinterpret_cast<const __int64_t*
>(from));
1440 result.x = _mm_cvtsi64_m64(*
reinterpret_cast<const __int64_t*
>(from));
1445 __int64_t r = _mm_cvtm64_si64(from.x);
1446 *(
reinterpret_cast<__int64_t*
>(to)) = r;
1450 __int64_t r = _mm_cvtm64_si64(from.x);
1451 *(
reinterpret_cast<__int64_t*
>(to)) = r;
1456 return pset1<Packet4h>(*from);
1462 result.x = _mm_set_pi16(from[3*stride].x, from[2*stride].x, from[1*stride].x, from[0*stride].x);
1468 __int64_t
a = _mm_cvtm64_si64(from.x);
1469 to[stride*0].
x =
static_cast<unsigned short>(
a);
1470 to[stride*1].
x =
static_cast<unsigned short>(
a >> 16);
1471 to[stride*2].
x =
static_cast<unsigned short>(
a >> 32);
1472 to[stride*3].
x =
static_cast<unsigned short>(
a >> 48);
1476 ptranspose(PacketBlock<Packet4h,4>& kernel) {
1477 __m64 T0 = _mm_unpacklo_pi16(kernel.packet[0].x, kernel.packet[1].x);
1478 __m64 T1 = _mm_unpacklo_pi16(kernel.packet[2].x, kernel.packet[3].x);
1479 __m64 T2 = _mm_unpackhi_pi16(kernel.packet[0].x, kernel.packet[1].x);
1480 __m64 T3 = _mm_unpackhi_pi16(kernel.packet[2].x, kernel.packet[3].x);
1482 kernel.packet[0].x = _mm_unpacklo_pi32(T0, T1);
1483 kernel.packet[1].x = _mm_unpackhi_pi32(T0, T1);
1484 kernel.packet[2].x = _mm_unpacklo_pi32(T2, T3);
1485 kernel.packet[3].x = _mm_unpackhi_pi32(T2, T3);
1495 #if EIGEN_COMP_PGI && EIGEN_COMP_PGI < 1900
// Hand-written versions of the six SSE2 bit-cast helpers, compiled only under the
// `EIGEN_COMP_PGI && EIGEN_COMP_PGI < 1900` test on the preceding line.
// Each one takes its argument by value and returns the same storage viewed as the target
// vector type through a reference reinterpret_cast; no numeric conversion is involved.
// NOTE(review): presumably needed because those PGI releases do not ship the cast
// intrinsics themselves — confirm.
1497 static inline __m128 _mm_castpd_ps (__m128d x) {
return reinterpret_cast<__m128&
>(x); }
1498 static inline __m128i _mm_castpd_si128(__m128d x) {
return reinterpret_cast<__m128i&
>(x); }
1499 static inline __m128d _mm_castps_pd (__m128 x) {
return reinterpret_cast<__m128d&
>(x); }
1500 static inline __m128i _mm_castps_si128(__m128 x) {
return reinterpret_cast<__m128i&
>(x); }
1501 static inline __m128 _mm_castsi128_ps(__m128i x) {
return reinterpret_cast<__m128&
>(x); }
1502 static inline __m128d _mm_castsi128_pd(__m128i x) {
return reinterpret_cast<__m128d&
>(x); }
#define EIGEN_DEBUG_ALIGNED_STORE
Definition: GenericPacketMath.h:35
#define EIGEN_DEBUG_ALIGNED_LOAD
Definition: GenericPacketMath.h:27
#define EIGEN_DEBUG_UNALIGNED_STORE
Definition: GenericPacketMath.h:39
#define EIGEN_DEBUG_UNALIGNED_LOAD
Definition: GenericPacketMath.h:31
#define EIGEN_DEVICE_FUNC
Definition: Macros.h:976
#define EIGEN_FAST_MATH
Definition: Macros.h:49
#define EIGEN_STRONG_INLINE
Definition: Macros.h:917
#define EIGEN_OPTIMIZATION_BARRIER(X)
Definition: Macros.h:1144
#define vec2d_swizzle1(v, p, q)
Definition: PacketMath.h:67
#define vec4i_swizzle1(v, p, q, r, s)
Definition: PacketMath.h:64
#define vec4i_swizzle2(a, b, p, q, r, s)
Definition: PacketMath.h:73
@ Aligned16
Definition: Constants.h:235
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC EIGEN_CONSTEXPR __half_raw raw_uint16_to_half(numext::uint16_t x)
Definition: Half.h:495
EIGEN_STRONG_INLINE Packet4f pandnot< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:914
v2f64 Packet2d
Definition: PacketMath.h:820
EIGEN_STRONG_INLINE void pstoreu< double >(double *to, const Packet4d &from)
Definition: PacketMath.h:627
EIGEN_STRONG_INLINE Packet pminmax_propagate_numbers(const Packet &a, const Packet &b, Op op)
Definition: PacketMath.h:546
EIGEN_DEVICE_FUNC void pscatter< bool, Packet16b >(bool *to, const Packet16b &from, Index stride)
Definition: PacketMath.h:839
EIGEN_STRONG_INLINE double predux< Packet2d >(const Packet2d &a)
Definition: PacketMath.h:1082
EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf &a)
Definition: Complex.h:167
EIGEN_STRONG_INLINE void pstore< bool >(bool *to, const Packet16b &from)
Definition: PacketMath.h:792
EIGEN_STRONG_INLINE bool predux< Packet16b >(const Packet16b &a)
Definition: PacketMath.h:1026
EIGEN_DEVICE_FUNC Packet padd(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:215
EIGEN_STRONG_INLINE Packet4f pmin< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:832
EIGEN_STRONG_INLINE Packet16b ploaddup< Packet16b >(const bool *from)
Definition: PacketMath.h:774
EIGEN_STRONG_INLINE Packet2d padd< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:880
EIGEN_STRONG_INLINE Packet2d pandnot< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:960
EIGEN_STRONG_INLINE Packet16b pmul< Packet16b >(const Packet16b &a, const Packet16b &b)
Definition: PacketMath.h:364
EIGEN_STRONG_INLINE Packet8f pzero(const Packet8f &)
Definition: PacketMath.h:247
__vector int Packet4i
Definition: PacketMath.h:31
EIGEN_STRONG_INLINE Packet4f vec4f_movelh(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:121
EIGEN_STRONG_INLINE Packet2d pmin< PropagateNaN, Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:585
EIGEN_STRONG_INLINE Packet4f padd< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:774
EIGEN_STRONG_INLINE Packet4i por< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: PacketMath.h:901
EIGEN_STRONG_INLINE Packet2d pmax< PropagateNumbers, Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:577
EIGEN_STRONG_INLINE Packet4i pset1< Packet4i >(const int &from)
Definition: PacketMath.h:551
EIGEN_STRONG_INLINE Packet2d paddsub< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:312
EIGEN_STRONG_INLINE Packet16b ploadquad< Packet16b >(const bool *from)
Definition: PacketMath.h:783
EIGEN_STRONG_INLINE Packet16b psub< Packet16b >(const Packet16b &a, const Packet16b &b)
Definition: PacketMath.h:298
EIGEN_STRONG_INLINE Packet2d vec2d_unpackhi(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:102
EIGEN_STRONG_INLINE float pfirst< Packet4f >(const Packet4f &a)
Definition: PacketMath.h:1120
EIGEN_STRONG_INLINE Packet2d pand< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:939
EIGEN_STRONG_INLINE Packet16b por< Packet16b >(const Packet16b &a, const Packet16b &b)
Definition: PacketMath.h:420
EIGEN_STRONG_INLINE void ptranspose(PacketBlock< Packet2cf, 2 > &kernel)
Definition: Complex.h:224
EIGEN_STRONG_INLINE Packet4i ploaddup< Packet4i >(const int *from)
Definition: PacketMath.h:1008
EIGEN_STRONG_INLINE bool predux_any(const Packet4f &x)
Definition: PacketMath.h:1765
EIGEN_STRONG_INLINE float predux_max< Packet4f >(const Packet4f &a)
Definition: PacketMath.h:1693
EIGEN_STRONG_INLINE Packet2d ploaddup< Packet2d >(const double *from)
Definition: PacketMath.h:1011
EIGEN_STRONG_INLINE Packet4f vec4f_movehl(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:125
EIGEN_STRONG_INLINE Packet2d pxor< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:953
EIGEN_STRONG_INLINE Packet16b pload< Packet16b >(const bool *from)
Definition: PacketMath.h:718
EIGEN_STRONG_INLINE Packet2d por< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:946
EIGEN_STRONG_INLINE Packet2d pldexp< Packet2d >(const Packet2d &a, const Packet2d &exponent)
Definition: PacketMath.h:928
EIGEN_STRONG_INLINE Packet4f pmax< PropagateNumbers, Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:573
eigen_packet_wrapper< __m128i, 1 > Packet16b
Definition: PacketMath.h:47
EIGEN_STRONG_INLINE void pstore1< Packet2d >(double *to, const double &a)
Definition: PacketMath.h:855
EIGEN_DEVICE_FUNC void pscatter< int, Packet4i >(int *to, const Packet4i &from, Index stride)
Definition: PacketMath.h:700
EIGEN_STRONG_INLINE Packet4f ploaddup< Packet4f >(const float *from)
Definition: PacketMath.h:1004
EIGEN_STRONG_INLINE Packet4f por< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:900
EIGEN_STRONG_INLINE Packet4i plogical_shift_left(const Packet4i &a)
Definition: PacketMath.h:1191
EIGEN_STRONG_INLINE Packet16b pset1< Packet16b >(const bool &from)
Definition: PacketMath.h:261
EIGEN_STRONG_INLINE int predux_min< Packet4i >(const Packet4i &a)
Definition: PacketMath.h:1618
EIGEN_STRONG_INLINE Packet4i pxor< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: PacketMath.h:909
EIGEN_STRONG_INLINE Packet4f print(const Packet4f &a)
Definition: PacketMath.h:3115
EIGEN_STRONG_INLINE double predux_max< Packet2d >(const Packet2d &a)
Definition: PacketMath.h:1116
EIGEN_STRONG_INLINE Packet4f pmul< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:795
EIGEN_STRONG_INLINE void pstore1< Packet4f >(float *to, const float &a)
Definition: PacketMath.h:849
EIGEN_STRONG_INLINE Packet4f pload1< Packet4f >(const float *from)
Definition: PacketMath.h:144
EIGEN_DEVICE_FUNC Packet pmax(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:524
EIGEN_STRONG_INLINE Packet4i pblend(const Selector< 4 > &ifPacket, const Packet4i &thenPacket, const Packet4i &elsePacket)
Definition: PacketMath.h:2107
EIGEN_DEVICE_FUNC Packet4f pgather< float, Packet4f >(const float *from, Index stride)
Definition: PacketMath.h:613
EIGEN_STRONG_INLINE Packet4f pcmp_le(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:867
EIGEN_STRONG_INLINE Packet4f paddsub< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:869
EIGEN_STRONG_INLINE Packet2d pset1< Packet2d >(const double &from)
Definition: PacketMath.h:872
EIGEN_STRONG_INLINE Packet4i plogical_shift_right(const Packet4i &a)
Definition: PacketMath.h:1189
EIGEN_STRONG_INLINE Packet pminmax_propagate_nan(const Packet &a, const Packet &b, Op op)
Definition: PacketMath.h:555
EIGEN_STRONG_INLINE void punpackp(Packet4f *vecs)
Definition: PacketMath.h:979
EIGEN_STRONG_INLINE Packet4f pload< Packet4f >(const float *from)
Definition: PacketMath.h:443
EIGEN_STRONG_INLINE int predux_mul< Packet4i >(const Packet4i &a)
Definition: PacketMath.h:1540
EIGEN_STRONG_INLINE void pstore< int >(int *to, const Packet4i &from)
Definition: PacketMath.h:496
EIGEN_STRONG_INLINE Packet8h por(const Packet8h &a, const Packet8h &b)
Definition: PacketMath.h:1042
EIGEN_STRONG_INLINE Packet2d ptrue< Packet2d >(const Packet2d &a)
Definition: PacketMath.h:406
EIGEN_STRONG_INLINE Packet2d pmax< PropagateNaN, Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:593
EIGEN_STRONG_INLINE Packet4f pmin< PropagateNaN, Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:1189
EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf &a)
Definition: Complex.h:184
EIGEN_STRONG_INLINE void pstore< double >(double *to, const Packet4d &from)
Definition: PacketMath.h:623
EIGEN_STRONG_INLINE Packet4i padd< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: PacketMath.h:775
EIGEN_STRONG_INLINE Packet4f pfloor< Packet4f >(const Packet4f &a)
Definition: PacketMath.h:939
EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f &a, const Packet4f &b, const Packet4f &c)
Definition: PacketMath.h:827
EIGEN_STRONG_INLINE Packet4i pandnot< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: PacketMath.h:915
EIGEN_STRONG_INLINE Packet16b pand< Packet16b >(const Packet16b &a, const Packet16b &b)
Definition: PacketMath.h:415
EIGEN_DEVICE_FUNC Packet pmul(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:237
EIGEN_DEVICE_FUNC Packet pmin(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:512
EIGEN_STRONG_INLINE Packet4f pdiv< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:803
EIGEN_STRONG_INLINE Packet2d pload< Packet2d >(const double *from)
Definition: PacketMath.h:967
EIGEN_STRONG_INLINE Packet16b ploadu< Packet16b >(const bool *from)
Definition: PacketMath.h:753
EIGEN_STRONG_INLINE Packet2d pmul< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:916
EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf &a)
Definition: Complex.h:166
EIGEN_STRONG_INLINE Packet4f pfrexp< Packet4f >(const Packet4f &a, Packet4f &exponent)
Definition: PacketMath.h:1361
EIGEN_STRONG_INLINE float predux_mul< Packet4f >(const Packet4f &a)
Definition: PacketMath.h:1533
EIGEN_STRONG_INLINE Packet4f pcmp_lt(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:868
EIGEN_STRONG_INLINE Packet2d pmin< PropagateNumbers, Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:569
EIGEN_STRONG_INLINE Packet16b ptrue< Packet16b >(const Packet16b &a)
Definition: PacketMath.h:399
EIGEN_STRONG_INLINE void prefetch< float >(const float *addr)
Definition: PacketMath.h:1117
EIGEN_STRONG_INLINE Packet4i parithmetic_shift_right(const Packet4i &a)
Definition: PacketMath.h:1187
EIGEN_STRONG_INLINE Packet4d pfrexp_generic_get_biased_exponent(const Packet4d &a)
Definition: PacketMath.h:743
EIGEN_DEVICE_FUNC void pscatter< double, Packet2d >(double *to, const Packet2d &from, Index stride)
Definition: PacketMath.h:1044
EIGEN_STRONG_INLINE Packet4f pmax< PropagateNaN, Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:1237
EIGEN_STRONG_INLINE Packet4i ploadu< Packet4i >(const int *from)
Definition: PacketMath.h:972
EIGEN_STRONG_INLINE double predux_mul< Packet2d >(const Packet2d &a)
Definition: PacketMath.h:1092
EIGEN_STRONG_INLINE Packet2d pdiv< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:923
EIGEN_STRONG_INLINE double predux_min< Packet2d >(const Packet2d &a)
Definition: PacketMath.h:1101
EIGEN_STRONG_INLINE Packet4f pset1< Packet4f >(const float &from)
Definition: PacketMath.h:547
EIGEN_STRONG_INLINE Packet4i psub< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: PacketMath.h:783
EIGEN_STRONG_INLINE Packet4f pmin< PropagateNumbers, Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:565
EIGEN_DEVICE_FUNC void pscatter< float, Packet4f >(float *to, const Packet4f &from, Index stride)
Definition: PacketMath.h:695
EIGEN_STRONG_INLINE Packet2d plset< Packet2d >(const double &a)
Definition: PacketMath.h:887
EIGEN_STRONG_INLINE bool pfirst< Packet16b >(const Packet16b &a)
Definition: PacketMath.h:889
EIGEN_STRONG_INLINE void pstoreu< bool >(bool *to, const Packet16b &from)
Definition: PacketMath.h:797
EIGEN_STRONG_INLINE Packet4f pceil< Packet4f >(const Packet4f &a)
Definition: PacketMath.h:938
const char * SsePrefetchPtrType
Definition: PacketMath.h:864
EIGEN_STRONG_INLINE void pstore< float >(float *to, const Packet4f &from)
Definition: PacketMath.h:491
EIGEN_STRONG_INLINE Packet4f pabs(const Packet4f &a)
Definition: PacketMath.h:1176
EIGEN_STRONG_INLINE Packet8f peven_mask(const Packet8f &)
Definition: PacketMath.h:252
EIGEN_STRONG_INLINE bfloat16 pfirst(const Packet8bf &a)
Definition: PacketMath.h:1429
EIGEN_STRONG_INLINE void pbroadcast4< Packet2d >(const double *a, Packet2d &a0, Packet2d &a1, Packet2d &a2, Packet2d &a3)
Definition: PacketMath.h:960
EIGEN_STRONG_INLINE Packet4f pset1frombits< Packet4f >(unsigned int from)
Definition: PacketMath.h:571
EIGEN_DEVICE_FUNC void pstore(Scalar *to, const Packet &from)
Definition: GenericPacketMath.h:696
EIGEN_STRONG_INLINE Packet4f pldexp< Packet4f >(const Packet4f &a, const Packet4f &exponent)
Definition: PacketMath.h:1354
EIGEN_STRONG_INLINE Packet2d ploadu< Packet2d >(const double *from)
Definition: PacketMath.h:1004
EIGEN_STRONG_INLINE Packet4f vec4f_unpackhi(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:133
EIGEN_STRONG_INLINE Packet4f pxor< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:908
EIGEN_STRONG_INLINE Packet4i pmin< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: PacketMath.h:843
EIGEN_STRONG_INLINE Packet2d pfrexp< Packet2d >(const Packet2d &a, Packet2d &exponent)
Definition: PacketMath.h:918
EIGEN_STRONG_INLINE Packet2cf pcmp_eq(const Packet2cf &a, const Packet2cf &b)
Definition: Complex.h:231
EIGEN_DEVICE_FUNC Packet16b pgather< bool, Packet16b >(const bool *from, Index stride)
Definition: PacketMath.h:812
EIGEN_STRONG_INLINE Packet4f vec4f_swizzle1(const Packet4f &a, int p, int q, int r, int s)
Definition: PacketMath.h:113
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pldexp_generic(const Packet &a, const Packet &exponent)
Definition: GenericPacketMathFunctions.h:85
EIGEN_STRONG_INLINE Packet4f ptrue< Packet4f >(const Packet4f &a)
Definition: PacketMath.h:401
EIGEN_STRONG_INLINE Packet4f vec4f_unpacklo(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:129
EIGEN_STRONG_INLINE void pstoreu< int >(int *to, const Packet4i &from)
Definition: PacketMath.h:1092
EIGEN_STRONG_INLINE Packet8h pand(const Packet8h &a, const Packet8h &b)
Definition: PacketMath.h:1050
EIGEN_STRONG_INLINE Packet4i ptrue< Packet4i >(const Packet4i &a)
Definition: PacketMath.h:398
EIGEN_STRONG_INLINE Packet8h pxor(const Packet8h &a, const Packet8h &b)
Definition: PacketMath.h:1047
EIGEN_STRONG_INLINE int pfirst< Packet4i >(const Packet4i &a)
Definition: PacketMath.h:1121
EIGEN_STRONG_INLINE Packet4i plset< Packet4i >(const int &a)
Definition: PacketMath.h:768
EIGEN_STRONG_INLINE Packet2d vec2d_unpacklo(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:98
EIGEN_STRONG_INLINE float predux< Packet4f >(const Packet4f &a)
Definition: PacketMath.h:1444
EIGEN_STRONG_INLINE Packet2d pceil< Packet2d >(const Packet2d &a)
Definition: PacketMath.h:1182
EIGEN_STRONG_INLINE Packet4f ploadu< Packet4f >(const float *from)
Definition: PacketMath.h:968
EIGEN_STRONG_INLINE Packet4i pmul< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: PacketMath.h:796
EIGEN_STRONG_INLINE Packet4i pand< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: PacketMath.h:892
EIGEN_STRONG_INLINE Packet2d pmin< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:974
EIGEN_STRONG_INLINE Packet4f pselect(const Packet4f &mask, const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:917
EIGEN_STRONG_INLINE int predux< Packet4i >(const Packet4i &a)
Definition: PacketMath.h:1454
EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Packet pfrexp_generic(const Packet &a, Packet &exponent)
Definition: GenericPacketMathFunctions.h:40
EIGEN_STRONG_INLINE Packet4f pand< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:891
EIGEN_STRONG_INLINE Packet16b pxor< Packet16b >(const Packet16b &a, const Packet16b &b)
Definition: PacketMath.h:425
EIGEN_DEVICE_FUNC Packet psub(const Packet &a, const Packet &b)
Definition: GenericPacketMath.h:222
EIGEN_DEVICE_FUNC Packet4i pgather< int, Packet4i >(const int *from, Index stride)
Definition: PacketMath.h:618
EIGEN_STRONG_INLINE int predux_max< Packet4i >(const Packet4i &a)
Definition: PacketMath.h:1698
EIGEN_DEVICE_FUNC Packet2d pgather< double, Packet2d >(const double *from, Index stride)
Definition: PacketMath.h:1033
EIGEN_STRONG_INLINE Packet4i pmax< Packet4i >(const Packet4i &a, const Packet4i &b)
Definition: PacketMath.h:861
EIGEN_STRONG_INLINE void pbroadcast4< Packet4f >(const float *a, Packet4f &a0, Packet4f &a1, Packet4f &a2, Packet4f &a3)
Definition: PacketMath.h:591
EIGEN_STRONG_INLINE Packet2d pset1frombits< Packet2d >(uint64_t from)
Definition: PacketMath.h:264
EIGEN_STRONG_INLINE Packet4i pload< Packet4i >(const int *from)
Definition: PacketMath.h:448
__vector float Packet4f
Definition: PacketMath.h:30
EIGEN_STRONG_INLINE Packet2d psub< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:895
EIGEN_STRONG_INLINE Packet4f psub< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:782
EIGEN_STRONG_INLINE Packet4f plset< Packet4f >(const float &a)
Definition: PacketMath.h:767
EIGEN_STRONG_INLINE Packet16b padd< Packet16b >(const Packet16b &a, const Packet16b &b)
Definition: PacketMath.h:293
EIGEN_STRONG_INLINE void pstoreu< float >(float *to, const Packet4f &from)
Definition: PacketMath.h:1088
EIGEN_STRONG_INLINE Packet2d pmax< Packet2d >(const Packet2d &a, const Packet2d &b)
Definition: PacketMath.h:989
EIGEN_STRONG_INLINE Packet4f pround< Packet4f >(const Packet4f &a)
Definition: PacketMath.h:921
EIGEN_STRONG_INLINE Packet4f pcmp_lt_or_nan(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:870
EIGEN_STRONG_INLINE Packet2d pround< Packet2d >(const Packet2d &a)
Definition: PacketMath.h:1202
EIGEN_STRONG_INLINE void prefetch< int >(const int *addr)
Definition: PacketMath.h:1118
EIGEN_STRONG_INLINE double pfirst< Packet2d >(const Packet2d &a)
Definition: PacketMath.h:1061
EIGEN_STRONG_INLINE bool predux_mul< Packet16b >(const Packet16b &a)
Definition: PacketMath.h:1054
EIGEN_STRONG_INLINE Packet4f print< Packet4f >(const Packet4f &a)
Definition: PacketMath.h:940
EIGEN_STRONG_INLINE Packet2d pfloor< Packet2d >(const Packet2d &a)
Definition: PacketMath.h:1163
EIGEN_STRONG_INLINE float predux_min< Packet4f >(const Packet4f &a)
Definition: PacketMath.h:1613
EIGEN_STRONG_INLINE void prefetch< double >(const double *addr)
Definition: PacketMath.h:692
EIGEN_STRONG_INLINE Packet4f pmax< Packet4f >(const Packet4f &a, const Packet4f &b)
Definition: PacketMath.h:850
Namespace containing all symbols from the Eigen library.
Definition: LDLT.h:16
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:74
Definition: document.h:416
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition: pointer.h:1181
unsigned __int64 uint64_t
Definition: stdint.h:136
numext::uint16_t x
Definition: Half.h:104
Definition: GenericPacketMath.h:1014
Packet packet[N]
Definition: GenericPacketMath.h:1015
Definition: GenericPacketMath.h:43
@ HasRsqrt
Definition: GenericPacketMath.h:67
@ HasSin
Definition: GenericPacketMath.h:75
@ HasBlend
Definition: GenericPacketMath.h:60
@ HasNdtri
Definition: GenericPacketMath.h:90
@ HasCos
Definition: GenericPacketMath.h:76
@ HasCmp
Definition: GenericPacketMath.h:63
@ HasShift
Definition: GenericPacketMath.h:49
@ HasLog1p
Definition: GenericPacketMath.h:71
@ HasCeil
Definition: GenericPacketMath.h:101
@ HasExp
Definition: GenericPacketMath.h:68
@ HasRound
Definition: GenericPacketMath.h:98
@ HasRint
Definition: GenericPacketMath.h:99
@ HasSqrt
Definition: GenericPacketMath.h:66
@ HasErf
Definition: GenericPacketMath.h:88
@ HasBessel
Definition: GenericPacketMath.h:91
@ HasExpm1
Definition: GenericPacketMath.h:69
@ HasLog
Definition: GenericPacketMath.h:70
@ HasTanh
Definition: GenericPacketMath.h:83
@ HasFloor
Definition: GenericPacketMath.h:100
@ HasDiv
Definition: GenericPacketMath.h:65
Definition: GenericPacketMath.h:160
@ value
Definition: Meta.h:133
Packet16b half
Definition: PacketMath.h:201
Packet16b type
Definition: PacketMath.h:200
Packet2d half
Definition: PacketMath.h:161
Packet2d type
Definition: PacketMath.h:160
Packet4f type
Definition: PacketMath.h:127
Packet4f half
Definition: PacketMath.h:128
Packet4i type
Definition: PacketMath.h:186
Packet4i half
Definition: PacketMath.h:187
Definition: GenericPacketMath.h:107
@ HasSub
Definition: GenericPacketMath.h:118
@ HasMax
Definition: GenericPacketMath.h:124
@ HasNegate
Definition: GenericPacketMath.h:120
@ HasMul
Definition: GenericPacketMath.h:119
@ HasAdd
Definition: GenericPacketMath.h:117
@ HasSetLinear
Definition: GenericPacketMath.h:126
@ HasMin
Definition: GenericPacketMath.h:123
@ HasConj
Definition: GenericPacketMath.h:125
@ HasAbs2
Definition: GenericPacketMath.h:122
@ HasAbs
Definition: GenericPacketMath.h:121
T type
Definition: GenericPacketMath.h:108
T half
Definition: GenericPacketMath.h:109
@ HasHalfPacket
Definition: GenericPacketMath.h:114
@ size
Definition: GenericPacketMath.h:112
@ AlignedOnScalar
Definition: GenericPacketMath.h:113
@ Vectorizable
Definition: GenericPacketMath.h:111
@ value
Definition: XprHelper.h:711
Definition: PacketMath.h:56
@ mask
Definition: PacketMath.h:57
Packet16b half
Definition: PacketMath.h:240
bool type
Definition: PacketMath.h:239
double type
Definition: PacketMath.h:229
Packet2d half
Definition: PacketMath.h:230
Packet4i integer_packet
Definition: PacketMath.h:225
Packet4f half
Definition: PacketMath.h:224
float type
Definition: PacketMath.h:223
int type
Definition: PacketMath.h:234
Packet4i half
Definition: PacketMath.h:235
Definition: GenericPacketMath.h:133
T type
Definition: GenericPacketMath.h:134
@ masked_load_available
Definition: GenericPacketMath.h:141
@ size
Definition: GenericPacketMath.h:138
@ masked_store_available
Definition: GenericPacketMath.h:142
@ vectorizable
Definition: GenericPacketMath.h:140
@ alignment
Definition: GenericPacketMath.h:139
T half
Definition: GenericPacketMath.h:135
Definition: PacketMath.h:47