NDDEM
diyfp.h
Go to the documentation of this file.
1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 // This is a C++ header-only implementation of Grisu2 algorithm from the publication:
16 // Loitsch, Florian. "Printing floating-point numbers quickly and accurately with
17 // integers." ACM Sigplan Notices 45.6 (2010): 233-243.
18 
19 #ifndef CEREAL_RAPIDJSON_DIYFP_H_
20 #define CEREAL_RAPIDJSON_DIYFP_H_
21 
22 #include "../rapidjson.h"
23 #include <limits>
24 
25 #if defined(_MSC_VER) && defined(_M_AMD64) && !defined(__INTEL_COMPILER)
26 #include <intrin.h>
27 #pragma intrinsic(_BitScanReverse64)
28 #pragma intrinsic(_umul128)
29 #endif
30 
32 namespace internal {
33 
34 #ifdef __GNUC__
35 CEREAL_RAPIDJSON_DIAG_PUSH
36 CEREAL_RAPIDJSON_DIAG_OFF(effc++)
37 #endif
38 
39 #ifdef __clang__
40 CEREAL_RAPIDJSON_DIAG_PUSH
41 CEREAL_RAPIDJSON_DIAG_OFF(padded)
42 #endif
43 
44 struct DiyFp {
45  DiyFp() : f(), e() {}
46 
47  DiyFp(uint64_t fp, int exp) : f(fp), e(exp) {}
48 
49  explicit DiyFp(double d) {
50  union {
51  double d;
52  uint64_t u64;
53  } u = { d };
54 
55  int biased_e = static_cast<int>((u.u64 & kDpExponentMask) >> kDpSignificandSize);
56  uint64_t significand = (u.u64 & kDpSignificandMask);
57  if (biased_e != 0) {
58  f = significand + kDpHiddenBit;
59  e = biased_e - kDpExponentBias;
60  }
61  else {
62  f = significand;
63  e = kDpMinExponent + 1;
64  }
65  }
66 
67  DiyFp operator-(const DiyFp& rhs) const {
68  return DiyFp(f - rhs.f, e);
69  }
70 
71  DiyFp operator*(const DiyFp& rhs) const {
72 #if defined(_MSC_VER) && defined(_M_AMD64)
73  uint64_t h;
74  uint64_t l = _umul128(f, rhs.f, &h);
75  if (l & (uint64_t(1) << 63)) // rounding
76  h++;
77  return DiyFp(h, e + rhs.e + 64);
78 #elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
79  __extension__ typedef unsigned __int128 uint128;
80  uint128 p = static_cast<uint128>(f) * static_cast<uint128>(rhs.f);
81  uint64_t h = static_cast<uint64_t>(p >> 64);
82  uint64_t l = static_cast<uint64_t>(p);
83  if (l & (uint64_t(1) << 63)) // rounding
84  h++;
85  return DiyFp(h, e + rhs.e + 64);
86 #else
87  const uint64_t M32 = 0xFFFFFFFF;
88  const uint64_t a = f >> 32;
89  const uint64_t b = f & M32;
90  const uint64_t c = rhs.f >> 32;
91  const uint64_t d = rhs.f & M32;
92  const uint64_t ac = a * c;
93  const uint64_t bc = b * c;
94  const uint64_t ad = a * d;
95  const uint64_t bd = b * d;
96  uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
97  tmp += 1U << 31;
98  return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
99 #endif
100  }
101 
102  DiyFp Normalize() const {
103  CEREAL_RAPIDJSON_ASSERT(f != 0); // https://stackoverflow.com/a/26809183/291737
104 #if defined(_MSC_VER) && defined(_M_AMD64)
105  unsigned long index;
106  _BitScanReverse64(&index, f);
107  return DiyFp(f << (63 - index), e - (63 - index));
108 #elif defined(__GNUC__) && __GNUC__ >= 4
109  int s = __builtin_clzll(f);
110  return DiyFp(f << s, e - s);
111 #else
112  DiyFp res = *this;
113  while (!(res.f & (static_cast<uint64_t>(1) << 63))) {
114  res.f <<= 1;
115  res.e--;
116  }
117  return res;
118 #endif
119  }
120 
122  DiyFp res = *this;
123  while (!(res.f & (kDpHiddenBit << 1))) {
124  res.f <<= 1;
125  res.e--;
126  }
127  res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
128  res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
129  return res;
130  }
131 
132  void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
133  DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
134  DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
135  mi.f <<= mi.e - pl.e;
136  mi.e = pl.e;
137  *plus = pl;
138  *minus = mi;
139  }
140 
141  double ToDouble() const {
142  union {
143  double d;
144  uint64_t u64;
145  }u;
147  if (e < kDpDenormalExponent) {
148  // Underflow.
149  return 0.0;
150  }
151  if (e >= kDpMaxExponent) {
152  // Overflow.
153  return std::numeric_limits<double>::infinity();
154  }
155  const uint64_t be = (e == kDpDenormalExponent && (f & kDpHiddenBit) == 0) ? 0 :
156  static_cast<uint64_t>(e + kDpExponentBias);
157  u.u64 = (f & kDpSignificandMask) | (be << kDpSignificandSize);
158  return u.d;
159  }
160 
161  static const int kDiySignificandSize = 64;
162  static const int kDpSignificandSize = 52;
163  static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
164  static const int kDpMaxExponent = 0x7FF - kDpExponentBias;
165  static const int kDpMinExponent = -kDpExponentBias;
166  static const int kDpDenormalExponent = -kDpExponentBias + 1;
167  static const uint64_t kDpExponentMask = CEREAL_RAPIDJSON_UINT64_C2(0x7FF00000, 0x00000000);
168  static const uint64_t kDpSignificandMask = CEREAL_RAPIDJSON_UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
169  static const uint64_t kDpHiddenBit = CEREAL_RAPIDJSON_UINT64_C2(0x00100000, 0x00000000);
170 
172  int e;
173 };
174 
175 inline DiyFp GetCachedPowerByIndex(size_t index) {
176  // 10^-348, 10^-340, ..., 10^340
177  static const uint64_t kCachedPowers_F[] = {
178  CEREAL_RAPIDJSON_UINT64_C2(0xfa8fd5a0, 0x081c0288), CEREAL_RAPIDJSON_UINT64_C2(0xbaaee17f, 0xa23ebf76),
179  CEREAL_RAPIDJSON_UINT64_C2(0x8b16fb20, 0x3055ac76), CEREAL_RAPIDJSON_UINT64_C2(0xcf42894a, 0x5dce35ea),
180  CEREAL_RAPIDJSON_UINT64_C2(0x9a6bb0aa, 0x55653b2d), CEREAL_RAPIDJSON_UINT64_C2(0xe61acf03, 0x3d1a45df),
181  CEREAL_RAPIDJSON_UINT64_C2(0xab70fe17, 0xc79ac6ca), CEREAL_RAPIDJSON_UINT64_C2(0xff77b1fc, 0xbebcdc4f),
182  CEREAL_RAPIDJSON_UINT64_C2(0xbe5691ef, 0x416bd60c), CEREAL_RAPIDJSON_UINT64_C2(0x8dd01fad, 0x907ffc3c),
183  CEREAL_RAPIDJSON_UINT64_C2(0xd3515c28, 0x31559a83), CEREAL_RAPIDJSON_UINT64_C2(0x9d71ac8f, 0xada6c9b5),
184  CEREAL_RAPIDJSON_UINT64_C2(0xea9c2277, 0x23ee8bcb), CEREAL_RAPIDJSON_UINT64_C2(0xaecc4991, 0x4078536d),
185  CEREAL_RAPIDJSON_UINT64_C2(0x823c1279, 0x5db6ce57), CEREAL_RAPIDJSON_UINT64_C2(0xc2109436, 0x4dfb5637),
186  CEREAL_RAPIDJSON_UINT64_C2(0x9096ea6f, 0x3848984f), CEREAL_RAPIDJSON_UINT64_C2(0xd77485cb, 0x25823ac7),
187  CEREAL_RAPIDJSON_UINT64_C2(0xa086cfcd, 0x97bf97f4), CEREAL_RAPIDJSON_UINT64_C2(0xef340a98, 0x172aace5),
188  CEREAL_RAPIDJSON_UINT64_C2(0xb23867fb, 0x2a35b28e), CEREAL_RAPIDJSON_UINT64_C2(0x84c8d4df, 0xd2c63f3b),
189  CEREAL_RAPIDJSON_UINT64_C2(0xc5dd4427, 0x1ad3cdba), CEREAL_RAPIDJSON_UINT64_C2(0x936b9fce, 0xbb25c996),
190  CEREAL_RAPIDJSON_UINT64_C2(0xdbac6c24, 0x7d62a584), CEREAL_RAPIDJSON_UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
191  CEREAL_RAPIDJSON_UINT64_C2(0xf3e2f893, 0xdec3f126), CEREAL_RAPIDJSON_UINT64_C2(0xb5b5ada8, 0xaaff80b8),
192  CEREAL_RAPIDJSON_UINT64_C2(0x87625f05, 0x6c7c4a8b), CEREAL_RAPIDJSON_UINT64_C2(0xc9bcff60, 0x34c13053),
193  CEREAL_RAPIDJSON_UINT64_C2(0x964e858c, 0x91ba2655), CEREAL_RAPIDJSON_UINT64_C2(0xdff97724, 0x70297ebd),
194  CEREAL_RAPIDJSON_UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), CEREAL_RAPIDJSON_UINT64_C2(0xf8a95fcf, 0x88747d94),
195  CEREAL_RAPIDJSON_UINT64_C2(0xb9447093, 0x8fa89bcf), CEREAL_RAPIDJSON_UINT64_C2(0x8a08f0f8, 0xbf0f156b),
196  CEREAL_RAPIDJSON_UINT64_C2(0xcdb02555, 0x653131b6), CEREAL_RAPIDJSON_UINT64_C2(0x993fe2c6, 0xd07b7fac),
197  CEREAL_RAPIDJSON_UINT64_C2(0xe45c10c4, 0x2a2b3b06), CEREAL_RAPIDJSON_UINT64_C2(0xaa242499, 0x697392d3),
198  CEREAL_RAPIDJSON_UINT64_C2(0xfd87b5f2, 0x8300ca0e), CEREAL_RAPIDJSON_UINT64_C2(0xbce50864, 0x92111aeb),
199  CEREAL_RAPIDJSON_UINT64_C2(0x8cbccc09, 0x6f5088cc), CEREAL_RAPIDJSON_UINT64_C2(0xd1b71758, 0xe219652c),
200  CEREAL_RAPIDJSON_UINT64_C2(0x9c400000, 0x00000000), CEREAL_RAPIDJSON_UINT64_C2(0xe8d4a510, 0x00000000),
201  CEREAL_RAPIDJSON_UINT64_C2(0xad78ebc5, 0xac620000), CEREAL_RAPIDJSON_UINT64_C2(0x813f3978, 0xf8940984),
202  CEREAL_RAPIDJSON_UINT64_C2(0xc097ce7b, 0xc90715b3), CEREAL_RAPIDJSON_UINT64_C2(0x8f7e32ce, 0x7bea5c70),
203  CEREAL_RAPIDJSON_UINT64_C2(0xd5d238a4, 0xabe98068), CEREAL_RAPIDJSON_UINT64_C2(0x9f4f2726, 0x179a2245),
204  CEREAL_RAPIDJSON_UINT64_C2(0xed63a231, 0xd4c4fb27), CEREAL_RAPIDJSON_UINT64_C2(0xb0de6538, 0x8cc8ada8),
205  CEREAL_RAPIDJSON_UINT64_C2(0x83c7088e, 0x1aab65db), CEREAL_RAPIDJSON_UINT64_C2(0xc45d1df9, 0x42711d9a),
206  CEREAL_RAPIDJSON_UINT64_C2(0x924d692c, 0xa61be758), CEREAL_RAPIDJSON_UINT64_C2(0xda01ee64, 0x1a708dea),
207  CEREAL_RAPIDJSON_UINT64_C2(0xa26da399, 0x9aef774a), CEREAL_RAPIDJSON_UINT64_C2(0xf209787b, 0xb47d6b85),
208  CEREAL_RAPIDJSON_UINT64_C2(0xb454e4a1, 0x79dd1877), CEREAL_RAPIDJSON_UINT64_C2(0x865b8692, 0x5b9bc5c2),
209  CEREAL_RAPIDJSON_UINT64_C2(0xc83553c5, 0xc8965d3d), CEREAL_RAPIDJSON_UINT64_C2(0x952ab45c, 0xfa97a0b3),
210  CEREAL_RAPIDJSON_UINT64_C2(0xde469fbd, 0x99a05fe3), CEREAL_RAPIDJSON_UINT64_C2(0xa59bc234, 0xdb398c25),
211  CEREAL_RAPIDJSON_UINT64_C2(0xf6c69a72, 0xa3989f5c), CEREAL_RAPIDJSON_UINT64_C2(0xb7dcbf53, 0x54e9bece),
212  CEREAL_RAPIDJSON_UINT64_C2(0x88fcf317, 0xf22241e2), CEREAL_RAPIDJSON_UINT64_C2(0xcc20ce9b, 0xd35c78a5),
213  CEREAL_RAPIDJSON_UINT64_C2(0x98165af3, 0x7b2153df), CEREAL_RAPIDJSON_UINT64_C2(0xe2a0b5dc, 0x971f303a),
214  CEREAL_RAPIDJSON_UINT64_C2(0xa8d9d153, 0x5ce3b396), CEREAL_RAPIDJSON_UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
215  CEREAL_RAPIDJSON_UINT64_C2(0xbb764c4c, 0xa7a44410), CEREAL_RAPIDJSON_UINT64_C2(0x8bab8eef, 0xb6409c1a),
216  CEREAL_RAPIDJSON_UINT64_C2(0xd01fef10, 0xa657842c), CEREAL_RAPIDJSON_UINT64_C2(0x9b10a4e5, 0xe9913129),
217  CEREAL_RAPIDJSON_UINT64_C2(0xe7109bfb, 0xa19c0c9d), CEREAL_RAPIDJSON_UINT64_C2(0xac2820d9, 0x623bf429),
218  CEREAL_RAPIDJSON_UINT64_C2(0x80444b5e, 0x7aa7cf85), CEREAL_RAPIDJSON_UINT64_C2(0xbf21e440, 0x03acdd2d),
219  CEREAL_RAPIDJSON_UINT64_C2(0x8e679c2f, 0x5e44ff8f), CEREAL_RAPIDJSON_UINT64_C2(0xd433179d, 0x9c8cb841),
220  CEREAL_RAPIDJSON_UINT64_C2(0x9e19db92, 0xb4e31ba9), CEREAL_RAPIDJSON_UINT64_C2(0xeb96bf6e, 0xbadf77d9),
221  CEREAL_RAPIDJSON_UINT64_C2(0xaf87023b, 0x9bf0ee6b)
222  };
223  static const int16_t kCachedPowers_E[] = {
224  -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980,
225  -954, -927, -901, -874, -847, -821, -794, -768, -741, -715,
226  -688, -661, -635, -608, -582, -555, -529, -502, -475, -449,
227  -422, -396, -369, -343, -316, -289, -263, -236, -210, -183,
228  -157, -130, -103, -77, -50, -24, 3, 30, 56, 83,
229  109, 136, 162, 189, 216, 242, 269, 295, 322, 348,
230  375, 402, 428, 455, 481, 508, 534, 561, 588, 614,
231  641, 667, 694, 720, 747, 774, 800, 827, 853, 880,
232  907, 933, 960, 986, 1013, 1039, 1066
233  };
234  CEREAL_RAPIDJSON_ASSERT(index < 87);
235  return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
236 }
237 
238 inline DiyFp GetCachedPower(int e, int* K) {
239 
240  //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
241  double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
242  int k = static_cast<int>(dk);
243  if (dk - k > 0.0)
244  k++;
245 
246  unsigned index = static_cast<unsigned>((k >> 3) + 1);
247  *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
248 
249  return GetCachedPowerByIndex(index);
250 }
251 
252 inline DiyFp GetCachedPower10(int exp, int *outExp) {
253  CEREAL_RAPIDJSON_ASSERT(exp >= -348);
254  unsigned index = static_cast<unsigned>(exp + 348) / 8u;
255  *outExp = -348 + static_cast<int>(index) * 8;
256  return GetCachedPowerByIndex(index);
257 }
258 
259 #ifdef __GNUC__
260 CEREAL_RAPIDJSON_DIAG_POP
261 #endif
262 
263 #ifdef __clang__
264 CEREAL_RAPIDJSON_DIAG_POP
265 CEREAL_RAPIDJSON_DIAG_OFF(padded)
266 #endif
267 
268 } // namespace internal
270 
271 #endif // CEREAL_RAPIDJSON_DIYFP_H_
#define CEREAL_RAPIDJSON_ASSERT(x)
Definition: json.hpp:50
#define CEREAL_RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition: rapidjson.h:121
#define CEREAL_RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition: rapidjson.h:124
uint d
Definition: document.h:416
DiyFp GetCachedPowerByIndex(size_t index)
Definition: diyfp.h:175
DiyFp GetCachedPower10(int exp, int *outExp)
Definition: diyfp.h:252
DiyFp GetCachedPower(int e, int *K)
Definition: diyfp.h:238
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition: pointer.h:1181
#define CEREAL_RAPIDJSON_UINT64_C2(high32, low32)
Construct a 64-bit literal by a pair of 32-bit integer.
Definition: rapidjson.h:289
signed short int16_t
Definition: stdint.h:122
unsigned __int64 uint64_t
Definition: stdint.h:136
Definition: diyfp.h:44
static const int kDpSignificandSize
Definition: diyfp.h:162
uint64_t f
Definition: diyfp.h:171
static const int kDpExponentBias
Definition: diyfp.h:163
DiyFp NormalizeBoundary() const
Definition: diyfp.h:121
static const uint64_t kDpHiddenBit
Definition: diyfp.h:169
static const int kDpMaxExponent
Definition: diyfp.h:164
static const uint64_t kDpSignificandMask
Definition: diyfp.h:168
DiyFp operator*(const DiyFp &rhs) const
Definition: diyfp.h:71
static const int kDpDenormalExponent
Definition: diyfp.h:166
DiyFp(uint64_t fp, int exp)
Definition: diyfp.h:47
static const int kDpMinExponent
Definition: diyfp.h:165
DiyFp operator-(const DiyFp &rhs) const
Definition: diyfp.h:67
DiyFp Normalize() const
Definition: diyfp.h:102
static const uint64_t kDpExponentMask
Definition: diyfp.h:167
static const int kDiySignificandSize
Definition: diyfp.h:161
double ToDouble() const
Definition: diyfp.h:141
DiyFp(double d)
Definition: diyfp.h:49
void NormalizedBoundaries(DiyFp *minus, DiyFp *plus) const
Definition: diyfp.h:132
DiyFp()
Definition: diyfp.h:45
int e
Definition: diyfp.h:172
#define __extension__
Definition: valgrind.h:3676