15 #ifndef CEREAL_RAPIDJSON_INTERNAL_REGEX_H_
16 #define CEREAL_RAPIDJSON_INTERNAL_REGEX_H_
18 #include "../allocators.h"
19 #include "../stream.h"
23 CEREAL_RAPIDJSON_DIAG_PUSH
24 CEREAL_RAPIDJSON_DIAG_OFF(padded)
25 CEREAL_RAPIDJSON_DIAG_OFF(
switch-
enum)
26 CEREAL_RAPIDJSON_DIAG_OFF(implicit-fallthrough)
27 #elif defined(_MSC_VER)
28 CEREAL_RAPIDJSON_DIAG_PUSH
29 CEREAL_RAPIDJSON_DIAG_OFF(4512)
33 CEREAL_RAPIDJSON_DIAG_PUSH
34 CEREAL_RAPIDJSON_DIAG_OFF(effc++)
36 CEREAL_RAPIDJSON_DIAG_OFF(implicit-fallthrough)
40 #ifndef CEREAL_RAPIDJSON_REGEX_VERBOSE
41 #define CEREAL_RAPIDJSON_REGEX_VERBOSE 0
50 template <
typename SourceStream,
typename Encoding>
78 template <
typename Encoding,
typename Allocator>
113 template <
typename Encoding,
typename Allocator = CrtAllocator>
117 typedef typename Encoding::Ch
Ch;
175 return states_.template Bottom<State>()[index];
180 return states_.template Bottom<State>()[index];
185 return ranges_.template Bottom<Range>()[index];
190 return ranges_.template Bottom<Range>()[index];
193 template <
typename InputStream>
199 *atomCountStack.template Push<unsigned>() = 0;
202 while (ds.
Peek() != 0) {
203 switch (codepoint = ds.
Take()) {
213 while (!operatorStack.
Empty() && *operatorStack.template Top<Operator>() <
kAlternation)
214 if (!
Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
216 *operatorStack.template Push<Operator>() =
kAlternation;
217 *atomCountStack.template Top<unsigned>() = 0;
222 *atomCountStack.template Push<unsigned>() = 0;
227 if (!
Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
229 if (operatorStack.
Empty())
231 operatorStack.template Pop<Operator>(1);
232 atomCountStack.template Pop<unsigned>(1);
257 if (ds.
Peek() ==
',') {
259 if (ds.
Peek() ==
'}')
285 *operandStack.template Push<Frag>() =
Frag(s, s, s);
301 while (!operatorStack.
Empty())
302 if (!
Eval(operandStack, *operatorStack.template Pop<Operator>(1)))
307 Frag* e = operandStack.template Pop<Frag>(1);
311 #if CEREAL_RAPIDJSON_REGEX_VERBOSE
312 printf(
"root: %d\n",
root_);
315 printf(
"[%2d] out: %2d out1: %2d c: '%c'\n", i, s.
out, s.
out1, (
char)s.
codepoint);
333 *operandStack.template Push<Frag>() =
Frag(s, s, s);
337 if (*atomCountStack.template Top<unsigned>())
339 (*atomCountStack.template Top<unsigned>())++;
362 Frag e2 = *operandStack.template Pop<Frag>(1);
363 Frag e1 = *operandStack.template Pop<Frag>(1);
371 Frag e2 = *operandStack.template Pop<Frag>(1);
372 Frag e1 = *operandStack.template Pop<Frag>(1);
381 Frag e = *operandStack.template Pop<Frag>(1);
390 Frag e = *operandStack.template Pop<Frag>(1);
393 *operandStack.template Push<Frag>() =
Frag(s, s, e.
minIndex);
400 Frag e = *operandStack.template Pop<Frag>(1);
425 for (
unsigned i = 0; i < m - 1; i++)
427 for (
unsigned i = 0; i < m - 1; i++)
433 for (
unsigned i = 0; i < n - 1; i++)
441 for (
unsigned i = n; i < m - 1; i++)
443 for (
unsigned i = n; i < m; i++)
447 for (
unsigned i = 0; i < n - 1; i++)
456 const Frag src = *operandStack.template Top<Frag>();
460 for (
SizeType j = 0; j < count; j++) {
466 *operandStack.template Push<Frag>() =
Frag(src.
start + count, src.
out + count, src.
minIndex + count);
470 template <
typename InputStream>
473 if (ds.
Peek() <
'0' || ds.
Peek() >
'9')
475 while (ds.
Peek() >=
'0' && ds.
Peek() <=
'9') {
476 if (r >= 429496729 && ds.
Peek() >
'5')
478 r = r * 10 + (ds.
Take() -
'0');
484 template <
typename InputStream>
492 while ((codepoint = ds.
Take()) != 0) {
495 if (codepoint ==
'^') {
516 if (ds.
Peek() ==
'b') {
527 if (codepoint ==
'-') {
562 template <
typename InputStream>
565 switch (codepoint = ds.
Take()) {
580 *escapedCodepoint = codepoint;
return true;
581 case 'f': *escapedCodepoint = 0x000C;
return true;
582 case 'n': *escapedCodepoint = 0x000A;
return true;
583 case 'r': *escapedCodepoint = 0x000D;
return true;
584 case 't': *escapedCodepoint = 0x0009;
return true;
585 case 'v': *escapedCodepoint = 0x000B;
return true;
606 template <
typename RegexType,
typename Allocator = CrtAllocator>
610 typedef typename Encoding::Ch
Ch;
629 template <
typename InputStream>
639 template <
typename InputStream>
650 typedef typename RegexType::State
State;
651 typedef typename RegexType::Range
Range;
653 template <
typename InputStream>
664 while (!current->
Empty() && (codepoint = ds.
Take()) != 0) {
668 for (
const SizeType* s = current->template Bottom<SizeType>(); s != current->template End<SizeType>(); ++s) {
670 if (sr.codepoint == codepoint ||
671 sr.codepoint == RegexType::kAnyCharacterClass ||
672 (sr.codepoint == RegexType::kRangeCharacterClass &&
MatchRange(sr.rangeStart, codepoint)))
674 matched =
AddState(*next, sr.out) || matched;
675 if (!anchorEnd && matched)
688 return (
regex_.stateCount_ + 31) / 32 * 4;
698 return AddState(l, s.out1) || matched;
700 else if (!(
stateSet_[index >> 5] & (1u << (index & 31)))) {
701 stateSet_[index >> 5] |= (1u << (index & 31));
702 *l.template PushUnsafe<SizeType>() = index;
708 bool yes = (
regex_.GetRange(rangeIndex).start & RegexType::kRangeNegationFlag) == 0;
711 if (codepoint >= (r.start & ~RegexType::kRangeNegationFlag) && codepoint <= r.end)
733 CEREAL_RAPIDJSON_DIAG_POP
736 #if defined(__clang__) || defined(_MSC_VER)
737 CEREAL_RAPIDJSON_DIAG_POP
#define CEREAL_RAPIDJSON_ASSERT(x)
Definition: json.hpp:50
EncodingType
Definition: Xml.h:26
DecodedStream(SourceStream &ss)
Definition: regex.h:53
unsigned Take()
Definition: regex.h:55
unsigned codepoint_
Definition: regex.h:69
unsigned Peek()
Definition: regex.h:54
SourceStream & ss_
Definition: regex.h:68
void Decode()
Definition: regex.h:63
Stack< Allocator > state1_
Definition: regex.h:722
RegexType::State State
Definition: regex.h:650
size_t GetStateSetSize() const
Definition: regex.h:687
bool MatchRange(SizeType rangeIndex, unsigned codepoint) const
Definition: regex.h:707
uint32_t * stateSet_
Definition: regex.h:723
Stack< Allocator > state0_
Definition: regex.h:721
GenericRegexSearch(const RegexType &regex, Allocator *allocator=0)
Definition: regex.h:612
bool Search(InputStream &is)
Definition: regex.h:640
RegexType::EncodingType Encoding
Definition: regex.h:609
Allocator * ownAllocator_
Definition: regex.h:720
bool SearchWithAnchoring(InputStream &is, bool anchorBegin, bool anchorEnd)
Definition: regex.h:654
Encoding::Ch Ch
Definition: regex.h:610
bool Search(const Ch *s)
Definition: regex.h:644
bool Match(const Ch *s)
Definition: regex.h:634
bool AddState(Stack< Allocator > &l, SizeType index)
Definition: regex.h:692
Allocator * allocator_
Definition: regex.h:719
RegexType::Range Range
Definition: regex.h:651
const RegexType & regex_
Definition: regex.h:718
~GenericRegexSearch()
Definition: regex.h:624
bool Match(InputStream &is)
Definition: regex.h:630
Regular expression engine with a subset of ECMAScript grammar.
Definition: regex.h:114
void Parse(DecodedStream< InputStream, Encoding > &ds)
Definition: regex.h:194
static SizeType Min(SizeType a, SizeType b)
Definition: regex.h:453
bool Eval(Stack< Allocator > &operandStack, Operator op)
Definition: regex.h:357
static const unsigned kInfinityQuantifier
Definition: regex.h:599
bool ParseUnsigned(DecodedStream< InputStream, Encoding > &ds, unsigned *u)
Definition: regex.h:471
void PushOperand(Stack< Allocator > &operandStack, unsigned codepoint)
Definition: regex.h:331
static const unsigned kAnyCharacterClass
For '.'.
Definition: regex.h:149
State & GetState(SizeType index)
Definition: regex.h:173
SizeType root_
Definition: regex.h:595
Allocator * allocator_
Definition: regex.h:592
bool IsValid() const
Definition: regex.h:135
GenericRegex(const Ch *source, Allocator *allocator=0)
Definition: regex.h:120
void ImplicitConcatenation(Stack< Allocator > &atomCountStack, Stack< Allocator > &operatorStack)
Definition: regex.h:336
void CloneTopOperand(Stack< Allocator > &operandStack)
Definition: regex.h:455
Encoding::Ch Ch
Definition: regex.h:117
Stack< Allocator > states_
Definition: regex.h:593
const State & GetState(SizeType index) const
Definition: regex.h:178
Allocator * ownAllocator_
Definition: regex.h:591
~GenericRegex()
Definition: regex.h:130
bool EvalQuantifier(Stack< Allocator > &operandStack, unsigned n, unsigned m)
Definition: regex.h:414
SizeType NewRange(unsigned codepoint)
Definition: regex.h:555
Encoding EncodingType
Definition: regex.h:116
void Patch(SizeType l, SizeType s)
Definition: regex.h:350
SizeType Append(SizeType l1, SizeType l2)
Definition: regex.h:342
bool ParseRange(DecodedStream< InputStream, Encoding > &ds, SizeType *range)
Definition: regex.h:485
Range & GetRange(SizeType index)
Definition: regex.h:183
bool anchorBegin_
Definition: regex.h:602
Operator
Definition: regex.h:140
@ kOneOrMore
Definition: regex.h:143
@ kAlternation
Definition: regex.h:145
@ kConcatenation
Definition: regex.h:144
@ kZeroOrMore
Definition: regex.h:142
@ kZeroOrOne
Definition: regex.h:141
@ kLeftParenthesis
Definition: regex.h:146
SizeType NewState(SizeType out, SizeType out1, unsigned codepoint)
Definition: regex.h:322
SizeType rangeCount_
Definition: regex.h:597
bool anchorEnd_
Definition: regex.h:603
static const unsigned kRangeNegationFlag
Definition: regex.h:151
bool CharacterEscape(DecodedStream< InputStream, Encoding > &ds, unsigned *escapedCodepoint)
Definition: regex.h:563
const Range & GetRange(SizeType index) const
Definition: regex.h:188
static const unsigned kRangeCharacterClass
Definition: regex.h:150
SizeType stateCount_
Definition: regex.h:596
Stack< Allocator > ranges_
Definition: regex.h:594
A type-unsafe stack for storing different types of data.
Definition: stack.h:37
bool Empty() const
Definition: stack.h:177
size_t GetSize() const
Definition: stack.h:178
Concept for allocating, resizing and freeing memory block.
Concept for encoding of Unicode characters.
#define CEREAL_RAPIDJSON_NAMESPACE_BEGIN
provide custom rapidjson namespace (opening expression)
Definition: rapidjson.h:121
#define CEREAL_RAPIDJSON_NAMESPACE_END
provide custom rapidjson namespace (closing expression)
Definition: rapidjson.h:124
std::true_type yes
Definition: traits.hpp:49
Definition: document.h:416
GenericRegexSearch< Regex > RegexSearch
Definition: regex.h:727
void Swap(T &a, T &b) CEREAL_RAPIDJSON_NOEXCEPT
Custom swap() to avoid dependency on C++ <algorithm> header.
Definition: swap.h:33
static const SizeType kRegexInvalidState
Represents an invalid index in GenericRegex::State::out, out1.
Definition: regex.h:75
GenericRegex< UTF8<> > Regex
Definition: regex.h:726
static const SizeType kRegexInvalidRange
Definition: regex.h:76
const CharType(& source)[N]
Definition: pointer.h:1204
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition: pointer.h:1181
#define CEREAL_RAPIDJSON_DELETE(x)
Customization point for global delete.
Definition: rapidjson.h:631
#define CEREAL_RAPIDJSON_NEW(TypeName)
Customization point for global new.
Definition: rapidjson.h:627
CEREAL_RAPIDJSON_NAMESPACE_BEGIN typedef unsigned SizeType
Size type (for string lengths, array sizes, etc.)
Definition: rapidjson.h:384
unsigned int uint32_t
Definition: stdint.h:126
Read-only string stream.
Definition: stream.h:154
Frag(SizeType s, SizeType o, SizeType m)
Definition: regex.h:167
SizeType out
Linked list of all output states.
Definition: regex.h:169
SizeType minIndex
Definition: regex.h:170
SizeType start
Definition: regex.h:168
unsigned end
Definition: regex.h:155
unsigned start
Definition: regex.h:154
SizeType next
Definition: regex.h:156
SizeType out1
Not equal to kInvalid for a split state.
Definition: regex.h:161
SizeType out
Equal to kInvalid for a matching state.
Definition: regex.h:160
SizeType rangeStart
Definition: regex.h:162
unsigned codepoint
Definition: regex.h:163