libstdc++
regex_compiler.h
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2010-2013 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex_compiler.h
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 
37  /**
38  * @addtogroup regex-detail
39  * @{
40  */
41 
42  /// Base class for scanner.
44  {
45  typedef unsigned int _StateT;
46 
47  static constexpr _StateT _S_state_at_start = 1 << 0;
48  static constexpr _StateT _S_state_in_brace = 1 << 2;
49  static constexpr _StateT _S_state_in_bracket = 1 << 3;
50 
51  virtual ~_Scanner_base() { };
52  };
53 
54  /**
55  * @brief struct _Scanner. Scans an input range for regex tokens.
56  *
57  * The %_Scanner class interprets the regular expression pattern in
58  * the input range passed to its constructor as a sequence of parse
59  * tokens passed to the regular expression compiler. The sequence
60  * of tokens provided depends on the flag settings passed to the
61  * constructor: different regular expression grammars will interpret
62  * the same input pattern in syntactically different ways.
63  */
64  template<typename _InputIterator>
65  class _Scanner: public _Scanner_base
66  {
67  public:
68  typedef _InputIterator _IteratorT;
69  typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
72  typedef const std::ctype<_CharT> _CtypeT;
73 
74  /// Token types returned from the scanner.
75  enum _TokenT
76  {
77  _S_token_anychar,
78  _S_token_backref,
79  _S_token_bracket_begin,
80  _S_token_bracket_end,
81  _S_token_inverse_class,
82  _S_token_char_class_name,
83  _S_token_closure0,
84  _S_token_closure1,
85  _S_token_collelem_multi,
86  _S_token_collelem_single,
87  _S_token_collsymbol,
88  _S_token_comma,
89  _S_token_dash,
90  _S_token_dup_count,
91  _S_token_eof,
92  _S_token_equiv_class_name,
93  _S_token_interval_begin,
94  _S_token_interval_end,
95  _S_token_line_begin,
96  _S_token_line_end,
97  _S_token_opt,
98  _S_token_or,
99  _S_token_ord_char,
100  _S_token_quoted_char,
101  _S_token_subexpr_begin,
102  _S_token_subexpr_end,
103  _S_token_word_begin,
104  _S_token_word_end,
105  _S_token_unknown
106  };
107 
108  _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
109  std::locale __loc)
110  : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
111  _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
112  { _M_advance(); }
113 
114  void
115  _M_advance();
116 
117  _TokenT
118  _M_token() const
119  { return _M_curToken; }
120 
121  const _StringT&
122  _M_value() const
123  { return _M_curValue; }
124 
125 #ifdef _GLIBCXX_DEBUG
126  std::ostream&
127  _M_print(std::ostream&);
128 #endif
129 
130  private:
131  void
132  _M_eat_escape();
133 
134  void
135  _M_scan_in_brace();
136 
137  void
138  _M_scan_in_bracket();
139 
140  void
141  _M_eat_charclass();
142 
143  void
144  _M_eat_equivclass();
145 
146  void
147  _M_eat_collsymbol();
148 
149  _IteratorT _M_current;
150  _IteratorT _M_end;
151  _FlagT _M_flags;
152  _CtypeT& _M_ctype;
153  _TokenT _M_curToken;
154  _StringT _M_curValue;
155  _StateT _M_state;
156  };
157 
158  template<typename _InputIterator>
159  void
160  _Scanner<_InputIterator>::
161  _M_advance()
162  {
163  if (_M_current == _M_end)
164  {
165  _M_curToken = _S_token_eof;
166  return;
167  }
168 
169  _CharT __c = *_M_current;
170  if (_M_state & _S_state_in_bracket)
171  {
172  _M_scan_in_bracket();
173  return;
174  }
175  if (_M_state & _S_state_in_brace)
176  {
177  _M_scan_in_brace();
178  return;
179  }
180 #if 0
181  // TODO: re-enable line anchors when _M_assertion is implemented.
182  // See PR libstdc++/47724
183  else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
184  {
185  _M_curToken = _S_token_line_begin;
186  ++_M_current;
187  return;
188  }
189  else if (__c == _M_ctype.widen('$'))
190  {
191  _M_curToken = _S_token_line_end;
192  ++_M_current;
193  return;
194  }
195 #endif
196  else if (__c == _M_ctype.widen('.'))
197  {
198  _M_curToken = _S_token_anychar;
199  ++_M_current;
200  return;
201  }
202  else if (__c == _M_ctype.widen('*'))
203  {
204  _M_curToken = _S_token_closure0;
205  ++_M_current;
206  return;
207  }
208  else if (__c == _M_ctype.widen('+'))
209  {
210  _M_curToken = _S_token_closure1;
211  ++_M_current;
212  return;
213  }
214  else if (__c == _M_ctype.widen('|'))
215  {
216  _M_curToken = _S_token_or;
217  ++_M_current;
218  return;
219  }
220  else if (__c == _M_ctype.widen('['))
221  {
222  _M_curToken = _S_token_bracket_begin;
223  _M_state |= (_S_state_in_bracket | _S_state_at_start);
224  ++_M_current;
225  return;
226  }
227  else if (__c == _M_ctype.widen('\\'))
228  {
229  _M_eat_escape();
230  return;
231  }
232  else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
233  {
234  if (__c == _M_ctype.widen('('))
235  {
236  _M_curToken = _S_token_subexpr_begin;
237  ++_M_current;
238  return;
239  }
240  else if (__c == _M_ctype.widen(')'))
241  {
242  _M_curToken = _S_token_subexpr_end;
243  ++_M_current;
244  return;
245  }
246  else if (__c == _M_ctype.widen('{'))
247  {
248  _M_curToken = _S_token_interval_begin;
249  _M_state |= _S_state_in_brace;
250  ++_M_current;
251  return;
252  }
253  }
254 
255  _M_curToken = _S_token_ord_char;
256  _M_curValue.assign(1, __c);
257  ++_M_current;
258  }
259 
260 
261  template<typename _InputIterator>
262  void
263  _Scanner<_InputIterator>::
264  _M_scan_in_brace()
265  {
266  if (_M_ctype.is(_CtypeT::digit, *_M_current))
267  {
268  _M_curToken = _S_token_dup_count;
269  _M_curValue.assign(1, *_M_current);
270  ++_M_current;
271  while (_M_current != _M_end
272  && _M_ctype.is(_CtypeT::digit, *_M_current))
273  {
274  _M_curValue += *_M_current;
275  ++_M_current;
276  }
277  return;
278  }
279  else if (*_M_current == _M_ctype.widen(','))
280  {
281  _M_curToken = _S_token_comma;
282  ++_M_current;
283  return;
284  }
286  {
287  if (*_M_current == _M_ctype.widen('\\'))
288  _M_eat_escape();
289  }
290  else
291  {
292  if (*_M_current == _M_ctype.widen('}'))
293  {
294  _M_curToken = _S_token_interval_end;
295  _M_state &= ~_S_state_in_brace;
296  ++_M_current;
297  return;
298  }
299  }
300  }
301 
302  template<typename _InputIterator>
303  void
304  _Scanner<_InputIterator>::
305  _M_scan_in_bracket()
306  {
307  if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
308  {
309  _M_curToken = _S_token_inverse_class;
310  _M_state &= ~_S_state_at_start;
311  ++_M_current;
312  return;
313  }
314  else if (*_M_current == _M_ctype.widen('['))
315  {
316  ++_M_current;
317  if (_M_current == _M_end)
318  {
319  _M_curToken = _S_token_eof;
320  return;
321  }
322 
323  if (*_M_current == _M_ctype.widen('.'))
324  {
325  _M_curToken = _S_token_collsymbol;
326  _M_eat_collsymbol();
327  return;
328  }
329  else if (*_M_current == _M_ctype.widen(':'))
330  {
331  _M_curToken = _S_token_char_class_name;
332  _M_eat_charclass();
333  return;
334  }
335  else if (*_M_current == _M_ctype.widen('='))
336  {
337  _M_curToken = _S_token_equiv_class_name;
338  _M_eat_equivclass();
339  return;
340  }
341  }
342  else if (*_M_current == _M_ctype.widen('-'))
343  {
344  _M_curToken = _S_token_dash;
345  ++_M_current;
346  return;
347  }
348  else if (*_M_current == _M_ctype.widen(']'))
349  {
350  if (!(_M_flags & regex_constants::ECMAScript)
351  || !(_M_state & _S_state_at_start))
352  {
353  // special case: only if _not_ chr first after
354  // '[' or '[^' and if not ECMAscript
355  _M_curToken = _S_token_bracket_end;
356  ++_M_current;
357  return;
358  }
359  }
360  _M_curToken = _S_token_collelem_single;
361  _M_curValue.assign(1, *_M_current);
362  ++_M_current;
363  }
364 
365  template<typename _InputIterator>
366  void
367  _Scanner<_InputIterator>::
368  _M_eat_escape()
369  {
370  ++_M_current;
371  if (_M_current == _M_end)
372  {
373  _M_curToken = _S_token_eof;
374  return;
375  }
376  _CharT __c = *_M_current;
377  ++_M_current;
378 
379  if (__c == _M_ctype.widen('('))
380  {
381  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
382  {
383  _M_curToken = _S_token_ord_char;
384  _M_curValue.assign(1, __c);
385  }
386  else
387  _M_curToken = _S_token_subexpr_begin;
388  }
389  else if (__c == _M_ctype.widen(')'))
390  {
391  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
392  {
393  _M_curToken = _S_token_ord_char;
394  _M_curValue.assign(1, __c);
395  }
396  else
397  _M_curToken = _S_token_subexpr_end;
398  }
399  else if (__c == _M_ctype.widen('{'))
400  {
401  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
402  {
403  _M_curToken = _S_token_ord_char;
404  _M_curValue.assign(1, __c);
405  }
406  else
407  {
408  _M_curToken = _S_token_interval_begin;
409  _M_state |= _S_state_in_brace;
410  }
411  }
412  else if (__c == _M_ctype.widen('}'))
413  {
414  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
415  {
416  _M_curToken = _S_token_ord_char;
417  _M_curValue.assign(1, __c);
418  }
419  else
420  {
421  if (!(_M_state && _S_state_in_brace))
422  __throw_regex_error(regex_constants::error_badbrace);
423  _M_state &= ~_S_state_in_brace;
424  _M_curToken = _S_token_interval_end;
425  }
426  }
427  else if (__c == _M_ctype.widen('x'))
428  {
429  ++_M_current;
430  if (_M_current == _M_end)
431  {
432  _M_curToken = _S_token_eof;
433  return;
434  }
435  if (_M_ctype.is(_CtypeT::digit, *_M_current))
436  {
437  _M_curValue.assign(1, *_M_current);
438  ++_M_current;
439  if (_M_current == _M_end)
440  {
441  _M_curToken = _S_token_eof;
442  return;
443  }
444  if (_M_ctype.is(_CtypeT::digit, *_M_current))
445  {
446  _M_curValue += *_M_current;
447  ++_M_current;
448  return;
449  }
450  }
451  }
452  else if (__c == _M_ctype.widen('^')
453  || __c == _M_ctype.widen('.')
454  || __c == _M_ctype.widen('*')
455  || __c == _M_ctype.widen('$')
456  || __c == _M_ctype.widen('\\'))
457  {
458  _M_curToken = _S_token_ord_char;
459  _M_curValue.assign(1, __c);
460  }
461  else if (_M_ctype.is(_CtypeT::digit, __c))
462  {
463  _M_curToken = _S_token_backref;
464  _M_curValue.assign(1, __c);
465  }
466  else
467  __throw_regex_error(regex_constants::error_escape);
468  }
469 
470 
471  // Eats a character class or throwns an exception.
472  // current point to ':' delimiter on entry, char after ']' on return
473  template<typename _InputIterator>
474  void
475  _Scanner<_InputIterator>::
476  _M_eat_charclass()
477  {
478  ++_M_current; // skip ':'
479  if (_M_current == _M_end)
480  __throw_regex_error(regex_constants::error_ctype);
481  for (_M_curValue.clear();
482  _M_current != _M_end && *_M_current != _M_ctype.widen(':');
483  ++_M_current)
484  _M_curValue += *_M_current;
485  if (_M_current == _M_end)
486  __throw_regex_error(regex_constants::error_ctype);
487  ++_M_current; // skip ':'
488  if (*_M_current != _M_ctype.widen(']'))
489  __throw_regex_error(regex_constants::error_ctype);
490  ++_M_current; // skip ']'
491  }
492 
493 
494  template<typename _InputIterator>
495  void
496  _Scanner<_InputIterator>::
497  _M_eat_equivclass()
498  {
499  ++_M_current; // skip '='
500  if (_M_current == _M_end)
501  __throw_regex_error(regex_constants::error_collate);
502  for (_M_curValue.clear();
503  _M_current != _M_end && *_M_current != _M_ctype.widen('=');
504  ++_M_current)
505  _M_curValue += *_M_current;
506  if (_M_current == _M_end)
507  __throw_regex_error(regex_constants::error_collate);
508  ++_M_current; // skip '='
509  if (*_M_current != _M_ctype.widen(']'))
510  __throw_regex_error(regex_constants::error_collate);
511  ++_M_current; // skip ']'
512  }
513 
514 
515  template<typename _InputIterator>
516  void
517  _Scanner<_InputIterator>::
518  _M_eat_collsymbol()
519  {
520  ++_M_current; // skip '.'
521  if (_M_current == _M_end)
522  __throw_regex_error(regex_constants::error_collate);
523  for (_M_curValue.clear();
524  _M_current != _M_end && *_M_current != _M_ctype.widen('.');
525  ++_M_current)
526  _M_curValue += *_M_current;
527  if (_M_current == _M_end)
528  __throw_regex_error(regex_constants::error_collate);
529  ++_M_current; // skip '.'
530  if (*_M_current != _M_ctype.widen(']'))
531  __throw_regex_error(regex_constants::error_collate);
532  ++_M_current; // skip ']'
533  }
534 
535 #ifdef _GLIBCXX_DEBUG
536  template<typename _InputIterator>
537  std::ostream&
538  _Scanner<_InputIterator>::
539  _M_print(std::ostream& ostr)
540  {
541  switch (_M_curToken)
542  {
543  case _S_token_anychar:
544  ostr << "any-character\n";
545  break;
546  case _S_token_backref:
547  ostr << "backref\n";
548  break;
549  case _S_token_bracket_begin:
550  ostr << "bracket-begin\n";
551  break;
552  case _S_token_bracket_end:
553  ostr << "bracket-end\n";
554  break;
555  case _S_token_char_class_name:
556  ostr << "char-class-name \"" << _M_curValue << "\"\n";
557  break;
558  case _S_token_closure0:
559  ostr << "closure0\n";
560  break;
561  case _S_token_closure1:
562  ostr << "closure1\n";
563  break;
564  case _S_token_collelem_multi:
565  ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
566  break;
567  case _S_token_collelem_single:
568  ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
569  break;
570  case _S_token_collsymbol:
571  ostr << "collsymbol \"" << _M_curValue << "\"\n";
572  break;
573  case _S_token_comma:
574  ostr << "comma\n";
575  break;
576  case _S_token_dash:
577  ostr << "dash\n";
578  break;
579  case _S_token_dup_count:
580  ostr << "dup count: " << _M_curValue << "\n";
581  break;
582  case _S_token_eof:
583  ostr << "EOF\n";
584  break;
585  case _S_token_equiv_class_name:
586  ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
587  break;
588  case _S_token_interval_begin:
589  ostr << "interval begin\n";
590  break;
591  case _S_token_interval_end:
592  ostr << "interval end\n";
593  break;
594  case _S_token_line_begin:
595  ostr << "line begin\n";
596  break;
597  case _S_token_line_end:
598  ostr << "line end\n";
599  break;
600  case _S_token_opt:
601  ostr << "opt\n";
602  break;
603  case _S_token_or:
604  ostr << "or\n";
605  break;
606  case _S_token_ord_char:
607  ostr << "ordinary character: \"" << _M_value() << "\"\n";
608  break;
609  case _S_token_quoted_char:
610  ostr << "quoted char\n";
611  break;
612  case _S_token_subexpr_begin:
613  ostr << "subexpr begin\n";
614  break;
615  case _S_token_subexpr_end:
616  ostr << "subexpr end\n";
617  break;
618  case _S_token_word_begin:
619  ostr << "word begin\n";
620  break;
621  case _S_token_word_end:
622  ostr << "word end\n";
623  break;
624  case _S_token_unknown:
625  ostr << "-- unknown token --\n";
626  break;
627  }
628  return ostr;
629  }
630 #endif
631 
632  /// Builds an NFA from an input iterator interval.
633  template<typename _InIter, typename _TraitsT>
634  class _Compiler
635  {
636  public:
637  typedef _InIter _IterT;
638  typedef typename std::iterator_traits<_InIter>::value_type _CharT;
641 
642  _Compiler(const _InIter& __b, const _InIter& __e,
643  _TraitsT& __traits, _FlagT __flags);
644 
645  const _Nfa&
646  _M_nfa() const
647  { return _M_state_store; }
648 
649  private:
651  typedef typename _ScannerT::_TokenT _TokenT;
654 
655  // accepts a specific token or returns false.
656  bool
657  _M_match_token(_TokenT __token);
658 
659  void
660  _M_disjunction();
661 
662  bool
663  _M_alternative();
664 
665  bool
666  _M_term();
667 
668  bool
669  _M_assertion();
670 
671  bool
672  _M_quantifier();
673 
674  bool
675  _M_atom();
676 
677  bool
678  _M_bracket_expression();
679 
680  bool
681  _M_bracket_list(_RMatcherT& __matcher);
682 
683  bool
684  _M_follow_list(_RMatcherT& __matcher);
685 
686  bool
687  _M_follow_list2(_RMatcherT& __matcher);
688 
689  bool
690  _M_expression_term(_RMatcherT& __matcher);
691 
692  bool
693  _M_range_expression(_RMatcherT& __matcher);
694 
695  bool
696  _M_start_range(_RMatcherT& __matcher);
697 
698  bool
699  _M_collating_symbol(_RMatcherT& __matcher);
700 
701  bool
702  _M_equivalence_class(_RMatcherT& __matcher);
703 
704  bool
705  _M_character_class(_RMatcherT& __matcher);
706 
707  int
708  _M_cur_int_value(int __radix);
709 
710  _TraitsT& _M_traits;
711  _ScannerT _M_scanner;
712  _StringT _M_cur_value;
713  _Nfa _M_state_store;
714  _StackT _M_stack;
715  };
716 
717  template<typename _InIter, typename _TraitsT>
719  _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
720  _Compiler<_InIter, _TraitsT>::_FlagT __flags)
721  : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
722  _M_state_store(__flags)
723  {
724  typedef _StartTagger<_InIter, _TraitsT> _Start;
725  typedef _EndTagger<_InIter, _TraitsT> _End;
726 
727  _StateSeq __r(_M_state_store,
728  _M_state_store._M_insert_subexpr_begin(_Start(0)));
729  _M_disjunction();
730  if (!_M_stack.empty())
731  {
732  __r._M_append(_M_stack.top());
733  _M_stack.pop();
734  }
735  __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0)));
736  __r._M_append(_M_state_store._M_insert_accept());
737  }
738 
739  template<typename _InIter, typename _TraitsT>
740  bool
741  _Compiler<_InIter, _TraitsT>::
742  _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
743  {
744  if (token == _M_scanner._M_token())
745  {
746  _M_cur_value = _M_scanner._M_value();
747  _M_scanner._M_advance();
748  return true;
749  }
750  return false;
751  }
752 
753  template<typename _InIter, typename _TraitsT>
754  void
755  _Compiler<_InIter, _TraitsT>::
756  _M_disjunction()
757  {
758  this->_M_alternative();
759  if (_M_match_token(_ScannerT::_S_token_or))
760  {
761  _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
762  this->_M_disjunction();
763  _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
764  _M_stack.push(_StateSeq(__alt1, __alt2));
765  }
766  }
767 
768  template<typename _InIter, typename _TraitsT>
769  bool
770  _Compiler<_InIter, _TraitsT>::
771  _M_alternative()
772  {
773  if (this->_M_term())
774  {
775  _StateSeq __re = _M_stack.top(); _M_stack.pop();
776  this->_M_alternative();
777  if (!_M_stack.empty())
778  {
779  __re._M_append(_M_stack.top());
780  _M_stack.pop();
781  }
782  _M_stack.push(__re);
783  return true;
784  }
785  return false;
786  }
787 
788  template<typename _InIter, typename _TraitsT>
789  bool
790  _Compiler<_InIter, _TraitsT>::
791  _M_term()
792  {
793  if (this->_M_assertion())
794  return true;
795  if (this->_M_atom())
796  {
797  this->_M_quantifier();
798  return true;
799  }
800  return false;
801  }
802 
803  template<typename _InIter, typename _TraitsT>
804  bool
805  _Compiler<_InIter, _TraitsT>::
806  _M_assertion()
807  {
808  if (_M_match_token(_ScannerT::_S_token_line_begin))
809  {
810  // __m.push(_Matcher::_S_opcode_line_begin);
811  return true;
812  }
813  if (_M_match_token(_ScannerT::_S_token_line_end))
814  {
815  // __m.push(_Matcher::_S_opcode_line_end);
816  return true;
817  }
818  if (_M_match_token(_ScannerT::_S_token_word_begin))
819  {
820  // __m.push(_Matcher::_S_opcode_word_begin);
821  return true;
822  }
823  if (_M_match_token(_ScannerT::_S_token_word_end))
824  {
825  // __m.push(_Matcher::_S_opcode_word_end);
826  return true;
827  }
828  return false;
829  }
830 
831  template<typename _InIter, typename _TraitsT>
832  bool
833  _Compiler<_InIter, _TraitsT>::
834  _M_quantifier()
835  {
836  if (_M_match_token(_ScannerT::_S_token_closure0))
837  {
838  if (_M_stack.empty())
839  __throw_regex_error(regex_constants::error_badrepeat);
840  _StateSeq __r(_M_stack.top(), -1);
841  __r._M_append(__r._M_front());
842  _M_stack.pop();
843  _M_stack.push(__r);
844  return true;
845  }
846  if (_M_match_token(_ScannerT::_S_token_closure1))
847  {
848  if (_M_stack.empty())
849  __throw_regex_error(regex_constants::error_badrepeat);
850  _StateSeq __r(_M_state_store,
851  _M_state_store.
852  _M_insert_alt(_S_invalid_state_id,
853  _M_stack.top()._M_front()));
854  _M_stack.top()._M_append(__r);
855  return true;
856  }
857  if (_M_match_token(_ScannerT::_S_token_opt))
858  {
859  if (_M_stack.empty())
860  __throw_regex_error(regex_constants::error_badrepeat);
861  _StateSeq __r(_M_stack.top(), -1);
862  _M_stack.pop();
863  _M_stack.push(__r);
864  return true;
865  }
866  if (_M_match_token(_ScannerT::_S_token_interval_begin))
867  {
868  if (_M_stack.empty())
869  __throw_regex_error(regex_constants::error_badrepeat);
870  if (!_M_match_token(_ScannerT::_S_token_dup_count))
871  __throw_regex_error(regex_constants::error_badbrace);
872  _StateSeq __r(_M_stack.top());
873  int __min_rep = _M_cur_int_value(10);
874  for (int __i = 1; __i < __min_rep; ++__i)
875  _M_stack.top()._M_append(__r._M_clone());
876  if (_M_match_token(_ScannerT::_S_token_comma))
877  if (_M_match_token(_ScannerT::_S_token_dup_count))
878  {
879  int __n = _M_cur_int_value(10) - __min_rep;
880  if (__n < 0)
881  __throw_regex_error(regex_constants::error_badbrace);
882  for (int __i = 0; __i < __n; ++__i)
883  {
884  _StateSeq __r(_M_state_store,
885  _M_state_store.
886  _M_insert_alt(_S_invalid_state_id,
887  _M_stack.top()._M_front()));
888  _M_stack.top()._M_append(__r);
889  }
890  }
891  else
892  {
893  _StateSeq __r(_M_stack.top(), -1);
894  __r._M_push_back(__r._M_front());
895  _M_stack.pop();
896  _M_stack.push(__r);
897  }
898  if (!_M_match_token(_ScannerT::_S_token_interval_end))
899  __throw_regex_error(regex_constants::error_brace);
900  return true;
901  }
902  return false;
903  }
904 
905  template<typename _InIter, typename _TraitsT>
906  bool
907  _Compiler<_InIter, _TraitsT>::
908  _M_atom()
909  {
910  typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
911  typedef _StartTagger<_InIter, _TraitsT> _Start;
912  typedef _EndTagger<_InIter, _TraitsT> _End;
913 
914  if (_M_match_token(_ScannerT::_S_token_anychar))
915  {
916  _M_stack.push(_StateSeq(_M_state_store,
917  _M_state_store._M_insert_matcher
918  (_AnyMatcher)));
919  return true;
920  }
921  if (_M_match_token(_ScannerT::_S_token_ord_char))
922  {
923  _M_stack.push(_StateSeq(_M_state_store,
924  _M_state_store._M_insert_matcher
925  (_CMatcher(_M_cur_value[0], _M_traits))));
926  return true;
927  }
928  if (_M_match_token(_ScannerT::_S_token_quoted_char))
929  {
930  // note that in the ECMA grammar, this case covers backrefs.
931  _M_stack.push(_StateSeq(_M_state_store,
932  _M_state_store._M_insert_matcher
933  (_CMatcher(_M_cur_value[0], _M_traits))));
934  return true;
935  }
936  if (_M_match_token(_ScannerT::_S_token_backref))
937  {
938  // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
939  return true;
940  }
941  if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
942  {
943  int __mark = _M_state_store._M_sub_count();
944  _StateSeq __r(_M_state_store,
945  _M_state_store.
946  _M_insert_subexpr_begin(_Start(__mark)));
947  this->_M_disjunction();
948  if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
949  __throw_regex_error(regex_constants::error_paren);
950  if (!_M_stack.empty())
951  {
952  __r._M_append(_M_stack.top());
953  _M_stack.pop();
954  }
955  __r._M_append(_M_state_store._M_insert_subexpr_end
956  (__mark, _End(__mark)));
957  _M_stack.push(__r);
958  return true;
959  }
960  return _M_bracket_expression();
961  }
962 
963  template<typename _InIter, typename _TraitsT>
964  bool
965  _Compiler<_InIter, _TraitsT>::
966  _M_bracket_expression()
967  {
968  if (_M_match_token(_ScannerT::_S_token_bracket_begin))
969  {
970  _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
971  _M_traits);
972  if (!_M_bracket_list(__matcher)
973  || !_M_match_token(_ScannerT::_S_token_bracket_end))
974  __throw_regex_error(regex_constants::error_brack);
975  _M_stack.push(_StateSeq(_M_state_store,
976  _M_state_store._M_insert_matcher(__matcher)));
977  return true;
978  }
979  return false;
980  }
981 
982  // If the dash is the last character in the bracket expression, it is not
983  // special.
984  template<typename _InIter, typename _TraitsT>
985  bool
986  _Compiler<_InIter, _TraitsT>::
987  _M_bracket_list(_RMatcherT& __matcher)
988  {
989  if (_M_follow_list(__matcher))
990  {
991  if (_M_match_token(_ScannerT::_S_token_dash))
992  __matcher._M_add_char(_M_cur_value[0]);
993  return true;
994  }
995  return false;
996  }
997 
998  template<typename _InIter, typename _TraitsT>
999  bool
1000  _Compiler<_InIter, _TraitsT>::
1001  _M_follow_list(_RMatcherT& __matcher)
1002  { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
1003 
1004  template<typename _InIter, typename _TraitsT>
1005  bool
1006  _Compiler<_InIter, _TraitsT>::
1007  _M_follow_list2(_RMatcherT& __matcher)
1008  {
1009  if (_M_expression_term(__matcher))
1010  return _M_follow_list2(__matcher);
1011  return true;
1012  }
1013 
1014  template<typename _InIter, typename _TraitsT>
1015  bool
1016  _Compiler<_InIter, _TraitsT>::
1017  _M_expression_term(_RMatcherT& __matcher)
1018  {
1019  return (_M_collating_symbol(__matcher)
1020  || _M_character_class(__matcher)
1021  || _M_equivalence_class(__matcher)
1022  || (_M_start_range(__matcher)
1023  && _M_range_expression(__matcher)));
1024  }
1025 
1026  template<typename _InIter, typename _TraitsT>
1027  bool
1028  _Compiler<_InIter, _TraitsT>::
1029  _M_range_expression(_RMatcherT& __matcher)
1030  {
1031  if (!_M_collating_symbol(__matcher))
1032  if (!_M_match_token(_ScannerT::_S_token_dash))
1033  __throw_regex_error(regex_constants::error_range);
1034  __matcher._M_make_range();
1035  return true;
1036  }
1037 
1038  template<typename _InIter, typename _TraitsT>
1039  bool
1040  _Compiler<_InIter, _TraitsT>::
1041  _M_start_range(_RMatcherT& __matcher)
1042  { return _M_match_token(_ScannerT::_S_token_dash); }
1043 
1044  template<typename _InIter, typename _TraitsT>
1045  bool
1046  _Compiler<_InIter, _TraitsT>::
1047  _M_collating_symbol(_RMatcherT& __matcher)
1048  {
1049  if (_M_match_token(_ScannerT::_S_token_collelem_single))
1050  {
1051  __matcher._M_add_char(_M_cur_value[0]);
1052  return true;
1053  }
1054  if (_M_match_token(_ScannerT::_S_token_collsymbol))
1055  {
1056  __matcher._M_add_collating_element(_M_cur_value);
1057  return true;
1058  }
1059  return false;
1060  }
1061 
1062  template<typename _InIter, typename _TraitsT>
1063  bool
1064  _Compiler<_InIter, _TraitsT>::
1065  _M_equivalence_class(_RMatcherT& __matcher)
1066  {
1067  if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
1068  {
1069  __matcher._M_add_equivalence_class(_M_cur_value);
1070  return true;
1071  }
1072  return false;
1073  }
1074 
1075  template<typename _InIter, typename _TraitsT>
1076  bool
1077  _Compiler<_InIter, _TraitsT>::
1078  _M_character_class(_RMatcherT& __matcher)
1079  {
1080  if (_M_match_token(_ScannerT::_S_token_char_class_name))
1081  {
1082  __matcher._M_add_character_class(_M_cur_value);
1083  return true;
1084  }
1085  return false;
1086  }
1087 
1088  template<typename _InIter, typename _TraitsT>
1089  int
1090  _Compiler<_InIter, _TraitsT>::
1091  _M_cur_int_value(int __radix)
1092  {
1093  int __v = 0;
1094  for (typename _StringT::size_type __i = 0;
1095  __i < _M_cur_value.length(); ++__i)
1096  __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
1097  return __v;
1098  }
1099 
1100  template<typename _InIter, typename _TraitsT>
1102  __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
1104  { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
1105  __f)._M_nfa())); }
1106 
1107  //@} regex-detail
1108 _GLIBCXX_END_NAMESPACE_VERSION
1109 } // namespace __detail
1110 } // namespace std
Container class for localization functionality.The locale class is first a class wrapper for C librar...
constexpr error_type error_badbrace(_S_error_badbrace)
Start state tag.
Definition: regex_nfa.h:88
struct _Nfa
Definition: regex_nfa.h:269
constexpr error_type error_badrepeat(_S_error_badrepeat)
void pop()
Removes first element.
Definition: stl_stack.h:212
constexpr syntax_option_type grep
Builds an NFA from an input iterator interval.
constexpr error_type error_paren(_S_error_paren)
bool empty() const
Definition: stl_stack.h:146
constexpr syntax_option_type ECMAScript
constexpr error_type error_ctype(_S_error_ctype)
static const _StateIdT _S_invalid_state_id
The special case in which a state identifier is not an index.
Definition: regex_nfa.h:198
ISO C++ entities toplevel namespace is std.
Describes a sequence of one or more _State, its current start and end(s). This structure contains fra...
Definition: regex_nfa.h:352
struct _Scanner. Scans an input range for regex tokens.
constexpr error_type error_range(_S_error_range)
std::shared_ptr< _Automaton > _AutomatonPtr
Generic shared pointer to an automaton.
Definition: regex_nfa.h:62
Matches a character range (bracket expression)
Definition: regex_nfa.h:152
_TokenT
Token types returned from the scanner.
bool _AnyMatcher(const _PatternCursor &)
Matches any character.
Definition: regex_nfa.h:124
reference top()
Definition: stl_stack.h:159
constexpr error_type error_brace(_S_error_brace)
unsigned int syntax_option_type
This is a bitmask type indicating how to interpret the regex.
End state tag.
Definition: regex_nfa.h:104
Primary class template ctype facet.This template class defines classification and conversion function...
A standard container giving FILO behavior.
Definition: stl_stack.h:96
constexpr syntax_option_type basic
constexpr error_type error_collate(_S_error_collate)
constexpr error_type error_brack(_S_error_brack)
Base class for scanner.
constexpr error_type error_escape(_S_error_escape)