libstdc++
regex_compiler.h
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2010, 2011 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex_compiler.h
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __regex
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 
37  struct _Scanner_base
38  {
39  typedef unsigned int _StateT;
40 
41  static constexpr _StateT _S_state_at_start = 1 << 0;
42  static constexpr _StateT _S_state_in_brace = 1 << 2;
43  static constexpr _StateT _S_state_in_bracket = 1 << 3;
44 
45  virtual ~_Scanner_base() { };
46  };
47 
48  //
49  // @brief Scans an input range for regex tokens.
50  //
51  // The %_Scanner class interprets the regular expression pattern in the input
52  // range passed to its constructor as a sequence of parse tokens passed to
53  // the regular expression compiler. The sequence of tokens provided depends
54  // on the flag settings passed to the constructor: different regular
55  // expression grammars will interpret the same input pattern in
56  // syntactically different ways.
57  //
58  template<typename _InputIterator>
59  class _Scanner: public _Scanner_base
60  {
61  public:
62  typedef _InputIterator _IteratorT;
63  typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
64  typedef std::basic_string<_CharT> _StringT;
66  typedef const std::ctype<_CharT> _CtypeT;
67 
68  // Token types returned from the scanner.
69  enum _TokenT
70  {
71  _S_token_anychar,
72  _S_token_backref,
73  _S_token_bracket_begin,
74  _S_token_bracket_end,
75  _S_token_inverse_class,
76  _S_token_char_class_name,
77  _S_token_closure0,
78  _S_token_closure1,
79  _S_token_collelem_multi,
80  _S_token_collelem_single,
81  _S_token_collsymbol,
82  _S_token_comma,
83  _S_token_dash,
84  _S_token_dup_count,
85  _S_token_eof,
86  _S_token_equiv_class_name,
87  _S_token_interval_begin,
88  _S_token_interval_end,
89  _S_token_line_begin,
90  _S_token_line_end,
91  _S_token_opt,
92  _S_token_or,
93  _S_token_ord_char,
94  _S_token_quoted_char,
95  _S_token_subexpr_begin,
96  _S_token_subexpr_end,
97  _S_token_word_begin,
98  _S_token_word_end,
99  _S_token_unknown
100  };
101 
102  public:
103  _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
104  std::locale __loc)
105  : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
106  _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
107  { _M_advance(); }
108 
109  void
110  _M_advance();
111 
112  _TokenT
113  _M_token() const
114  { return _M_curToken; }
115 
116  const _StringT&
117  _M_value() const
118  { return _M_curValue; }
119 
120 #ifdef _GLIBCXX_DEBUG
121  std::ostream&
122  _M_print(std::ostream&);
123 #endif
124 
125  private:
126  void
127  _M_eat_escape();
128 
129  void
130  _M_scan_in_brace();
131 
132  void
133  _M_scan_in_bracket();
134 
135  void
136  _M_eat_charclass();
137 
138  void
139  _M_eat_equivclass();
140 
141  void
142  _M_eat_collsymbol();
143 
144  private:
145  _IteratorT _M_current;
146  _IteratorT _M_end;
147  _FlagT _M_flags;
148  _CtypeT& _M_ctype;
149  _TokenT _M_curToken;
150  _StringT _M_curValue;
151  _StateT _M_state;
152  };
153 
154  template<typename _InputIterator>
155  void
156  _Scanner<_InputIterator>::
157  _M_advance()
158  {
159  if (_M_current == _M_end)
160  {
161  _M_curToken = _S_token_eof;
162  return;
163  }
164 
165  _CharT __c = *_M_current;
166  if (_M_state & _S_state_in_bracket)
167  {
168  _M_scan_in_bracket();
169  return;
170  }
171  if (_M_state & _S_state_in_brace)
172  {
173  _M_scan_in_brace();
174  return;
175  }
176 #if 0
177  // TODO: re-enable line anchors when _M_assertion is implemented.
178  // See PR libstdc++/47724
179  else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
180  {
181  _M_curToken = _S_token_line_begin;
182  ++_M_current;
183  return;
184  }
185  else if (__c == _M_ctype.widen('$'))
186  {
187  _M_curToken = _S_token_line_end;
188  ++_M_current;
189  return;
190  }
191 #endif
192  else if (__c == _M_ctype.widen('.'))
193  {
194  _M_curToken = _S_token_anychar;
195  ++_M_current;
196  return;
197  }
198  else if (__c == _M_ctype.widen('*'))
199  {
200  _M_curToken = _S_token_closure0;
201  ++_M_current;
202  return;
203  }
204  else if (__c == _M_ctype.widen('+'))
205  {
206  _M_curToken = _S_token_closure1;
207  ++_M_current;
208  return;
209  }
210  else if (__c == _M_ctype.widen('|'))
211  {
212  _M_curToken = _S_token_or;
213  ++_M_current;
214  return;
215  }
216  else if (__c == _M_ctype.widen('['))
217  {
218  _M_curToken = _S_token_bracket_begin;
219  _M_state |= (_S_state_in_bracket | _S_state_at_start);
220  ++_M_current;
221  return;
222  }
223  else if (__c == _M_ctype.widen('\\'))
224  {
225  _M_eat_escape();
226  return;
227  }
228  else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
229  {
230  if (__c == _M_ctype.widen('('))
231  {
232  _M_curToken = _S_token_subexpr_begin;
233  ++_M_current;
234  return;
235  }
236  else if (__c == _M_ctype.widen(')'))
237  {
238  _M_curToken = _S_token_subexpr_end;
239  ++_M_current;
240  return;
241  }
242  else if (__c == _M_ctype.widen('{'))
243  {
244  _M_curToken = _S_token_interval_begin;
245  _M_state |= _S_state_in_brace;
246  ++_M_current;
247  return;
248  }
249  }
250 
251  _M_curToken = _S_token_ord_char;
252  _M_curValue.assign(1, __c);
253  ++_M_current;
254  }
255 
256 
257  template<typename _InputIterator>
258  void
259  _Scanner<_InputIterator>::
260  _M_scan_in_brace()
261  {
262  if (_M_ctype.is(_CtypeT::digit, *_M_current))
263  {
264  _M_curToken = _S_token_dup_count;
265  _M_curValue.assign(1, *_M_current);
266  ++_M_current;
267  while (_M_current != _M_end
268  && _M_ctype.is(_CtypeT::digit, *_M_current))
269  {
270  _M_curValue += *_M_current;
271  ++_M_current;
272  }
273  return;
274  }
275  else if (*_M_current == _M_ctype.widen(','))
276  {
277  _M_curToken = _S_token_comma;
278  ++_M_current;
279  return;
280  }
282  {
283  if (*_M_current == _M_ctype.widen('\\'))
284  _M_eat_escape();
285  }
286  else
287  {
288  if (*_M_current == _M_ctype.widen('}'))
289  {
290  _M_curToken = _S_token_interval_end;
291  _M_state &= ~_S_state_in_brace;
292  ++_M_current;
293  return;
294  }
295  }
296  }
297 
298  template<typename _InputIterator>
299  void
300  _Scanner<_InputIterator>::
301  _M_scan_in_bracket()
302  {
303  if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
304  {
305  _M_curToken = _S_token_inverse_class;
306  _M_state &= ~_S_state_at_start;
307  ++_M_current;
308  return;
309  }
310  else if (*_M_current == _M_ctype.widen('['))
311  {
312  ++_M_current;
313  if (_M_current == _M_end)
314  {
315  _M_curToken = _S_token_eof;
316  return;
317  }
318 
319  if (*_M_current == _M_ctype.widen('.'))
320  {
321  _M_curToken = _S_token_collsymbol;
322  _M_eat_collsymbol();
323  return;
324  }
325  else if (*_M_current == _M_ctype.widen(':'))
326  {
327  _M_curToken = _S_token_char_class_name;
328  _M_eat_charclass();
329  return;
330  }
331  else if (*_M_current == _M_ctype.widen('='))
332  {
333  _M_curToken = _S_token_equiv_class_name;
334  _M_eat_equivclass();
335  return;
336  }
337  }
338  else if (*_M_current == _M_ctype.widen('-'))
339  {
340  _M_curToken = _S_token_dash;
341  ++_M_current;
342  return;
343  }
344  else if (*_M_current == _M_ctype.widen(']'))
345  {
346  if (!(_M_flags & regex_constants::ECMAScript)
347  || !(_M_state & _S_state_at_start))
348  {
349  // special case: only if _not_ chr first after
350  // '[' or '[^' and if not ECMAscript
351  _M_curToken = _S_token_bracket_end;
352  ++_M_current;
353  return;
354  }
355  }
356  _M_curToken = _S_token_collelem_single;
357  _M_curValue.assign(1, *_M_current);
358  ++_M_current;
359  }
360 
361  template<typename _InputIterator>
362  void
363  _Scanner<_InputIterator>::
364  _M_eat_escape()
365  {
366  ++_M_current;
367  if (_M_current == _M_end)
368  {
369  _M_curToken = _S_token_eof;
370  return;
371  }
372  _CharT __c = *_M_current;
373  ++_M_current;
374 
375  if (__c == _M_ctype.widen('('))
376  {
377  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
378  {
379  _M_curToken = _S_token_ord_char;
380  _M_curValue.assign(1, __c);
381  }
382  else
383  _M_curToken = _S_token_subexpr_begin;
384  }
385  else if (__c == _M_ctype.widen(')'))
386  {
387  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
388  {
389  _M_curToken = _S_token_ord_char;
390  _M_curValue.assign(1, __c);
391  }
392  else
393  _M_curToken = _S_token_subexpr_end;
394  }
395  else if (__c == _M_ctype.widen('{'))
396  {
397  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
398  {
399  _M_curToken = _S_token_ord_char;
400  _M_curValue.assign(1, __c);
401  }
402  else
403  {
404  _M_curToken = _S_token_interval_begin;
405  _M_state |= _S_state_in_brace;
406  }
407  }
408  else if (__c == _M_ctype.widen('}'))
409  {
410  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
411  {
412  _M_curToken = _S_token_ord_char;
413  _M_curValue.assign(1, __c);
414  }
415  else
416  {
417  if (!(_M_state && _S_state_in_brace))
418  __throw_regex_error(regex_constants::error_badbrace);
419  _M_state &= ~_S_state_in_brace;
420  _M_curToken = _S_token_interval_end;
421  }
422  }
423  else if (__c == _M_ctype.widen('x'))
424  {
425  ++_M_current;
426  if (_M_current == _M_end)
427  {
428  _M_curToken = _S_token_eof;
429  return;
430  }
431  if (_M_ctype.is(_CtypeT::digit, *_M_current))
432  {
433  _M_curValue.assign(1, *_M_current);
434  ++_M_current;
435  if (_M_current == _M_end)
436  {
437  _M_curToken = _S_token_eof;
438  return;
439  }
440  if (_M_ctype.is(_CtypeT::digit, *_M_current))
441  {
442  _M_curValue += *_M_current;
443  ++_M_current;
444  return;
445  }
446  }
447  }
448  else if (__c == _M_ctype.widen('^')
449  || __c == _M_ctype.widen('.')
450  || __c == _M_ctype.widen('*')
451  || __c == _M_ctype.widen('$')
452  || __c == _M_ctype.widen('\\'))
453  {
454  _M_curToken = _S_token_ord_char;
455  _M_curValue.assign(1, __c);
456  }
457  else if (_M_ctype.is(_CtypeT::digit, __c))
458  {
459  _M_curToken = _S_token_backref;
460  _M_curValue.assign(1, __c);
461  }
462  else
463  __throw_regex_error(regex_constants::error_escape);
464  }
465 
466 
467  // Eats a character class or throwns an exception.
468  // current point to ':' delimiter on entry, char after ']' on return
469  template<typename _InputIterator>
470  void
471  _Scanner<_InputIterator>::
472  _M_eat_charclass()
473  {
474  ++_M_current; // skip ':'
475  if (_M_current == _M_end)
476  __throw_regex_error(regex_constants::error_ctype);
477  for (_M_curValue.clear();
478  _M_current != _M_end && *_M_current != _M_ctype.widen(':');
479  ++_M_current)
480  _M_curValue += *_M_current;
481  if (_M_current == _M_end)
482  __throw_regex_error(regex_constants::error_ctype);
483  ++_M_current; // skip ':'
484  if (*_M_current != _M_ctype.widen(']'))
485  __throw_regex_error(regex_constants::error_ctype);
486  ++_M_current; // skip ']'
487  }
488 
489 
490  template<typename _InputIterator>
491  void
492  _Scanner<_InputIterator>::
493  _M_eat_equivclass()
494  {
495  ++_M_current; // skip '='
496  if (_M_current == _M_end)
497  __throw_regex_error(regex_constants::error_collate);
498  for (_M_curValue.clear();
499  _M_current != _M_end && *_M_current != _M_ctype.widen('=');
500  ++_M_current)
501  _M_curValue += *_M_current;
502  if (_M_current == _M_end)
503  __throw_regex_error(regex_constants::error_collate);
504  ++_M_current; // skip '='
505  if (*_M_current != _M_ctype.widen(']'))
506  __throw_regex_error(regex_constants::error_collate);
507  ++_M_current; // skip ']'
508  }
509 
510 
511  template<typename _InputIterator>
512  void
513  _Scanner<_InputIterator>::
514  _M_eat_collsymbol()
515  {
516  ++_M_current; // skip '.'
517  if (_M_current == _M_end)
518  __throw_regex_error(regex_constants::error_collate);
519  for (_M_curValue.clear();
520  _M_current != _M_end && *_M_current != _M_ctype.widen('.');
521  ++_M_current)
522  _M_curValue += *_M_current;
523  if (_M_current == _M_end)
524  __throw_regex_error(regex_constants::error_collate);
525  ++_M_current; // skip '.'
526  if (*_M_current != _M_ctype.widen(']'))
527  __throw_regex_error(regex_constants::error_collate);
528  ++_M_current; // skip ']'
529  }
530 
531 #ifdef _GLIBCXX_DEBUG
532  template<typename _InputIterator>
533  std::ostream&
534  _Scanner<_InputIterator>::
535  _M_print(std::ostream& ostr)
536  {
537  switch (_M_curToken)
538  {
539  case _S_token_anychar:
540  ostr << "any-character\n";
541  break;
542  case _S_token_backref:
543  ostr << "backref\n";
544  break;
545  case _S_token_bracket_begin:
546  ostr << "bracket-begin\n";
547  break;
548  case _S_token_bracket_end:
549  ostr << "bracket-end\n";
550  break;
551  case _S_token_char_class_name:
552  ostr << "char-class-name \"" << _M_curValue << "\"\n";
553  break;
554  case _S_token_closure0:
555  ostr << "closure0\n";
556  break;
557  case _S_token_closure1:
558  ostr << "closure1\n";
559  break;
560  case _S_token_collelem_multi:
561  ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
562  break;
563  case _S_token_collelem_single:
564  ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
565  break;
566  case _S_token_collsymbol:
567  ostr << "collsymbol \"" << _M_curValue << "\"\n";
568  break;
569  case _S_token_comma:
570  ostr << "comma\n";
571  break;
572  case _S_token_dash:
573  ostr << "dash\n";
574  break;
575  case _S_token_dup_count:
576  ostr << "dup count: " << _M_curValue << "\n";
577  break;
578  case _S_token_eof:
579  ostr << "EOF\n";
580  break;
581  case _S_token_equiv_class_name:
582  ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
583  break;
584  case _S_token_interval_begin:
585  ostr << "interval begin\n";
586  break;
587  case _S_token_interval_end:
588  ostr << "interval end\n";
589  break;
590  case _S_token_line_begin:
591  ostr << "line begin\n";
592  break;
593  case _S_token_line_end:
594  ostr << "line end\n";
595  break;
596  case _S_token_opt:
597  ostr << "opt\n";
598  break;
599  case _S_token_or:
600  ostr << "or\n";
601  break;
602  case _S_token_ord_char:
603  ostr << "ordinary character: \"" << _M_value() << "\"\n";
604  break;
605  case _S_token_quoted_char:
606  ostr << "quoted char\n";
607  break;
608  case _S_token_subexpr_begin:
609  ostr << "subexpr begin\n";
610  break;
611  case _S_token_subexpr_end:
612  ostr << "subexpr end\n";
613  break;
614  case _S_token_word_begin:
615  ostr << "word begin\n";
616  break;
617  case _S_token_word_end:
618  ostr << "word end\n";
619  break;
620  case _S_token_unknown:
621  ostr << "-- unknown token --\n";
622  break;
623  }
624  return ostr;
625  }
626 #endif
627 
628  // Builds an NFA from an input iterator interval.
629  template<typename _InIter, typename _TraitsT>
630  class _Compiler
631  {
632  public:
633  typedef _InIter _IterT;
634  typedef typename std::iterator_traits<_InIter>::value_type _CharT;
635  typedef std::basic_string<_CharT> _StringT;
637 
638  public:
639  _Compiler(const _InIter& __b, const _InIter& __e,
640  _TraitsT& __traits, _FlagT __flags);
641 
642  const _Nfa&
643  _M_nfa() const
644  { return _M_state_store; }
645 
646  private:
647  typedef _Scanner<_InIter> _ScannerT;
648  typedef typename _ScannerT::_TokenT _TokenT;
650  typedef _RangeMatcher<_InIter, _TraitsT> _RMatcherT;
651 
652  // accepts a specific token or returns false.
653  bool
654  _M_match_token(_TokenT __token);
655 
656  void
657  _M_disjunction();
658 
659  bool
660  _M_alternative();
661 
662  bool
663  _M_term();
664 
665  bool
666  _M_assertion();
667 
668  bool
669  _M_quantifier();
670 
671  bool
672  _M_atom();
673 
674  bool
675  _M_bracket_expression();
676 
677  bool
678  _M_bracket_list(_RMatcherT& __matcher);
679 
680  bool
681  _M_follow_list(_RMatcherT& __matcher);
682 
683  bool
684  _M_follow_list2(_RMatcherT& __matcher);
685 
686  bool
687  _M_expression_term(_RMatcherT& __matcher);
688 
689  bool
690  _M_range_expression(_RMatcherT& __matcher);
691 
692  bool
693  _M_start_range(_RMatcherT& __matcher);
694 
695  bool
696  _M_collating_symbol(_RMatcherT& __matcher);
697 
698  bool
699  _M_equivalence_class(_RMatcherT& __matcher);
700 
701  bool
702  _M_character_class(_RMatcherT& __matcher);
703 
704  int
705  _M_cur_int_value(int __radix);
706 
707  private:
708  _TraitsT& _M_traits;
709  _ScannerT _M_scanner;
710  _StringT _M_cur_value;
711  _Nfa _M_state_store;
712  _StackT _M_stack;
713  };
714 
715  template<typename _InIter, typename _TraitsT>
716  _Compiler<_InIter, _TraitsT>::
717  _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
718  _Compiler<_InIter, _TraitsT>::_FlagT __flags)
719  : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
720  _M_state_store(__flags)
721  {
722  typedef _StartTagger<_InIter, _TraitsT> _Start;
723  typedef _EndTagger<_InIter, _TraitsT> _End;
724 
725  _StateSeq __r(_M_state_store,
726  _M_state_store._M_insert_subexpr_begin(_Start(0)));
727  _M_disjunction();
728  if (!_M_stack.empty())
729  {
730  __r._M_append(_M_stack.top());
731  _M_stack.pop();
732  }
733  __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0)));
734  __r._M_append(_M_state_store._M_insert_accept());
735  }
736 
737  template<typename _InIter, typename _TraitsT>
738  bool
739  _Compiler<_InIter, _TraitsT>::
740  _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
741  {
742  if (token == _M_scanner._M_token())
743  {
744  _M_cur_value = _M_scanner._M_value();
745  _M_scanner._M_advance();
746  return true;
747  }
748  return false;
749  }
750 
751  template<typename _InIter, typename _TraitsT>
752  void
753  _Compiler<_InIter, _TraitsT>::
754  _M_disjunction()
755  {
756  this->_M_alternative();
757  if (_M_match_token(_ScannerT::_S_token_or))
758  {
759  _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
760  this->_M_disjunction();
761  _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
762  _M_stack.push(_StateSeq(__alt1, __alt2));
763  }
764  }
765 
766  template<typename _InIter, typename _TraitsT>
767  bool
768  _Compiler<_InIter, _TraitsT>::
769  _M_alternative()
770  {
771  if (this->_M_term())
772  {
773  _StateSeq __re = _M_stack.top(); _M_stack.pop();
774  this->_M_alternative();
775  if (!_M_stack.empty())
776  {
777  __re._M_append(_M_stack.top());
778  _M_stack.pop();
779  }
780  _M_stack.push(__re);
781  return true;
782  }
783  return false;
784  }
785 
786  template<typename _InIter, typename _TraitsT>
787  bool
788  _Compiler<_InIter, _TraitsT>::
789  _M_term()
790  {
791  if (this->_M_assertion())
792  return true;
793  if (this->_M_atom())
794  {
795  this->_M_quantifier();
796  return true;
797  }
798  return false;
799  }
800 
801  template<typename _InIter, typename _TraitsT>
802  bool
803  _Compiler<_InIter, _TraitsT>::
804  _M_assertion()
805  {
806  if (_M_match_token(_ScannerT::_S_token_line_begin))
807  {
808  // __m.push(_Matcher::_S_opcode_line_begin);
809  return true;
810  }
811  if (_M_match_token(_ScannerT::_S_token_line_end))
812  {
813  // __m.push(_Matcher::_S_opcode_line_end);
814  return true;
815  }
816  if (_M_match_token(_ScannerT::_S_token_word_begin))
817  {
818  // __m.push(_Matcher::_S_opcode_word_begin);
819  return true;
820  }
821  if (_M_match_token(_ScannerT::_S_token_word_end))
822  {
823  // __m.push(_Matcher::_S_opcode_word_end);
824  return true;
825  }
826  return false;
827  }
828 
829  template<typename _InIter, typename _TraitsT>
830  bool
831  _Compiler<_InIter, _TraitsT>::
832  _M_quantifier()
833  {
834  if (_M_match_token(_ScannerT::_S_token_closure0))
835  {
836  if (_M_stack.empty())
837  __throw_regex_error(regex_constants::error_badrepeat);
838  _StateSeq __r(_M_stack.top(), -1);
839  __r._M_append(__r._M_front());
840  _M_stack.pop();
841  _M_stack.push(__r);
842  return true;
843  }
844  if (_M_match_token(_ScannerT::_S_token_closure1))
845  {
846  if (_M_stack.empty())
847  __throw_regex_error(regex_constants::error_badrepeat);
848  _StateSeq __r(_M_state_store,
849  _M_state_store.
850  _M_insert_alt(_S_invalid_state_id,
851  _M_stack.top()._M_front()));
852  _M_stack.top()._M_append(__r);
853  return true;
854  }
855  if (_M_match_token(_ScannerT::_S_token_opt))
856  {
857  if (_M_stack.empty())
858  __throw_regex_error(regex_constants::error_badrepeat);
859  _StateSeq __r(_M_stack.top(), -1);
860  _M_stack.pop();
861  _M_stack.push(__r);
862  return true;
863  }
864  if (_M_match_token(_ScannerT::_S_token_interval_begin))
865  {
866  if (_M_stack.empty())
867  __throw_regex_error(regex_constants::error_badrepeat);
868  if (!_M_match_token(_ScannerT::_S_token_dup_count))
869  __throw_regex_error(regex_constants::error_badbrace);
870  _StateSeq __r(_M_stack.top());
871  int __min_rep = _M_cur_int_value(10);
872  for (int __i = 1; __i < __min_rep; ++__i)
873  _M_stack.top()._M_append(__r._M_clone());
874  if (_M_match_token(_ScannerT::_S_token_comma))
875  if (_M_match_token(_ScannerT::_S_token_dup_count))
876  {
877  int __n = _M_cur_int_value(10) - __min_rep;
878  if (__n < 0)
879  __throw_regex_error(regex_constants::error_badbrace);
880  for (int __i = 0; __i < __n; ++__i)
881  {
882  _StateSeq __r(_M_state_store,
883  _M_state_store.
884  _M_insert_alt(_S_invalid_state_id,
885  _M_stack.top()._M_front()));
886  _M_stack.top()._M_append(__r);
887  }
888  }
889  else
890  {
891  _StateSeq __r(_M_stack.top(), -1);
892  __r._M_push_back(__r._M_front());
893  _M_stack.pop();
894  _M_stack.push(__r);
895  }
896  if (!_M_match_token(_ScannerT::_S_token_interval_end))
897  __throw_regex_error(regex_constants::error_brace);
898  return true;
899  }
900  return false;
901  }
902 
903  template<typename _InIter, typename _TraitsT>
904  bool
905  _Compiler<_InIter, _TraitsT>::
906  _M_atom()
907  {
908  typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
909  typedef _StartTagger<_InIter, _TraitsT> _Start;
910  typedef _EndTagger<_InIter, _TraitsT> _End;
911 
912  if (_M_match_token(_ScannerT::_S_token_anychar))
913  {
914  _M_stack.push(_StateSeq(_M_state_store,
915  _M_state_store._M_insert_matcher
916  (_AnyMatcher)));
917  return true;
918  }
919  if (_M_match_token(_ScannerT::_S_token_ord_char))
920  {
921  _M_stack.push(_StateSeq(_M_state_store,
922  _M_state_store._M_insert_matcher
923  (_CMatcher(_M_cur_value[0], _M_traits))));
924  return true;
925  }
926  if (_M_match_token(_ScannerT::_S_token_quoted_char))
927  {
928  // note that in the ECMA grammar, this case covers backrefs.
929  _M_stack.push(_StateSeq(_M_state_store,
930  _M_state_store._M_insert_matcher
931  (_CMatcher(_M_cur_value[0], _M_traits))));
932  return true;
933  }
934  if (_M_match_token(_ScannerT::_S_token_backref))
935  {
936  // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
937  return true;
938  }
939  if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
940  {
941  int __mark = _M_state_store._M_sub_count();
942  _StateSeq __r(_M_state_store,
943  _M_state_store.
944  _M_insert_subexpr_begin(_Start(__mark)));
945  this->_M_disjunction();
946  if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
947  __throw_regex_error(regex_constants::error_paren);
948  if (!_M_stack.empty())
949  {
950  __r._M_append(_M_stack.top());
951  _M_stack.pop();
952  }
953  __r._M_append(_M_state_store._M_insert_subexpr_end
954  (__mark, _End(__mark)));
955  _M_stack.push(__r);
956  return true;
957  }
958  return _M_bracket_expression();
959  }
960 
961  template<typename _InIter, typename _TraitsT>
962  bool
963  _Compiler<_InIter, _TraitsT>::
964  _M_bracket_expression()
965  {
966  if (_M_match_token(_ScannerT::_S_token_bracket_begin))
967  {
968  _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
969  _M_traits);
970  if (!_M_bracket_list(__matcher)
971  || !_M_match_token(_ScannerT::_S_token_bracket_end))
972  __throw_regex_error(regex_constants::error_brack);
973  _M_stack.push(_StateSeq(_M_state_store,
974  _M_state_store._M_insert_matcher(__matcher)));
975  return true;
976  }
977  return false;
978  }
979 
980  // If the dash is the last character in the bracket expression, it is not
981  // special.
982  template<typename _InIter, typename _TraitsT>
983  bool
984  _Compiler<_InIter, _TraitsT>::
985  _M_bracket_list(_RMatcherT& __matcher)
986  {
987  if (_M_follow_list(__matcher))
988  {
989  if (_M_match_token(_ScannerT::_S_token_dash))
990  __matcher._M_add_char(_M_cur_value[0]);
991  return true;
992  }
993  return false;
994  }
995 
996  template<typename _InIter, typename _TraitsT>
997  bool
998  _Compiler<_InIter, _TraitsT>::
999  _M_follow_list(_RMatcherT& __matcher)
1000  { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
1001 
1002  template<typename _InIter, typename _TraitsT>
1003  bool
1004  _Compiler<_InIter, _TraitsT>::
1005  _M_follow_list2(_RMatcherT& __matcher)
1006  {
1007  if (_M_expression_term(__matcher))
1008  return _M_follow_list2(__matcher);
1009  return true;
1010  }
1011 
1012  template<typename _InIter, typename _TraitsT>
1013  bool
1014  _Compiler<_InIter, _TraitsT>::
1015  _M_expression_term(_RMatcherT& __matcher)
1016  {
1017  return (_M_collating_symbol(__matcher)
1018  || _M_character_class(__matcher)
1019  || _M_equivalence_class(__matcher)
1020  || (_M_start_range(__matcher)
1021  && _M_range_expression(__matcher)));
1022  }
1023 
1024  template<typename _InIter, typename _TraitsT>
1025  bool
1026  _Compiler<_InIter, _TraitsT>::
1027  _M_range_expression(_RMatcherT& __matcher)
1028  {
1029  if (!_M_collating_symbol(__matcher))
1030  if (!_M_match_token(_ScannerT::_S_token_dash))
1031  __throw_regex_error(regex_constants::error_range);
1032  __matcher._M_make_range();
1033  return true;
1034  }
1035 
1036  template<typename _InIter, typename _TraitsT>
1037  bool
1038  _Compiler<_InIter, _TraitsT>::
1039  _M_start_range(_RMatcherT& __matcher)
1040  { return _M_match_token(_ScannerT::_S_token_dash); }
1041 
1042  template<typename _InIter, typename _TraitsT>
1043  bool
1044  _Compiler<_InIter, _TraitsT>::
1045  _M_collating_symbol(_RMatcherT& __matcher)
1046  {
1047  if (_M_match_token(_ScannerT::_S_token_collelem_single))
1048  {
1049  __matcher._M_add_char(_M_cur_value[0]);
1050  return true;
1051  }
1052  if (_M_match_token(_ScannerT::_S_token_collsymbol))
1053  {
1054  __matcher._M_add_collating_element(_M_cur_value);
1055  return true;
1056  }
1057  return false;
1058  }
1059 
1060  template<typename _InIter, typename _TraitsT>
1061  bool
1062  _Compiler<_InIter, _TraitsT>::
1063  _M_equivalence_class(_RMatcherT& __matcher)
1064  {
1065  if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
1066  {
1067  __matcher._M_add_equivalence_class(_M_cur_value);
1068  return true;
1069  }
1070  return false;
1071  }
1072 
1073  template<typename _InIter, typename _TraitsT>
1074  bool
1075  _Compiler<_InIter, _TraitsT>::
1076  _M_character_class(_RMatcherT& __matcher)
1077  {
1078  if (_M_match_token(_ScannerT::_S_token_char_class_name))
1079  {
1080  __matcher._M_add_character_class(_M_cur_value);
1081  return true;
1082  }
1083  return false;
1084  }
1085 
1086  template<typename _InIter, typename _TraitsT>
1087  int
1088  _Compiler<_InIter, _TraitsT>::
1089  _M_cur_int_value(int __radix)
1090  {
1091  int __v = 0;
1092  for (typename _StringT::size_type __i = 0;
1093  __i < _M_cur_value.length(); ++__i)
1094  __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
1095  return __v;
1096  }
1097 
1098  template<typename _InIter, typename _TraitsT>
1099  _AutomatonPtr
1100  __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
1102  { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
1103  __f)._M_nfa())); }
1104 
1105 _GLIBCXX_END_NAMESPACE_VERSION
1106 } // namespace __regex
1107 } // namespace std
1108 
1109 /* vim: set ts=8 sw=2 sts=2: */