libstdc++
regex.tcc
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2015 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex.tcc
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 // A non-standard switch to let the user pick the matching algorithm.
32 // If _GLIBCXX_REGEX_USE_THOMPSON_NFA is defined, the thompson NFA
33 // algorithm will be used. This algorithm is not enabled by default,
34 // and cannot be used if the regex contains back-references, but has better
35 // (polynomial instead of exponential) worst case performance.
36 // See __regex_algo_impl below.
37 
38 namespace std _GLIBCXX_VISIBILITY(default)
39 {
40 namespace __detail
41 {
42 _GLIBCXX_BEGIN_NAMESPACE_VERSION
43 
44  // Result of merging regex_match and regex_search.
45  //
46  // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
47  // the other one if possible, for test purpose).
48  //
49  // That __match_mode is true means regex_match, else regex_search.
50  template<typename _BiIter, typename _Alloc,
51  typename _CharT, typename _TraitsT,
52  _RegexExecutorPolicy __policy,
53  bool __match_mode>
54  bool
55  __regex_algo_impl(_BiIter __s,
56  _BiIter __e,
57  match_results<_BiIter, _Alloc>& __m,
58  const basic_regex<_CharT, _TraitsT>& __re,
60  {
61  if (__re._M_automaton == nullptr)
62  return false;
63 
64  typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
65  __m._M_begin = __s;
66  __m._M_resize(__re._M_automaton->_M_sub_count());
67  for (auto& __it : __res)
68  __it.matched = false;
69 
70  // __policy is used by testsuites so that they can use Thompson NFA
71  // without defining a macro. Users should define
72  // _GLIBCXX_REGEX_USE_THOMPSON_NFA if they need to use this approach.
73  bool __ret;
74  if (!__re._M_automaton->_M_has_backref
75  && !(__re._M_flags & regex_constants::ECMAScript)
76 #ifndef _GLIBCXX_REGEX_USE_THOMPSON_NFA
77  && __policy == _RegexExecutorPolicy::_S_alternate
78 #endif
79  )
80  {
81  _Executor<_BiIter, _Alloc, _TraitsT, false>
82  __executor(__s, __e, __m, __re, __flags);
83  if (__match_mode)
84  __ret = __executor._M_match();
85  else
86  __ret = __executor._M_search();
87  }
88  else
89  {
90  _Executor<_BiIter, _Alloc, _TraitsT, true>
91  __executor(__s, __e, __m, __re, __flags);
92  if (__match_mode)
93  __ret = __executor._M_match();
94  else
95  __ret = __executor._M_search();
96  }
97  if (__ret)
98  {
99  for (auto& __it : __res)
100  if (!__it.matched)
101  __it.first = __it.second = __e;
102  auto& __pre = __m._M_prefix();
103  auto& __suf = __m._M_suffix();
104  if (__match_mode)
105  {
106  __pre.matched = false;
107  __pre.first = __s;
108  __pre.second = __s;
109  __suf.matched = false;
110  __suf.first = __e;
111  __suf.second = __e;
112  }
113  else
114  {
115  __pre.first = __s;
116  __pre.second = __res[0].first;
117  __pre.matched = (__pre.first != __pre.second);
118  __suf.first = __res[0].second;
119  __suf.second = __e;
120  __suf.matched = (__suf.first != __suf.second);
121  }
122  }
123  else
124  {
125  __m._M_resize(0);
126  for (auto& __it : __res)
127  {
128  __it.matched = false;
129  __it.first = __it.second = __e;
130  }
131  }
132  return __ret;
133  }
134 
135 _GLIBCXX_END_NAMESPACE_VERSION
136 }
137 
138 _GLIBCXX_BEGIN_NAMESPACE_VERSION
139 
140  template<typename _Ch_type>
141  template<typename _Fwd_iter>
142  typename regex_traits<_Ch_type>::string_type
144  lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
145  {
146  typedef std::ctype<char_type> __ctype_type;
147  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
148 
149  static const char* __collatenames[] =
150  {
151  "NUL",
152  "SOH",
153  "STX",
154  "ETX",
155  "EOT",
156  "ENQ",
157  "ACK",
158  "alert",
159  "backspace",
160  "tab",
161  "newline",
162  "vertical-tab",
163  "form-feed",
164  "carriage-return",
165  "SO",
166  "SI",
167  "DLE",
168  "DC1",
169  "DC2",
170  "DC3",
171  "DC4",
172  "NAK",
173  "SYN",
174  "ETB",
175  "CAN",
176  "EM",
177  "SUB",
178  "ESC",
179  "IS4",
180  "IS3",
181  "IS2",
182  "IS1",
183  "space",
184  "exclamation-mark",
185  "quotation-mark",
186  "number-sign",
187  "dollar-sign",
188  "percent-sign",
189  "ampersand",
190  "apostrophe",
191  "left-parenthesis",
192  "right-parenthesis",
193  "asterisk",
194  "plus-sign",
195  "comma",
196  "hyphen",
197  "period",
198  "slash",
199  "zero",
200  "one",
201  "two",
202  "three",
203  "four",
204  "five",
205  "six",
206  "seven",
207  "eight",
208  "nine",
209  "colon",
210  "semicolon",
211  "less-than-sign",
212  "equals-sign",
213  "greater-than-sign",
214  "question-mark",
215  "commercial-at",
216  "A",
217  "B",
218  "C",
219  "D",
220  "E",
221  "F",
222  "G",
223  "H",
224  "I",
225  "J",
226  "K",
227  "L",
228  "M",
229  "N",
230  "O",
231  "P",
232  "Q",
233  "R",
234  "S",
235  "T",
236  "U",
237  "V",
238  "W",
239  "X",
240  "Y",
241  "Z",
242  "left-square-bracket",
243  "backslash",
244  "right-square-bracket",
245  "circumflex",
246  "underscore",
247  "grave-accent",
248  "a",
249  "b",
250  "c",
251  "d",
252  "e",
253  "f",
254  "g",
255  "h",
256  "i",
257  "j",
258  "k",
259  "l",
260  "m",
261  "n",
262  "o",
263  "p",
264  "q",
265  "r",
266  "s",
267  "t",
268  "u",
269  "v",
270  "w",
271  "x",
272  "y",
273  "z",
274  "left-curly-bracket",
275  "vertical-line",
276  "right-curly-bracket",
277  "tilde",
278  "DEL",
279  };
280 
281  string __s;
282  for (; __first != __last; ++__first)
283  __s += __fctyp.narrow(*__first, 0);
284 
285  for (const auto& __it : __collatenames)
286  if (__s == __it)
287  return string_type(1, __fctyp.widen(
288  static_cast<char>(&__it - __collatenames)));
289 
290  // TODO Add digraph support:
291  // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
292 
293  return string_type();
294  }
295 
296  template<typename _Ch_type>
297  template<typename _Fwd_iter>
298  typename regex_traits<_Ch_type>::char_class_type
300  lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
301  {
302  typedef std::ctype<char_type> __ctype_type;
303  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
304 
305  // Mappings from class name to class mask.
306  static const pair<const char*, char_class_type> __classnames[] =
307  {
308  {"d", ctype_base::digit},
309  {"w", {ctype_base::alnum, _RegexMask::_S_under}},
310  {"s", ctype_base::space},
311  {"alnum", ctype_base::alnum},
312  {"alpha", ctype_base::alpha},
313  {"blank", ctype_base::blank},
314  {"cntrl", ctype_base::cntrl},
315  {"digit", ctype_base::digit},
316  {"graph", ctype_base::graph},
317  {"lower", ctype_base::lower},
318  {"print", ctype_base::print},
319  {"punct", ctype_base::punct},
320  {"space", ctype_base::space},
321  {"upper", ctype_base::upper},
322  {"xdigit", ctype_base::xdigit},
323  };
324 
325  string __s;
326  for (; __first != __last; ++__first)
327  __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
328 
329  for (const auto& __it : __classnames)
330  if (__s == __it.first)
331  {
332  if (__icase
333  && ((__it.second
334  & (ctype_base::lower | ctype_base::upper)) != 0))
335  return ctype_base::alpha;
336  return __it.second;
337  }
338  return 0;
339  }
340 
341  template<typename _Ch_type>
342  bool
344  isctype(_Ch_type __c, char_class_type __f) const
345  {
346  typedef std::ctype<char_type> __ctype_type;
347  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
348 
349  return __fctyp.is(__f._M_base, __c)
350  // [[:w:]]
351  || ((__f._M_extended & _RegexMask::_S_under)
352  && __c == __fctyp.widen('_'));
353  }
354 
355  template<typename _Ch_type>
356  int
358  value(_Ch_type __ch, int __radix) const
359  {
361  long __v;
362  if (__radix == 8)
363  __is >> std::oct;
364  else if (__radix == 16)
365  __is >> std::hex;
366  __is >> __v;
367  return __is.fail() ? -1 : __v;
368  }
369 
370  template<typename _Bi_iter, typename _Alloc>
371  template<typename _Out_iter>
373  format(_Out_iter __out,
374  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
375  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
376  match_flag_type __flags) const
377  {
378  _GLIBCXX_DEBUG_ASSERT( ready() );
379  regex_traits<char_type> __traits;
380  typedef std::ctype<char_type> __ctype_type;
381  const __ctype_type&
382  __fctyp(use_facet<__ctype_type>(__traits.getloc()));
383 
384  auto __output = [&](size_t __idx)
385  {
386  auto& __sub = (*this)[__idx];
387  if (__sub.matched)
388  __out = std::copy(__sub.first, __sub.second, __out);
389  };
390 
391  if (__flags & regex_constants::format_sed)
392  {
393  for (; __fmt_first != __fmt_last;)
394  if (*__fmt_first == '&')
395  {
396  __output(0);
397  ++__fmt_first;
398  }
399  else if (*__fmt_first == '\\')
400  {
401  if (++__fmt_first != __fmt_last
402  && __fctyp.is(__ctype_type::digit, *__fmt_first))
403  __output(__traits.value(*__fmt_first++, 10));
404  else
405  *__out++ = '\\';
406  }
407  else
408  *__out++ = *__fmt_first++;
409  }
410  else
411  {
412  while (1)
413  {
414  auto __next = std::find(__fmt_first, __fmt_last, '$');
415  if (__next == __fmt_last)
416  break;
417 
418  __out = std::copy(__fmt_first, __next, __out);
419 
420  auto __eat = [&](char __ch) -> bool
421  {
422  if (*__next == __ch)
423  {
424  ++__next;
425  return true;
426  }
427  return false;
428  };
429 
430  if (++__next == __fmt_last)
431  *__out++ = '$';
432  else if (__eat('$'))
433  *__out++ = '$';
434  else if (__eat('&'))
435  __output(0);
436  else if (__eat('`'))
437  {
438  auto& __sub = _M_prefix();
439  if (__sub.matched)
440  __out = std::copy(__sub.first, __sub.second, __out);
441  }
442  else if (__eat('\''))
443  {
444  auto& __sub = _M_suffix();
445  if (__sub.matched)
446  __out = std::copy(__sub.first, __sub.second, __out);
447  }
448  else if (__fctyp.is(__ctype_type::digit, *__next))
449  {
450  long __num = __traits.value(*__next, 10);
451  if (++__next != __fmt_last
452  && __fctyp.is(__ctype_type::digit, *__next))
453  {
454  __num *= 10;
455  __num += __traits.value(*__next++, 10);
456  }
457  if (0 <= __num && __num < this->size())
458  __output(__num);
459  }
460  else
461  *__out++ = '$';
462  __fmt_first = __next;
463  }
464  __out = std::copy(__fmt_first, __fmt_last, __out);
465  }
466  return __out;
467  }
468 
469  template<typename _Out_iter, typename _Bi_iter,
470  typename _Rx_traits, typename _Ch_type>
471  _Out_iter
472  regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
474  const _Ch_type* __fmt,
476  {
478  _IterT __i(__first, __last, __e, __flags);
479  _IterT __end;
480  if (__i == __end)
481  {
482  if (!(__flags & regex_constants::format_no_copy))
483  __out = std::copy(__first, __last, __out);
484  }
485  else
486  {
487  sub_match<_Bi_iter> __last;
488  auto __len = char_traits<_Ch_type>::length(__fmt);
489  for (; __i != __end; ++__i)
490  {
491  if (!(__flags & regex_constants::format_no_copy))
492  __out = std::copy(__i->prefix().first, __i->prefix().second,
493  __out);
494  __out = __i->format(__out, __fmt, __fmt + __len, __flags);
495  __last = __i->suffix();
497  break;
498  }
499  if (!(__flags & regex_constants::format_no_copy))
500  __out = std::copy(__last.first, __last.second, __out);
501  }
502  return __out;
503  }
504 
505  template<typename _Bi_iter,
506  typename _Ch_type,
507  typename _Rx_traits>
508  bool
510  operator==(const regex_iterator& __rhs) const
511  {
512  return (_M_match.empty() && __rhs._M_match.empty())
513  || (_M_begin == __rhs._M_begin
514  && _M_end == __rhs._M_end
515  && _M_pregex == __rhs._M_pregex
516  && _M_flags == __rhs._M_flags
517  && _M_match[0] == __rhs._M_match[0]);
518  }
519 
520  template<typename _Bi_iter,
521  typename _Ch_type,
522  typename _Rx_traits>
526  {
527  // In all cases in which the call to regex_search returns true,
528  // match.prefix().first shall be equal to the previous value of
529  // match[0].second, and for each index i in the half-open range
530  // [0, match.size()) for which match[i].matched is true,
531  // match[i].position() shall return distance(begin, match[i].first).
532  // [28.12.1.4.5]
533  if (_M_match[0].matched)
534  {
535  auto __start = _M_match[0].second;
536  auto __prefix_first = _M_match[0].second;
537  if (_M_match[0].first == _M_match[0].second)
538  {
539  if (__start == _M_end)
540  {
541  _M_match = value_type();
542  return *this;
543  }
544  else
545  {
546  if (regex_search(__start, _M_end, _M_match, *_M_pregex,
547  _M_flags
550  {
551  _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
552  auto& __prefix = _M_match._M_prefix();
553  __prefix.first = __prefix_first;
554  __prefix.matched = __prefix.first != __prefix.second;
555  // [28.12.1.4.5]
556  _M_match._M_begin = _M_begin;
557  return *this;
558  }
559  else
560  ++__start;
561  }
562  }
564  if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
565  {
566  _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
567  auto& __prefix = _M_match._M_prefix();
568  __prefix.first = __prefix_first;
569  __prefix.matched = __prefix.first != __prefix.second;
570  // [28.12.1.4.5]
571  _M_match._M_begin = _M_begin;
572  }
573  else
574  _M_match = value_type();
575  }
576  return *this;
577  }
578 
579  template<typename _Bi_iter,
580  typename _Ch_type,
581  typename _Rx_traits>
585  {
586  _M_position = __rhs._M_position;
587  _M_subs = __rhs._M_subs;
588  _M_n = __rhs._M_n;
589  _M_suffix = __rhs._M_suffix;
590  _M_has_m1 = __rhs._M_has_m1;
591  _M_normalize_result();
592  return *this;
593  }
594 
595  template<typename _Bi_iter,
596  typename _Ch_type,
597  typename _Rx_traits>
598  bool
601  {
602  if (_M_end_of_seq() && __rhs._M_end_of_seq())
603  return true;
604  if (_M_suffix.matched && __rhs._M_suffix.matched
605  && _M_suffix == __rhs._M_suffix)
606  return true;
607  if (_M_end_of_seq() || _M_suffix.matched
608  || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
609  return false;
610  return _M_position == __rhs._M_position
611  && _M_n == __rhs._M_n
612  && _M_subs == __rhs._M_subs;
613  }
614 
615  template<typename _Bi_iter,
616  typename _Ch_type,
617  typename _Rx_traits>
621  {
622  _Position __prev = _M_position;
623  if (_M_suffix.matched)
624  *this = regex_token_iterator();
625  else if (_M_n + 1 < _M_subs.size())
626  {
627  _M_n++;
628  _M_result = &_M_current_match();
629  }
630  else
631  {
632  _M_n = 0;
633  ++_M_position;
634  if (_M_position != _Position())
635  _M_result = &_M_current_match();
636  else if (_M_has_m1 && __prev->suffix().length() != 0)
637  {
638  _M_suffix.matched = true;
639  _M_suffix.first = __prev->suffix().first;
640  _M_suffix.second = __prev->suffix().second;
641  _M_result = &_M_suffix;
642  }
643  else
644  *this = regex_token_iterator();
645  }
646  return *this;
647  }
648 
649  template<typename _Bi_iter,
650  typename _Ch_type,
651  typename _Rx_traits>
652  void
654  _M_init(_Bi_iter __a, _Bi_iter __b)
655  {
656  _M_has_m1 = false;
657  for (auto __it : _M_subs)
658  if (__it == -1)
659  {
660  _M_has_m1 = true;
661  break;
662  }
663  if (_M_position != _Position())
664  _M_result = &_M_current_match();
665  else if (_M_has_m1)
666  {
667  _M_suffix.matched = true;
668  _M_suffix.first = __a;
669  _M_suffix.second = __b;
670  _M_result = &_M_suffix;
671  }
672  else
673  _M_result = nullptr;
674  }
675 
676 _GLIBCXX_END_NAMESPACE_VERSION
677 } // namespace
678 
ios_base & hex(ios_base &__base)
Calls base.setf(ios_base::hex, ios_base::basefield).
Definition: ios_base.h:1024
constexpr match_flag_type match_prev_avail
bool empty() const
Indicates if the match_results contains no results.
Definition: regex.h:1632
Basis for explicit traits specializations.
Definition: char_traits.h:227
int value(_Ch_type __ch, int __radix) const
Converts a digit to an int.
Definition: regex.tcc:358
string_type lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
Gets a collation element by name.
Definition: regex.tcc:144
regex_token_iterator & operator++()
Increments a regex_token_iterator.
Definition: regex.tcc:620
bool fail() const
Fast error checking.
Definition: basic_ios.h:201
regex_token_iterator & operator=(const regex_token_iterator &__rhs)
Assigns a regex_token_iterator to another.
Definition: regex.tcc:584
char_class_type lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase=false) const
Maps one or more characters to a named character classification.
Definition: regex.tcc:300
locale_type getloc() const
Gets a copy of the current locale in use by the regex_traits object.
Definition: regex.h:377
_Out_iter format(_Out_iter __out, const char_type *__fmt_first, const char_type *__fmt_last, match_flag_type __flags=regex_constants::format_default) const
Managing sequences of characters and character-like objects.
constexpr match_flag_type format_no_copy
bool isctype(_Ch_type __c, char_class_type __f) const
Determines if c is a member of an identified class.
Definition: regex.tcc:344
ios_base & oct(ios_base &__base)
Calls base.setf(ios_base::oct, ios_base::basefield).
Definition: ios_base.h:1032
constexpr match_flag_type format_sed
Describes aspects of a regular expression.
Definition: regex.h:87
bool operator==(const regex_token_iterator &__rhs) const
Compares a regex_token_iterator to another for equality.
Definition: regex.tcc:600
_Out_iter regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, const basic_regex< _Ch_type, _Rx_traits > &__e, const basic_string< _Ch_type, _St, _Sa > &__fmt, regex_constants::match_flag_type __flags=regex_constants::match_default)
Search for a regular expression within a range for multiple times, and replace the matched parts thro...
Definition: regex.h:2288
Controlling input for std::string.
Definition: iosfwd:100
regex_iterator & operator++()
Increments a regex_iterator.
Definition: regex.tcc:525
bool operator==(const regex_iterator &__rhs) const
Tests the equivalence of two regex iterators.
Definition: regex.tcc:510
Primary class template ctype facet.This template class defines classification and conversion function...
match_flag_type
This is a bitmask type indicating regex matching rules.
_T2 second
first is a copy of the first object
Definition: stl_pair.h:102
constexpr syntax_option_type ECMAScript
Struct holding two objects of arbitrary type.
Definition: stl_pair.h:96
bool regex_search(_Bi_iter __s, _Bi_iter __e, match_results< _Bi_iter, _Alloc > &__m, const basic_regex< _Ch_type, _Rx_traits > &__re, regex_constants::match_flag_type __flags=regex_constants::match_default)
Definition: regex.h:2140
_T1 first
second_type is the second bound type
Definition: stl_pair.h:101
ISO C++ entities toplevel namespace is std.
constexpr match_flag_type format_first_only
constexpr match_flag_type match_not_null
constexpr match_flag_type match_continuous