libstdc++
regex.tcc
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2016 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex.tcc
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 
37  // Result of merging regex_match and regex_search.
38  //
39  // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
40  // the other one if possible, for test purpose).
41  //
42  // That __match_mode is true means regex_match, else regex_search.
43  template<typename _BiIter, typename _Alloc,
44  typename _CharT, typename _TraitsT,
45  _RegexExecutorPolicy __policy,
46  bool __match_mode>
47  bool
48  __regex_algo_impl(_BiIter __s,
49  _BiIter __e,
50  match_results<_BiIter, _Alloc>& __m,
51  const basic_regex<_CharT, _TraitsT>& __re,
53  {
54  if (__re._M_automaton == nullptr)
55  return false;
56 
57  typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
58  __m._M_begin = __s;
59  __m._M_resize(__re._M_automaton->_M_sub_count());
60  for (auto& __it : __res)
61  __it.matched = false;
62 
63  bool __ret;
64  if ((__re.flags() & regex_constants::__polynomial)
65  || (__policy == _RegexExecutorPolicy::_S_alternate
66  && !__re._M_automaton->_M_has_backref))
67  {
68  _Executor<_BiIter, _Alloc, _TraitsT, false>
69  __executor(__s, __e, __m, __re, __flags);
70  if (__match_mode)
71  __ret = __executor._M_match();
72  else
73  __ret = __executor._M_search();
74  }
75  else
76  {
77  _Executor<_BiIter, _Alloc, _TraitsT, true>
78  __executor(__s, __e, __m, __re, __flags);
79  if (__match_mode)
80  __ret = __executor._M_match();
81  else
82  __ret = __executor._M_search();
83  }
84  if (__ret)
85  {
86  for (auto& __it : __res)
87  if (!__it.matched)
88  __it.first = __it.second = __e;
89  auto& __pre = __m._M_prefix();
90  auto& __suf = __m._M_suffix();
91  if (__match_mode)
92  {
93  __pre.matched = false;
94  __pre.first = __s;
95  __pre.second = __s;
96  __suf.matched = false;
97  __suf.first = __e;
98  __suf.second = __e;
99  }
100  else
101  {
102  __pre.first = __s;
103  __pre.second = __res[0].first;
104  __pre.matched = (__pre.first != __pre.second);
105  __suf.first = __res[0].second;
106  __suf.second = __e;
107  __suf.matched = (__suf.first != __suf.second);
108  }
109  }
110  else
111  {
112  __m._M_resize(0);
113  for (auto& __it : __res)
114  {
115  __it.matched = false;
116  __it.first = __it.second = __e;
117  }
118  }
119  return __ret;
120  }
121 
122 _GLIBCXX_END_NAMESPACE_VERSION
123 }
124 
125 _GLIBCXX_BEGIN_NAMESPACE_VERSION
126 
127  template<typename _Ch_type>
128  template<typename _Fwd_iter>
129  typename regex_traits<_Ch_type>::string_type
131  lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
132  {
133  typedef std::ctype<char_type> __ctype_type;
134  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
135 
136  static const char* __collatenames[] =
137  {
138  "NUL",
139  "SOH",
140  "STX",
141  "ETX",
142  "EOT",
143  "ENQ",
144  "ACK",
145  "alert",
146  "backspace",
147  "tab",
148  "newline",
149  "vertical-tab",
150  "form-feed",
151  "carriage-return",
152  "SO",
153  "SI",
154  "DLE",
155  "DC1",
156  "DC2",
157  "DC3",
158  "DC4",
159  "NAK",
160  "SYN",
161  "ETB",
162  "CAN",
163  "EM",
164  "SUB",
165  "ESC",
166  "IS4",
167  "IS3",
168  "IS2",
169  "IS1",
170  "space",
171  "exclamation-mark",
172  "quotation-mark",
173  "number-sign",
174  "dollar-sign",
175  "percent-sign",
176  "ampersand",
177  "apostrophe",
178  "left-parenthesis",
179  "right-parenthesis",
180  "asterisk",
181  "plus-sign",
182  "comma",
183  "hyphen",
184  "period",
185  "slash",
186  "zero",
187  "one",
188  "two",
189  "three",
190  "four",
191  "five",
192  "six",
193  "seven",
194  "eight",
195  "nine",
196  "colon",
197  "semicolon",
198  "less-than-sign",
199  "equals-sign",
200  "greater-than-sign",
201  "question-mark",
202  "commercial-at",
203  "A",
204  "B",
205  "C",
206  "D",
207  "E",
208  "F",
209  "G",
210  "H",
211  "I",
212  "J",
213  "K",
214  "L",
215  "M",
216  "N",
217  "O",
218  "P",
219  "Q",
220  "R",
221  "S",
222  "T",
223  "U",
224  "V",
225  "W",
226  "X",
227  "Y",
228  "Z",
229  "left-square-bracket",
230  "backslash",
231  "right-square-bracket",
232  "circumflex",
233  "underscore",
234  "grave-accent",
235  "a",
236  "b",
237  "c",
238  "d",
239  "e",
240  "f",
241  "g",
242  "h",
243  "i",
244  "j",
245  "k",
246  "l",
247  "m",
248  "n",
249  "o",
250  "p",
251  "q",
252  "r",
253  "s",
254  "t",
255  "u",
256  "v",
257  "w",
258  "x",
259  "y",
260  "z",
261  "left-curly-bracket",
262  "vertical-line",
263  "right-curly-bracket",
264  "tilde",
265  "DEL",
266  };
267 
268  string __s;
269  for (; __first != __last; ++__first)
270  __s += __fctyp.narrow(*__first, 0);
271 
272  for (const auto& __it : __collatenames)
273  if (__s == __it)
274  return string_type(1, __fctyp.widen(
275  static_cast<char>(&__it - __collatenames)));
276 
277  // TODO Add digraph support:
278  // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
279 
280  return string_type();
281  }
282 
283  template<typename _Ch_type>
284  template<typename _Fwd_iter>
285  typename regex_traits<_Ch_type>::char_class_type
287  lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
288  {
289  typedef std::ctype<char_type> __ctype_type;
290  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
291 
292  // Mappings from class name to class mask.
293  static const pair<const char*, char_class_type> __classnames[] =
294  {
295  {"d", ctype_base::digit},
296  {"w", {ctype_base::alnum, _RegexMask::_S_under}},
297  {"s", ctype_base::space},
298  {"alnum", ctype_base::alnum},
299  {"alpha", ctype_base::alpha},
300  {"blank", ctype_base::blank},
301  {"cntrl", ctype_base::cntrl},
302  {"digit", ctype_base::digit},
303  {"graph", ctype_base::graph},
304  {"lower", ctype_base::lower},
305  {"print", ctype_base::print},
306  {"punct", ctype_base::punct},
307  {"space", ctype_base::space},
308  {"upper", ctype_base::upper},
309  {"xdigit", ctype_base::xdigit},
310  };
311 
312  string __s;
313  for (; __first != __last; ++__first)
314  __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
315 
316  for (const auto& __it : __classnames)
317  if (__s == __it.first)
318  {
319  if (__icase
320  && ((__it.second
321  & (ctype_base::lower | ctype_base::upper)) != 0))
322  return ctype_base::alpha;
323  return __it.second;
324  }
325  return 0;
326  }
327 
328  template<typename _Ch_type>
329  bool
331  isctype(_Ch_type __c, char_class_type __f) const
332  {
333  typedef std::ctype<char_type> __ctype_type;
334  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
335 
336  return __fctyp.is(__f._M_base, __c)
337  // [[:w:]]
338  || ((__f._M_extended & _RegexMask::_S_under)
339  && __c == __fctyp.widen('_'));
340  }
341 
342  template<typename _Ch_type>
343  int
345  value(_Ch_type __ch, int __radix) const
346  {
348  long __v;
349  if (__radix == 8)
350  __is >> std::oct;
351  else if (__radix == 16)
352  __is >> std::hex;
353  __is >> __v;
354  return __is.fail() ? -1 : __v;
355  }
356 
357  template<typename _Bi_iter, typename _Alloc>
358  template<typename _Out_iter>
360  format(_Out_iter __out,
361  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
362  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
363  match_flag_type __flags) const
364  {
365  __glibcxx_assert( ready() );
366  regex_traits<char_type> __traits;
367  typedef std::ctype<char_type> __ctype_type;
368  const __ctype_type&
369  __fctyp(use_facet<__ctype_type>(__traits.getloc()));
370 
371  auto __output = [&](size_t __idx)
372  {
373  auto& __sub = (*this)[__idx];
374  if (__sub.matched)
375  __out = std::copy(__sub.first, __sub.second, __out);
376  };
377 
378  if (__flags & regex_constants::format_sed)
379  {
380  for (; __fmt_first != __fmt_last;)
381  if (*__fmt_first == '&')
382  {
383  __output(0);
384  ++__fmt_first;
385  }
386  else if (*__fmt_first == '\\')
387  {
388  if (++__fmt_first != __fmt_last
389  && __fctyp.is(__ctype_type::digit, *__fmt_first))
390  __output(__traits.value(*__fmt_first++, 10));
391  else
392  *__out++ = '\\';
393  }
394  else
395  *__out++ = *__fmt_first++;
396  }
397  else
398  {
399  while (1)
400  {
401  auto __next = std::find(__fmt_first, __fmt_last, '$');
402  if (__next == __fmt_last)
403  break;
404 
405  __out = std::copy(__fmt_first, __next, __out);
406 
407  auto __eat = [&](char __ch) -> bool
408  {
409  if (*__next == __ch)
410  {
411  ++__next;
412  return true;
413  }
414  return false;
415  };
416 
417  if (++__next == __fmt_last)
418  *__out++ = '$';
419  else if (__eat('$'))
420  *__out++ = '$';
421  else if (__eat('&'))
422  __output(0);
423  else if (__eat('`'))
424  {
425  auto& __sub = _M_prefix();
426  if (__sub.matched)
427  __out = std::copy(__sub.first, __sub.second, __out);
428  }
429  else if (__eat('\''))
430  {
431  auto& __sub = _M_suffix();
432  if (__sub.matched)
433  __out = std::copy(__sub.first, __sub.second, __out);
434  }
435  else if (__fctyp.is(__ctype_type::digit, *__next))
436  {
437  long __num = __traits.value(*__next, 10);
438  if (++__next != __fmt_last
439  && __fctyp.is(__ctype_type::digit, *__next))
440  {
441  __num *= 10;
442  __num += __traits.value(*__next++, 10);
443  }
444  if (0 <= __num && __num < this->size())
445  __output(__num);
446  }
447  else
448  *__out++ = '$';
449  __fmt_first = __next;
450  }
451  __out = std::copy(__fmt_first, __fmt_last, __out);
452  }
453  return __out;
454  }
455 
456  template<typename _Out_iter, typename _Bi_iter,
457  typename _Rx_traits, typename _Ch_type>
458  _Out_iter
459  regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
461  const _Ch_type* __fmt,
463  {
465  _IterT __i(__first, __last, __e, __flags);
466  _IterT __end;
467  if (__i == __end)
468  {
469  if (!(__flags & regex_constants::format_no_copy))
470  __out = std::copy(__first, __last, __out);
471  }
472  else
473  {
474  sub_match<_Bi_iter> __last;
475  auto __len = char_traits<_Ch_type>::length(__fmt);
476  for (; __i != __end; ++__i)
477  {
478  if (!(__flags & regex_constants::format_no_copy))
479  __out = std::copy(__i->prefix().first, __i->prefix().second,
480  __out);
481  __out = __i->format(__out, __fmt, __fmt + __len, __flags);
482  __last = __i->suffix();
484  break;
485  }
486  if (!(__flags & regex_constants::format_no_copy))
487  __out = std::copy(__last.first, __last.second, __out);
488  }
489  return __out;
490  }
491 
492  template<typename _Bi_iter,
493  typename _Ch_type,
494  typename _Rx_traits>
495  bool
497  operator==(const regex_iterator& __rhs) const
498  {
499  return (_M_match.empty() && __rhs._M_match.empty())
500  || (_M_begin == __rhs._M_begin
501  && _M_end == __rhs._M_end
502  && _M_pregex == __rhs._M_pregex
503  && _M_flags == __rhs._M_flags
504  && _M_match[0] == __rhs._M_match[0]);
505  }
506 
507  template<typename _Bi_iter,
508  typename _Ch_type,
509  typename _Rx_traits>
513  {
514  // In all cases in which the call to regex_search returns true,
515  // match.prefix().first shall be equal to the previous value of
516  // match[0].second, and for each index i in the half-open range
517  // [0, match.size()) for which match[i].matched is true,
518  // match[i].position() shall return distance(begin, match[i].first).
519  // [28.12.1.4.5]
520  if (_M_match[0].matched)
521  {
522  auto __start = _M_match[0].second;
523  auto __prefix_first = _M_match[0].second;
524  if (_M_match[0].first == _M_match[0].second)
525  {
526  if (__start == _M_end)
527  {
528  _M_match = value_type();
529  return *this;
530  }
531  else
532  {
533  if (regex_search(__start, _M_end, _M_match, *_M_pregex,
534  _M_flags
537  {
538  __glibcxx_assert(_M_match[0].matched);
539  auto& __prefix = _M_match._M_prefix();
540  __prefix.first = __prefix_first;
541  __prefix.matched = __prefix.first != __prefix.second;
542  // [28.12.1.4.5]
543  _M_match._M_begin = _M_begin;
544  return *this;
545  }
546  else
547  ++__start;
548  }
549  }
551  if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
552  {
553  __glibcxx_assert(_M_match[0].matched);
554  auto& __prefix = _M_match._M_prefix();
555  __prefix.first = __prefix_first;
556  __prefix.matched = __prefix.first != __prefix.second;
557  // [28.12.1.4.5]
558  _M_match._M_begin = _M_begin;
559  }
560  else
561  _M_match = value_type();
562  }
563  return *this;
564  }
565 
566  template<typename _Bi_iter,
567  typename _Ch_type,
568  typename _Rx_traits>
572  {
573  _M_position = __rhs._M_position;
574  _M_subs = __rhs._M_subs;
575  _M_n = __rhs._M_n;
576  _M_suffix = __rhs._M_suffix;
577  _M_has_m1 = __rhs._M_has_m1;
578  _M_normalize_result();
579  return *this;
580  }
581 
582  template<typename _Bi_iter,
583  typename _Ch_type,
584  typename _Rx_traits>
585  bool
588  {
589  if (_M_end_of_seq() && __rhs._M_end_of_seq())
590  return true;
591  if (_M_suffix.matched && __rhs._M_suffix.matched
592  && _M_suffix == __rhs._M_suffix)
593  return true;
594  if (_M_end_of_seq() || _M_suffix.matched
595  || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
596  return false;
597  return _M_position == __rhs._M_position
598  && _M_n == __rhs._M_n
599  && _M_subs == __rhs._M_subs;
600  }
601 
602  template<typename _Bi_iter,
603  typename _Ch_type,
604  typename _Rx_traits>
608  {
609  _Position __prev = _M_position;
610  if (_M_suffix.matched)
611  *this = regex_token_iterator();
612  else if (_M_n + 1 < _M_subs.size())
613  {
614  _M_n++;
615  _M_result = &_M_current_match();
616  }
617  else
618  {
619  _M_n = 0;
620  ++_M_position;
621  if (_M_position != _Position())
622  _M_result = &_M_current_match();
623  else if (_M_has_m1 && __prev->suffix().length() != 0)
624  {
625  _M_suffix.matched = true;
626  _M_suffix.first = __prev->suffix().first;
627  _M_suffix.second = __prev->suffix().second;
628  _M_result = &_M_suffix;
629  }
630  else
631  *this = regex_token_iterator();
632  }
633  return *this;
634  }
635 
636  template<typename _Bi_iter,
637  typename _Ch_type,
638  typename _Rx_traits>
639  void
641  _M_init(_Bi_iter __a, _Bi_iter __b)
642  {
643  _M_has_m1 = false;
644  for (auto __it : _M_subs)
645  if (__it == -1)
646  {
647  _M_has_m1 = true;
648  break;
649  }
650  if (_M_position != _Position())
651  _M_result = &_M_current_match();
652  else if (_M_has_m1)
653  {
654  _M_suffix.matched = true;
655  _M_suffix.first = __a;
656  _M_suffix.second = __b;
657  _M_result = &_M_suffix;
658  }
659  else
660  _M_result = nullptr;
661  }
662 
663 _GLIBCXX_END_NAMESPACE_VERSION
664 } // namespace
665 
_T2 second
first is a copy of the first object
Definition: stl_pair.h:196
constexpr match_flag_type match_prev_avail
char_class_type lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase=false) const
Maps one or more characters to a named character classification.
Definition: regex.tcc:287
int value(_Ch_type __ch, int __radix) const
Converts a digit to an int.
Definition: regex.tcc:345
match_flag_type
This is a bitmask type indicating regex matching rules.
bool regex_search(_Bi_iter __s, _Bi_iter __e, match_results< _Bi_iter, _Alloc > &__m, const basic_regex< _Ch_type, _Rx_traits > &__re, regex_constants::match_flag_type __flags=regex_constants::match_default)
Definition: regex.h:2146
_Out_iter format(_Out_iter __out, const char_type *__fmt_first, const char_type *__fmt_last, match_flag_type __flags=regex_constants::format_default) const
constexpr match_flag_type match_not_null
Managing sequences of characters and character-like objects.
regex_token_iterator & operator=(const regex_token_iterator &__rhs)
Assigns a regex_token_iterator to another.
Definition: regex.tcc:571
constexpr match_flag_type format_no_copy
bool isctype(_Ch_type __c, char_class_type __f) const
Determines if c is a member of an identified class.
Definition: regex.tcc:331
ios_base & hex(ios_base &__base)
Calls base.setf(ios_base::hex, ios_base::basefield).
Definition: ios_base.h:1024
ios_base & oct(ios_base &__base)
Calls base.setf(ios_base::oct, ios_base::basefield).
Definition: ios_base.h:1032
constexpr match_flag_type format_sed
_T1 first
second_type is the second bound type
Definition: stl_pair.h:195
string_type lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
Gets a collation element by name.
Definition: regex.tcc:131
constexpr syntax_option_type __polynomial
Primary class template ctype facet.This template class defines classification and conversion function...
bool fail() const
Fast error checking.
Definition: basic_ios.h:201
regex_token_iterator & operator++()
Increments a regex_token_iterator.
Definition: regex.tcc:607
Controlling input for std::string.
Definition: iosfwd:100
_Out_iter regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, const basic_regex< _Ch_type, _Rx_traits > &__e, const basic_string< _Ch_type, _St, _Sa > &__fmt, regex_constants::match_flag_type __flags=regex_constants::match_default)
Search for a regular expression within a range for multiple times, and replace the matched parts thro...
Definition: regex.h:2294
constexpr match_flag_type match_continuous
locale_type getloc() const
Gets a copy of the current locale in use by the regex_traits object.
Definition: regex.h:377
regex_iterator & operator++()
Increments a regex_iterator.
Definition: regex.tcc:512
Struct holding two objects of arbitrary type.
Definition: stl_pair.h:190
bool operator==(const regex_token_iterator &__rhs) const
Compares a regex_token_iterator to another for equality.
Definition: regex.tcc:587
constexpr match_flag_type format_first_only
Basis for explicit traits specializations.
Definition: char_traits.h:227
Describes aspects of a regular expression.
Definition: regex.h:87
ISO C++ entities toplevel namespace is std.
bool empty() const
Indicates if the match_results contains no results.
Definition: regex.h:1635
bool operator==(const regex_iterator &__rhs) const
Tests the equivalence of two regex iterators.
Definition: regex.tcc:497