libstdc++
regex.tcc
Go to the documentation of this file.
// class template regex -*- C++ -*-

// Copyright (C) 2013-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.
24
/**
 *  @file bits/regex.tcc
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{regex}
 */
30
31namespace std _GLIBCXX_VISIBILITY(default)
32{
33_GLIBCXX_BEGIN_NAMESPACE_VERSION
34
35namespace __detail
36{
37 /// @cond undocumented
38
39 // Result of merging regex_match and regex_search.
40 //
41 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
42 // the other one if possible, for test purpose).
43 //
44 // That __match_mode is true means regex_match, else regex_search.
45 template<typename _BiIter, typename _Alloc,
46 typename _CharT, typename _TraitsT,
47 _RegexExecutorPolicy __policy,
48 bool __match_mode>
49 bool
50 __regex_algo_impl(_BiIter __s,
51 _BiIter __e,
52 match_results<_BiIter, _Alloc>& __m,
53 const basic_regex<_CharT, _TraitsT>& __re,
55 {
56 if (__re._M_automaton == nullptr)
57 return false;
58
59 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
60 __m._M_begin = __s;
61 __m._M_resize(__re._M_automaton->_M_sub_count());
62
63 bool __ret;
64 if ((__re.flags() & regex_constants::__polynomial)
65 || (__policy == _RegexExecutorPolicy::_S_alternate
66 && !__re._M_automaton->_M_has_backref))
67 {
68 _Executor<_BiIter, _Alloc, _TraitsT, false>
69 __executor(__s, __e, __m, __re, __flags);
70 if (__match_mode)
71 __ret = __executor._M_match();
72 else
73 __ret = __executor._M_search();
74 }
75 else
76 {
77 _Executor<_BiIter, _Alloc, _TraitsT, true>
78 __executor(__s, __e, __m, __re, __flags);
79 if (__match_mode)
80 __ret = __executor._M_match();
81 else
82 __ret = __executor._M_search();
83 }
84 if (__ret)
85 {
86 for (auto& __it : __res)
87 if (!__it.matched)
88 __it.first = __it.second = __e;
89 auto& __pre = __m._M_prefix();
90 auto& __suf = __m._M_suffix();
91 if (__match_mode)
92 {
93 __pre.matched = false;
94 __pre.first = __s;
95 __pre.second = __s;
96 __suf.matched = false;
97 __suf.first = __e;
98 __suf.second = __e;
99 }
100 else
101 {
102 __pre.first = __s;
103 __pre.second = __res[0].first;
104 __pre.matched = (__pre.first != __pre.second);
105 __suf.first = __res[0].second;
106 __suf.second = __e;
107 __suf.matched = (__suf.first != __suf.second);
108 }
109 }
110 else
111 {
112 __m._M_establish_failed_match(__e);
113 }
114 return __ret;
115 }
116 /// @endcond
117} // namespace __detail
118
119 /// @cond
120
121 template<typename _Ch_type>
122 template<typename _Fwd_iter>
123 typename regex_traits<_Ch_type>::string_type
125 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
126 {
127 typedef std::ctype<char_type> __ctype_type;
128 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
129
130 static const char* __collatenames[] =
131 {
132 "NUL",
133 "SOH",
134 "STX",
135 "ETX",
136 "EOT",
137 "ENQ",
138 "ACK",
139 "alert",
140 "backspace",
141 "tab",
142 "newline",
143 "vertical-tab",
144 "form-feed",
145 "carriage-return",
146 "SO",
147 "SI",
148 "DLE",
149 "DC1",
150 "DC2",
151 "DC3",
152 "DC4",
153 "NAK",
154 "SYN",
155 "ETB",
156 "CAN",
157 "EM",
158 "SUB",
159 "ESC",
160 "IS4",
161 "IS3",
162 "IS2",
163 "IS1",
164 "space",
165 "exclamation-mark",
166 "quotation-mark",
167 "number-sign",
168 "dollar-sign",
169 "percent-sign",
170 "ampersand",
171 "apostrophe",
172 "left-parenthesis",
173 "right-parenthesis",
174 "asterisk",
175 "plus-sign",
176 "comma",
177 "hyphen",
178 "period",
179 "slash",
180 "zero",
181 "one",
182 "two",
183 "three",
184 "four",
185 "five",
186 "six",
187 "seven",
188 "eight",
189 "nine",
190 "colon",
191 "semicolon",
192 "less-than-sign",
193 "equals-sign",
194 "greater-than-sign",
195 "question-mark",
196 "commercial-at",
197 "A",
198 "B",
199 "C",
200 "D",
201 "E",
202 "F",
203 "G",
204 "H",
205 "I",
206 "J",
207 "K",
208 "L",
209 "M",
210 "N",
211 "O",
212 "P",
213 "Q",
214 "R",
215 "S",
216 "T",
217 "U",
218 "V",
219 "W",
220 "X",
221 "Y",
222 "Z",
223 "left-square-bracket",
224 "backslash",
225 "right-square-bracket",
226 "circumflex",
227 "underscore",
228 "grave-accent",
229 "a",
230 "b",
231 "c",
232 "d",
233 "e",
234 "f",
235 "g",
236 "h",
237 "i",
238 "j",
239 "k",
240 "l",
241 "m",
242 "n",
243 "o",
244 "p",
245 "q",
246 "r",
247 "s",
248 "t",
249 "u",
250 "v",
251 "w",
252 "x",
253 "y",
254 "z",
255 "left-curly-bracket",
256 "vertical-line",
257 "right-curly-bracket",
258 "tilde",
259 "DEL",
260 };
261
262 string __s;
263 for (; __first != __last; ++__first)
264 __s += __fctyp.narrow(*__first, 0);
265
266 for (const auto& __it : __collatenames)
267 if (__s == __it)
268 return string_type(1, __fctyp.widen(
269 static_cast<char>(&__it - __collatenames)));
270
271 // TODO Add digraph support:
272 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
273
274 return string_type();
275 }
276
277 template<typename _Ch_type>
278 template<typename _Fwd_iter>
279 typename regex_traits<_Ch_type>::char_class_type
281 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
282 {
283 typedef std::ctype<char_type> __ctype_type;
284 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
285
286 // Mappings from class name to class mask.
287 static const pair<const char*, char_class_type> __classnames[] =
288 {
289 {"d", ctype_base::digit},
290 {"w", {ctype_base::alnum, _RegexMask::_S_under}},
291 {"s", ctype_base::space},
292 {"alnum", ctype_base::alnum},
293 {"alpha", ctype_base::alpha},
294 {"blank", ctype_base::blank},
295 {"cntrl", ctype_base::cntrl},
296 {"digit", ctype_base::digit},
297 {"graph", ctype_base::graph},
298 {"lower", ctype_base::lower},
299 {"print", ctype_base::print},
300 {"punct", ctype_base::punct},
301 {"space", ctype_base::space},
302 {"upper", ctype_base::upper},
303 {"xdigit", ctype_base::xdigit},
304 };
305
306 string __s;
307 for (; __first != __last; ++__first)
308 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
309
310 for (const auto& __it : __classnames)
311 if (__s == __it.first)
312 {
313 if (__icase
314 && ((__it.second
315 & (ctype_base::lower | ctype_base::upper)) != 0))
316 return ctype_base::alpha;
317 return __it.second;
318 }
319 return 0;
320 }
321
322 template<typename _Ch_type>
323 bool
325 isctype(_Ch_type __c, char_class_type __f) const
326 {
327 typedef std::ctype<char_type> __ctype_type;
328 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
329
330 return __fctyp.is(__f._M_base, __c)
331 // [[:w:]]
332 || ((__f._M_extended & _RegexMask::_S_under)
333 && __c == __fctyp.widen('_'));
334 }
335
336 template<typename _Ch_type>
337 int
339 value(_Ch_type __ch, int __radix) const
340 {
341 std::basic_istringstream<char_type> __is(string_type(1, __ch));
342 long __v;
343 if (__radix == 8)
344 __is >> std::oct;
345 else if (__radix == 16)
346 __is >> std::hex;
347 __is >> __v;
348 return __is.fail() ? -1 : __v;
349 }
350
351 template<typename _Bi_iter, typename _Alloc>
352 template<typename _Out_iter>
353 _Out_iter
355 format(_Out_iter __out,
356 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
357 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
358 match_flag_type __flags) const
359 {
360 __glibcxx_assert( ready() );
361 regex_traits<char_type> __traits;
362 typedef std::ctype<char_type> __ctype_type;
363 const __ctype_type&
364 __fctyp(use_facet<__ctype_type>(__traits.getloc()));
365
366 auto __output = [&](size_t __idx)
367 {
368 auto& __sub = (*this)[__idx];
369 if (__sub.matched)
370 __out = std::copy(__sub.first, __sub.second, __out);
371 };
372
373 if (__flags & regex_constants::format_sed)
374 {
375 bool __escaping = false;
376 for (; __fmt_first != __fmt_last; __fmt_first++)
377 {
378 if (__escaping)
379 {
380 __escaping = false;
381 if (__fctyp.is(__ctype_type::digit, *__fmt_first))
382 __output(__traits.value(*__fmt_first, 10));
383 else
384 *__out++ = *__fmt_first;
385 continue;
386 }
387 if (*__fmt_first == '\\')
388 {
389 __escaping = true;
390 continue;
391 }
392 if (*__fmt_first == '&')
393 {
394 __output(0);
395 continue;
396 }
397 *__out++ = *__fmt_first;
398 }
399 if (__escaping)
400 *__out++ = '\\';
401 }
402 else
403 {
404 while (1)
405 {
406 auto __next = std::find(__fmt_first, __fmt_last, '$');
407 if (__next == __fmt_last)
408 break;
409
410 __out = std::copy(__fmt_first, __next, __out);
411
412 auto __eat = [&](char __ch) -> bool
413 {
414 if (*__next == __ch)
415 {
416 ++__next;
417 return true;
418 }
419 return false;
420 };
421
422 if (++__next == __fmt_last)
423 *__out++ = '$';
424 else if (__eat('$'))
425 *__out++ = '$';
426 else if (__eat('&'))
427 __output(0);
428 else if (__eat('`'))
429 {
430 auto& __sub = _M_prefix();
431 if (__sub.matched)
432 __out = std::copy(__sub.first, __sub.second, __out);
433 }
434 else if (__eat('\''))
435 {
436 auto& __sub = _M_suffix();
437 if (__sub.matched)
438 __out = std::copy(__sub.first, __sub.second, __out);
439 }
440 else if (__fctyp.is(__ctype_type::digit, *__next))
441 {
442 long __num = __traits.value(*__next, 10);
443 if (++__next != __fmt_last
444 && __fctyp.is(__ctype_type::digit, *__next))
445 {
446 __num *= 10;
447 __num += __traits.value(*__next++, 10);
448 }
449 if (0 <= __num && __num < this->size())
450 __output(__num);
451 }
452 else
453 *__out++ = '$';
454 __fmt_first = __next;
455 }
456 __out = std::copy(__fmt_first, __fmt_last, __out);
457 }
458 return __out;
459 }
460
461 template<typename _Out_iter, typename _Bi_iter,
462 typename _Rx_traits, typename _Ch_type>
463 _Out_iter
464 __regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
465 const basic_regex<_Ch_type, _Rx_traits>& __e,
466 const _Ch_type* __fmt, size_t __len,
468 {
469 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
470 _IterT __i(__first, __last, __e, __flags);
471 _IterT __end;
472 if (__i == __end)
473 {
474 if (!(__flags & regex_constants::format_no_copy))
475 __out = std::copy(__first, __last, __out);
476 }
477 else
478 {
479 sub_match<_Bi_iter> __last;
480 for (; __i != __end; ++__i)
481 {
482 if (!(__flags & regex_constants::format_no_copy))
483 __out = std::copy(__i->prefix().first, __i->prefix().second,
484 __out);
485 __out = __i->format(__out, __fmt, __fmt + __len, __flags);
486 __last = __i->suffix();
488 break;
489 }
490 if (!(__flags & regex_constants::format_no_copy))
491 __out = std::copy(__last.first, __last.second, __out);
492 }
493 return __out;
494 }
495
496 template<typename _Bi_iter,
497 typename _Ch_type,
498 typename _Rx_traits>
499 bool
501 operator==(const regex_iterator& __rhs) const noexcept
502 {
503 if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
504 return true;
505 return _M_pregex == __rhs._M_pregex
506 && _M_begin == __rhs._M_begin
507 && _M_end == __rhs._M_end
508 && _M_flags == __rhs._M_flags
509 && _M_match[0] == __rhs._M_match[0];
510 }
511
512 template<typename _Bi_iter,
513 typename _Ch_type,
514 typename _Rx_traits>
515 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
518 {
519 // In all cases in which the call to regex_search returns true,
520 // match.prefix().first shall be equal to the previous value of
521 // match[0].second, and for each index i in the half-open range
522 // [0, match.size()) for which match[i].matched is true,
523 // match[i].position() shall return distance(begin, match[i].first).
524 // [28.12.1.4.5]
525 if (_M_match[0].matched)
526 {
527 auto __start = _M_match[0].second;
528 auto __prefix_first = _M_match[0].second;
529 if (_M_match[0].first == _M_match[0].second)
530 {
531 if (__start == _M_end)
532 {
533 _M_pregex = nullptr;
534 return *this;
535 }
536 else
537 {
538 if (regex_search(__start, _M_end, _M_match, *_M_pregex,
539 _M_flags
542 {
543 __glibcxx_assert(_M_match[0].matched);
544 auto& __prefix = _M_match._M_prefix();
545 __prefix.first = __prefix_first;
546 __prefix.matched = __prefix.first != __prefix.second;
547 // [28.12.1.4.5]
548 _M_match._M_begin = _M_begin;
549 return *this;
550 }
551 else
552 ++__start;
553 }
554 }
556 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
557 {
558 __glibcxx_assert(_M_match[0].matched);
559 auto& __prefix = _M_match._M_prefix();
560 __prefix.first = __prefix_first;
561 __prefix.matched = __prefix.first != __prefix.second;
562 // [28.12.1.4.5]
563 _M_match._M_begin = _M_begin;
564 }
565 else
566 _M_pregex = nullptr;
567 }
568 return *this;
569 }
570
571 template<typename _Bi_iter,
572 typename _Ch_type,
573 typename _Rx_traits>
574 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
576 operator=(const regex_token_iterator& __rhs)
577 {
578 _M_position = __rhs._M_position;
579 _M_subs = __rhs._M_subs;
580 _M_n = __rhs._M_n;
581 _M_suffix = __rhs._M_suffix;
582 _M_has_m1 = __rhs._M_has_m1;
583 _M_normalize_result();
584 return *this;
585 }
586
587 template<typename _Bi_iter,
588 typename _Ch_type,
589 typename _Rx_traits>
590 bool
592 operator==(const regex_token_iterator& __rhs) const
593 {
594 if (_M_end_of_seq() && __rhs._M_end_of_seq())
595 return true;
596 if (_M_suffix.matched && __rhs._M_suffix.matched
597 && _M_suffix == __rhs._M_suffix)
598 return true;
599 if (_M_end_of_seq() || _M_suffix.matched
600 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
601 return false;
602 return _M_position == __rhs._M_position
603 && _M_n == __rhs._M_n
604 && _M_subs == __rhs._M_subs;
605 }
606
607 template<typename _Bi_iter,
608 typename _Ch_type,
609 typename _Rx_traits>
610 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
613 {
614 _Position __prev = _M_position;
615 if (_M_suffix.matched)
616 *this = regex_token_iterator();
617 else if (_M_n + 1 < _M_subs.size())
618 {
619 _M_n++;
620 _M_result = &_M_current_match();
621 }
622 else
623 {
624 _M_n = 0;
625 ++_M_position;
626 if (_M_position != _Position())
627 _M_result = &_M_current_match();
628 else if (_M_has_m1 && __prev->suffix().length() != 0)
629 {
630 _M_suffix.matched = true;
631 _M_suffix.first = __prev->suffix().first;
632 _M_suffix.second = __prev->suffix().second;
633 _M_result = &_M_suffix;
634 }
635 else
636 *this = regex_token_iterator();
637 }
638 return *this;
639 }
640
641 template<typename _Bi_iter,
642 typename _Ch_type,
643 typename _Rx_traits>
644 void
645 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
646 _M_init(_Bi_iter __a, _Bi_iter __b)
647 {
648 _M_has_m1 = false;
649 for (auto __it : _M_subs)
650 if (__it == -1)
651 {
652 _M_has_m1 = true;
653 break;
654 }
655 if (_M_position != _Position())
656 _M_result = &_M_current_match();
657 else if (_M_has_m1)
658 {
659 _M_suffix.matched = true;
660 _M_suffix.first = __a;
661 _M_suffix.second = __b;
662 _M_result = &_M_suffix;
663 }
664 else
665 _M_result = nullptr;
666 }
667
668 /// @endcond
669
670_GLIBCXX_END_NAMESPACE_VERSION
671} // namespace
_Out_iter __regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, const basic_regex< _Ch_type, _Rx_traits > &__e, const _Ch_type *__fmt, size_t __len, regex_constants::match_flag_type __flags)
Determines if there is a match between the regular expression e and all of the character sequence [fi...
bool regex_search(_Bi_iter __s, _Bi_iter __e, match_results< _Bi_iter, _Alloc > &__m, const basic_regex< _Ch_type, _Rx_traits > &__re, regex_constants::match_flag_type __flags=regex_constants::match_default)
Definition: regex.h:2353
ISO C++ entities toplevel namespace is std.
ios_base & hex(ios_base &__base)
Calls base.setf(ios_base::hex, ios_base::basefield).
Definition: ios_base.h:1054
constexpr auto size(const _Container &__cont) noexcept(noexcept(__cont.size())) -> decltype(__cont.size())
Return the size of a container.
Definition: range_access.h:245
ios_base & oct(ios_base &__base)
Calls base.setf(ios_base::oct, ios_base::basefield).
Definition: ios_base.h:1062
constexpr syntax_option_type __polynomial
constexpr match_flag_type format_first_only
constexpr match_flag_type match_continuous
match_flag_type
This is a bitmask type indicating regex matching rules.
constexpr match_flag_type match_prev_avail
constexpr match_flag_type format_sed
constexpr match_flag_type match_not_null
constexpr match_flag_type format_no_copy
Controlling input for std::string.
Definition: sstream:538
Primary class template ctype facet.
_Out_iter format(_Out_iter __out, const char_type *__fmt_first, const char_type *__fmt_last, match_flag_type __flags=regex_constants::format_default) const
string_type lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
Gets a collation element by name.
char_class_type lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase=false) const
Maps one or more characters to a named character classification.
int value(_Ch_type __ch, int __radix) const
Converts a digit to an int.
bool isctype(_Ch_type __c, char_class_type __f) const
Determines if c is a member of an identified class.
regex_iterator & operator++()
Increments a regex_iterator.
bool operator==(const regex_iterator &) const noexcept
Tests the equivalence of two regex iterators.
bool operator==(const regex_token_iterator &__rhs) const
Compares a regex_token_iterator to another for equality.
regex_token_iterator & operator++()
Increments a regex_token_iterator.
regex_token_iterator & operator=(const regex_token_iterator &__rhs)
Assigns a regex_token_iterator to another.