libstdc++
regex_automaton.tcc
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2020 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex_automaton.tcc
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
34 
35 namespace __detail
36 {
37 #ifdef _GLIBCXX_DEBUG
38  inline std::ostream&
39  _State_base::_M_print(std::ostream& ostr) const
40  {
41  switch (_M_opcode)
42  {
43  case _S_opcode_alternative:
44  case _S_opcode_repeat:
45  ostr << "alt next=" << _M_next << " alt=" << _M_alt;
46  break;
47  case _S_opcode_subexpr_begin:
48  ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr;
49  break;
50  case _S_opcode_subexpr_end:
51  ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr;
52  break;
53  case _S_opcode_backref:
54  ostr << "backref next=" << _M_next << " index=" << _M_backref_index;
55  break;
56  case _S_opcode_match:
57  ostr << "match next=" << _M_next;
58  break;
59  case _S_opcode_accept:
60  ostr << "accept next=" << _M_next;
61  break;
62  default:
63  ostr << "unknown next=" << _M_next;
64  break;
65  }
66  return ostr;
67  }
68 
69  // Prints graphviz dot commands for state.
70  inline std::ostream&
71  _State_base::_M_dot(std::ostream& __ostr, _StateIdT __id) const
72  {
73  switch (_M_opcode)
74  {
75  case _S_opcode_alternative:
76  case _S_opcode_repeat:
77  __ostr << __id << " [label=\"" << __id << "\\nALT\"];\n"
78  << __id << " -> " << _M_next
79  << " [label=\"next\", tailport=\"s\"];\n"
80  << __id << " -> " << _M_alt
81  << " [label=\"alt\", tailport=\"n\"];\n";
82  break;
83  case _S_opcode_backref:
84  __ostr << __id << " [label=\"" << __id << "\\nBACKREF "
85  << _M_subexpr << "\"];\n"
86  << __id << " -> " << _M_next << " [label=\"<match>\"];\n";
87  break;
88  case _S_opcode_line_begin_assertion:
89  __ostr << __id << " [label=\"" << __id << "\\nLINE_BEGIN \"];\n"
90  << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
91  break;
92  case _S_opcode_line_end_assertion:
93  __ostr << __id << " [label=\"" << __id << "\\nLINE_END \"];\n"
94  << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
95  break;
96  case _S_opcode_word_boundary:
97  __ostr << __id << " [label=\"" << __id << "\\nWORD_BOUNDRY "
98  << _M_neg << "\"];\n"
99  << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
100  break;
101  case _S_opcode_subexpr_lookahead:
102  __ostr << __id << " [label=\"" << __id << "\\nLOOK_AHEAD\"];\n"
103  << __id << " -> " << _M_next
104  << " [label=\"epsilon\", tailport=\"s\"];\n"
105  << __id << " -> " << _M_alt
106  << " [label=\"<assert>\", tailport=\"n\"];\n";
107  break;
108  case _S_opcode_subexpr_begin:
109  __ostr << __id << " [label=\"" << __id << "\\nSBEGIN "
110  << _M_subexpr << "\"];\n"
111  << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
112  break;
113  case _S_opcode_subexpr_end:
114  __ostr << __id << " [label=\"" << __id << "\\nSEND "
115  << _M_subexpr << "\"];\n"
116  << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
117  break;
118  case _S_opcode_dummy:
119  break;
120  case _S_opcode_match:
121  __ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n"
122  << __id << " -> " << _M_next << " [label=\"<match>\"];\n";
123  break;
124  case _S_opcode_accept:
125  __ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ;
126  break;
127  default:
128  _GLIBCXX_DEBUG_ASSERT(false);
129  break;
130  }
131  return __ostr;
132  }
133 
134  template<typename _TraitsT>
135  std::ostream&
136  _NFA<_TraitsT>::_M_dot(std::ostream& __ostr) const
137  {
138  __ostr << "digraph _Nfa {\n"
139  " rankdir=LR;\n";
140  for (size_t __i = 0; __i < this->size(); ++__i)
141  (*this)[__i]._M_dot(__ostr, __i);
142  __ostr << "}\n";
143  return __ostr;
144  }
145 #endif
146 
147  template<typename _TraitsT>
148  _StateIdT
149  _NFA<_TraitsT>::_M_insert_backref(size_t __index)
150  {
151  if (this->_M_flags & regex_constants::__polynomial)
152  __throw_regex_error(regex_constants::error_complexity,
153  "Unexpected back-reference in polynomial mode.");
154  // To figure out whether a backref is valid, a stack is used to store
155  // unfinished sub-expressions. For example, when parsing
156  // "(a(b)(c\\1(d)))" at '\\1', _M_subexpr_count is 3, indicating that 3
157  // sub expressions are parsed or partially parsed(in the stack), aka,
158  // "(a..", "(b)" and "(c..").
159  // _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this
160  // time, "\\2" is valid, but "\\1" and "\\3" are not.
161  if (__index >= _M_subexpr_count)
162  __throw_regex_error(
164  "Back-reference index exceeds current sub-expression count.");
165  for (auto __it : this->_M_paren_stack)
166  if (__index == __it)
167  __throw_regex_error(
169  "Back-reference referred to an opened sub-expression.");
170  this->_M_has_backref = true;
171  _StateT __tmp(_S_opcode_backref);
172  __tmp._M_backref_index = __index;
173  return _M_insert_state(std::move(__tmp));
174  }
175 
176  template<typename _TraitsT>
177  void
178  _NFA<_TraitsT>::_M_eliminate_dummy()
179  {
180  for (auto& __it : *this)
181  {
182  while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode()
183  == _S_opcode_dummy)
184  __it._M_next = (*this)[__it._M_next]._M_next;
185  if (__it._M_has_alt())
186  while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode()
187  == _S_opcode_dummy)
188  __it._M_alt = (*this)[__it._M_alt]._M_next;
189  }
190  }
191 
192  // Just apply DFS on the sequence and re-link their links.
193  template<typename _TraitsT>
194  _StateSeq<_TraitsT>
195  _StateSeq<_TraitsT>::_M_clone()
196  {
198  std::stack<_StateIdT> __stack;
199  __stack.push(_M_start);
200  while (!__stack.empty())
201  {
202  auto __u = __stack.top();
203  __stack.pop();
204  auto __dup = _M_nfa[__u];
205  // _M_insert_state() never return -1
206  auto __id = _M_nfa._M_insert_state(std::move(__dup));
207  __m[__u] = __id;
208  if (__dup._M_has_alt())
209  if (__dup._M_alt != _S_invalid_state_id
210  && __m.count(__dup._M_alt) == 0)
211  __stack.push(__dup._M_alt);
212  if (__u == _M_end)
213  continue;
214  if (__dup._M_next != _S_invalid_state_id
215  && __m.count(__dup._M_next) == 0)
216  __stack.push(__dup._M_next);
217  }
218  for (auto __it : __m)
219  {
220  auto __v = __it.second;
221  auto& __ref = _M_nfa[__v];
222  if (__ref._M_next != _S_invalid_state_id)
223  __ref._M_next = __m.find(__ref._M_next)->second;
224  if (__ref._M_has_alt() && __ref._M_alt != _S_invalid_state_id)
225  __ref._M_alt = __m.find(__ref._M_alt)->second;
226  }
227  return _StateSeq(_M_nfa, __m[_M_start], __m[_M_end]);
228  }
229 } // namespace __detail
230 
231 _GLIBCXX_END_NAMESPACE_VERSION
232 } // namespace
constexpr std::remove_reference< _Tp >::type && move(_Tp &&__t) noexcept
Convert a value to an rvalue.
Definition: move.h:101
ISO C++ entities toplevel namespace is std.
constexpr syntax_option_type __polynomial
constexpr error_type error_backref(_S_error_backref)
constexpr error_type error_complexity(_S_error_complexity)
A standard container made up of (key,value) pairs, which can be retrieved based on a key,...
Definition: stl_map.h:101
size_type count(const key_type &__x) const
Finds the number of elements with given key.
Definition: stl_map.h:1215
A standard container giving FILO behavior.
Definition: stl_stack.h:100
void pop()
Removes first element.
Definition: stl_stack.h:272
void push(const value_type &__x)
Add data to the top of the stack.
Definition: stl_stack.h:239
bool empty() const
Definition: stl_stack.h:199
reference top()
Definition: stl_stack.h:212