libstdc++
regex_automaton.tcc
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2015 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex_automaton.tcc
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 
37 #ifdef _GLIBCXX_DEBUG
38  inline std::ostream&
39  _State_base::_M_print(std::ostream& ostr) const
40  {
41  switch (_M_opcode)
42  {
43  case _S_opcode_alternative:
44  case _S_opcode_repeat:
45  ostr << "alt next=" << _M_next << " alt=" << _M_alt;
46  break;
47  case _S_opcode_subexpr_begin:
48  ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr;
49  break;
50  case _S_opcode_subexpr_end:
51  ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr;
52  break;
53  case _S_opcode_backref:
54  ostr << "backref next=" << _M_next << " index=" << _M_backref_index;
55  break;
56  case _S_opcode_match:
57  ostr << "match next=" << _M_next;
58  break;
59  case _S_opcode_accept:
60  ostr << "accept next=" << _M_next;
61  break;
62  default:
63  ostr << "unknown next=" << _M_next;
64  break;
65  }
66  return ostr;
67  }
68 
69  // Prints graphviz dot commands for state.
70  inline std::ostream&
71  _State_base::_M_dot(std::ostream& __ostr, _StateIdT __id) const
72  {
73  switch (_M_opcode)
74  {
75  case _S_opcode_alternative:
76  case _S_opcode_repeat:
77  __ostr << __id << " [label=\"" << __id << "\\nALT\"];\n"
78  << __id << " -> " << _M_next
79  << " [label=\"next\", tailport=\"s\"];\n"
80  << __id << " -> " << _M_alt
81  << " [label=\"alt\", tailport=\"n\"];\n";
82  break;
83  case _S_opcode_backref:
84  __ostr << __id << " [label=\"" << __id << "\\nBACKREF "
85  << _M_subexpr << "\"];\n"
86  << __id << " -> " << _M_next << " [label=\"<match>\"];\n";
87  break;
88  case _S_opcode_line_begin_assertion:
89  __ostr << __id << " [label=\"" << __id << "\\nLINE_BEGIN \"];\n"
90  << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
91  break;
92  case _S_opcode_line_end_assertion:
93  __ostr << __id << " [label=\"" << __id << "\\nLINE_END \"];\n"
94  << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
95  break;
96  case _S_opcode_word_boundary:
97  __ostr << __id << " [label=\"" << __id << "\\nWORD_BOUNDRY "
98  << _M_neg << "\"];\n"
99  << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
100  break;
101  case _S_opcode_subexpr_lookahead:
102  __ostr << __id << " [label=\"" << __id << "\\nLOOK_AHEAD\"];\n"
103  << __id << " -> " << _M_next
104  << " [label=\"epsilon\", tailport=\"s\"];\n"
105  << __id << " -> " << _M_alt
106  << " [label=\"<assert>\", tailport=\"n\"];\n";
107  break;
108  case _S_opcode_subexpr_begin:
109  __ostr << __id << " [label=\"" << __id << "\\nSBEGIN "
110  << _M_subexpr << "\"];\n"
111  << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
112  break;
113  case _S_opcode_subexpr_end:
114  __ostr << __id << " [label=\"" << __id << "\\nSEND "
115  << _M_subexpr << "\"];\n"
116  << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
117  break;
118  case _S_opcode_dummy:
119  break;
120  case _S_opcode_match:
121  __ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n"
122  << __id << " -> " << _M_next << " [label=\"<match>\"];\n";
123  break;
124  case _S_opcode_accept:
125  __ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ;
126  break;
127  default:
128  _GLIBCXX_DEBUG_ASSERT(false);
129  break;
130  }
131  return __ostr;
132  }
133 
134  template<typename _TraitsT>
135  std::ostream&
136  _NFA<_TraitsT>::_M_dot(std::ostream& __ostr) const
137  {
138  __ostr << "digraph _Nfa {\n"
139  " rankdir=LR;\n";
140  for (size_t __i = 0; __i < this->size(); ++__i)
141  (*this)[__i]._M_dot(__ostr, __i);
142  __ostr << "}\n";
143  return __ostr;
144  }
145 #endif
146 
147  template<typename _TraitsT>
148  _StateIdT
149  _NFA<_TraitsT>::_M_insert_backref(size_t __index)
150  {
151  // To figure out whether a backref is valid, a stack is used to store
152  // unfinished sub-expressions. For example, when parsing
153  // "(a(b)(c\\1(d)))" at '\\1', _M_subexpr_count is 3, indicating that 3
154  // sub expressions are parsed or partially parsed(in the stack), aka,
155  // "(a..", "(b)" and "(c..").
156  // _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this
157  // time, "\\2" is valid, but "\\1" and "\\3" are not.
158  if (__index >= _M_subexpr_count)
159  __throw_regex_error(regex_constants::error_backref);
160  for (auto __it : this->_M_paren_stack)
161  if (__index == __it)
162  __throw_regex_error(regex_constants::error_backref);
163  this->_M_has_backref = true;
164  _StateT __tmp(_S_opcode_backref);
165  __tmp._M_backref_index = __index;
166  return _M_insert_state(std::move(__tmp));
167  }
168 
169  template<typename _TraitsT>
170  void
171  _NFA<_TraitsT>::_M_eliminate_dummy()
172  {
173  for (auto& __it : *this)
174  {
175  while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode
176  == _S_opcode_dummy)
177  __it._M_next = (*this)[__it._M_next]._M_next;
178  if (__it._M_opcode == _S_opcode_alternative
179  || __it._M_opcode == _S_opcode_repeat
180  || __it._M_opcode == _S_opcode_subexpr_lookahead)
181  while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode
182  == _S_opcode_dummy)
183  __it._M_alt = (*this)[__it._M_alt]._M_next;
184  }
185  }
186 
187  // Just apply DFS on the sequence and re-link their links.
188  template<typename _TraitsT>
189  _StateSeq<_TraitsT>
190  _StateSeq<_TraitsT>::_M_clone()
191  {
193  std::stack<_StateIdT> __stack;
194  __stack.push(_M_start);
195  while (!__stack.empty())
196  {
197  auto __u = __stack.top();
198  __stack.pop();
199  auto __dup = _M_nfa[__u];
200  // _M_insert_state() never return -1
201  auto __id = _M_nfa._M_insert_state(__dup);
202  __m[__u] = __id;
203  if (__dup._M_opcode == _S_opcode_alternative
204  || __dup._M_opcode == _S_opcode_repeat
205  || __dup._M_opcode == _S_opcode_subexpr_lookahead)
206  if (__dup._M_alt != _S_invalid_state_id
207  && __m.count(__dup._M_alt) == 0)
208  __stack.push(__dup._M_alt);
209  if (__u == _M_end)
210  continue;
211  if (__dup._M_next != _S_invalid_state_id
212  && __m.count(__dup._M_next) == 0)
213  __stack.push(__dup._M_next);
214  }
215  for (auto __it : __m)
216  {
217  auto __v = __it.second;
218  auto& __ref = _M_nfa[__v];
219  if (__ref._M_next != _S_invalid_state_id)
220  {
221  _GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_next) > 0);
222  __ref._M_next = __m[__ref._M_next];
223  }
224  if (__ref._M_opcode == _S_opcode_alternative
225  || __ref._M_opcode == _S_opcode_repeat
226  || __ref._M_opcode == _S_opcode_subexpr_lookahead)
227  if (__ref._M_alt != _S_invalid_state_id)
228  {
229  _GLIBCXX_DEBUG_ASSERT(__m.count(__ref._M_alt) > 0);
230  __ref._M_alt = __m[__ref._M_alt];
231  }
232  }
233  return _StateSeq(_M_nfa, __m[_M_start], __m[_M_end]);
234  }
235 
236 _GLIBCXX_END_NAMESPACE_VERSION
237 } // namespace __detail
238 } // namespace
bool empty() const
Definition: stl_stack.h:149
void push(const value_type &__x)
Add data to the top of the stack.
Definition: stl_stack.h:189
reference top()
Definition: stl_stack.h:162
void pop()
Removes first element.
Definition: stl_stack.h:215
A standard container made up of (key,value) pairs, which can be retrieved based on a key...
Definition: stl_map.h:96
constexpr error_type error_backref(_S_error_backref)
size_type count(const key_type &__x) const
Finds the number of elements with given key.
Definition: stl_map.h:891
A standard container giving FILO behavior.
Definition: stl_stack.h:99
ISO C++ entities toplevel namespace is std.