libstdc++
regex_scanner.h
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2015 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex_scanner.h
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 
37  /**
38  * @addtogroup regex-detail
39  * @{
40  */
41 
42  struct _ScannerBase
43  {
44  public:
45  /// Token types returned from the scanner.
46  enum _TokenT
47  {
48  _S_token_anychar,
49  _S_token_ord_char,
50  _S_token_oct_num,
51  _S_token_hex_num,
52  _S_token_backref,
53  _S_token_subexpr_begin,
54  _S_token_subexpr_no_group_begin,
55  _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
56  _S_token_subexpr_end,
57  _S_token_bracket_begin,
58  _S_token_bracket_neg_begin,
59  _S_token_bracket_end,
60  _S_token_interval_begin,
61  _S_token_interval_end,
62  _S_token_quoted_class,
63  _S_token_char_class_name,
64  _S_token_collsymbol,
65  _S_token_equiv_class_name,
66  _S_token_opt,
67  _S_token_or,
68  _S_token_closure0,
69  _S_token_closure1,
70  _S_token_line_begin,
71  _S_token_line_end,
72  _S_token_word_bound, // neg if _M_value[0] == 'n'
73  _S_token_comma,
74  _S_token_dup_count,
75  _S_token_eof,
76  _S_token_unknown
77  };
78 
79  protected:
81 
82  enum _StateT
83  {
84  _S_state_normal,
85  _S_state_in_brace,
86  _S_state_in_bracket,
87  };
88 
89  protected:
90  _ScannerBase(_FlagT __flags)
91  : _M_state(_S_state_normal),
92  _M_flags(__flags),
93  _M_escape_tbl(_M_is_ecma()
94  ? _M_ecma_escape_tbl
95  : _M_awk_escape_tbl),
96  _M_spec_char(_M_is_ecma()
97  ? _M_ecma_spec_char
98  : _M_flags & regex_constants::basic
99  ? _M_basic_spec_char
100  : _M_flags & regex_constants::extended
101  ? _M_extended_spec_char
102  : _M_flags & regex_constants::grep
103  ? ".[\\*^$\n"
104  : _M_flags & regex_constants::egrep
105  ? ".[\\()*+?{|^$\n"
106  : _M_flags & regex_constants::awk
107  ? _M_extended_spec_char
108  : nullptr),
109  _M_at_bracket_start(false)
110  { __glibcxx_assert(_M_spec_char); }
111 
112  protected:
113  const char*
114  _M_find_escape(char __c)
115  {
116  auto __it = _M_escape_tbl;
117  for (; __it->first != '\0'; ++__it)
118  if (__it->first == __c)
119  return &__it->second;
120  return nullptr;
121  }
122 
123  bool
124  _M_is_ecma() const
125  { return _M_flags & regex_constants::ECMAScript; }
126 
127  bool
128  _M_is_basic() const
129  { return _M_flags & (regex_constants::basic | regex_constants::grep); }
130 
131  bool
132  _M_is_extended() const
133  {
134  return _M_flags & (regex_constants::extended
137  }
138 
139  bool
140  _M_is_grep() const
141  { return _M_flags & (regex_constants::grep | regex_constants::egrep); }
142 
143  bool
144  _M_is_awk() const
145  { return _M_flags & regex_constants::awk; }
146 
147  protected:
148  // TODO: Make them static in the next abi change.
149  const std::pair<char, _TokenT> _M_token_tbl[9] =
150  {
151  {'^', _S_token_line_begin},
152  {'$', _S_token_line_end},
153  {'.', _S_token_anychar},
154  {'*', _S_token_closure0},
155  {'+', _S_token_closure1},
156  {'?', _S_token_opt},
157  {'|', _S_token_or},
158  {'\n', _S_token_or}, // grep and egrep
159  {'\0', _S_token_or},
160  };
161  const std::pair<char, char> _M_ecma_escape_tbl[8] =
162  {
163  {'0', '\0'},
164  {'b', '\b'},
165  {'f', '\f'},
166  {'n', '\n'},
167  {'r', '\r'},
168  {'t', '\t'},
169  {'v', '\v'},
170  {'\0', '\0'},
171  };
172  const std::pair<char, char> _M_awk_escape_tbl[11] =
173  {
174  {'"', '"'},
175  {'/', '/'},
176  {'\\', '\\'},
177  {'a', '\a'},
178  {'b', '\b'},
179  {'f', '\f'},
180  {'n', '\n'},
181  {'r', '\r'},
182  {'t', '\t'},
183  {'v', '\v'},
184  {'\0', '\0'},
185  };
186  const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
187  const char* _M_basic_spec_char = ".[\\*^$";
188  const char* _M_extended_spec_char = ".[\\()*+?{|^$";
189 
190  _StateT _M_state;
191  _FlagT _M_flags;
192  _TokenT _M_token;
193  const std::pair<char, char>* _M_escape_tbl;
194  const char* _M_spec_char;
195  bool _M_at_bracket_start;
196  };
197 
198  /**
199  * @brief Scans an input range for regex tokens.
200  *
201  * The %_Scanner class interprets the regular expression pattern in
202  * the input range passed to its constructor as a sequence of parse
203  * tokens passed to the regular expression compiler. The sequence
204  * of tokens provided depends on the flag settings passed to the
205  * constructor: different regular expression grammars will interpret
206  * the same input pattern in syntactically different ways.
207  */
208  template<typename _CharT>
209  class _Scanner
210  : public _ScannerBase
211  {
212  public:
213  typedef const _CharT* _IterT;
216  typedef const std::ctype<_CharT> _CtypeT;
217 
218  _Scanner(_IterT __begin, _IterT __end,
219  _FlagT __flags, std::locale __loc);
220 
221  void
222  _M_advance();
223 
224  _TokenT
225  _M_get_token() const
226  { return _M_token; }
227 
228  const _StringT&
229  _M_get_value() const
230  { return _M_value; }
231 
232 #ifdef _GLIBCXX_DEBUG
233  std::ostream&
234  _M_print(std::ostream&);
235 #endif
236 
237  private:
238  void
239  _M_scan_normal();
240 
241  void
242  _M_scan_in_bracket();
243 
244  void
245  _M_scan_in_brace();
246 
247  void
248  _M_eat_escape_ecma();
249 
250  void
251  _M_eat_escape_posix();
252 
253  void
254  _M_eat_escape_awk();
255 
256  void
257  _M_eat_class(char);
258 
259  _IterT _M_current;
260  _IterT _M_end;
261  _CtypeT& _M_ctype;
262  _StringT _M_value;
263  void (_Scanner::* _M_eat_escape)();
264  };
265 
266  //@} regex-detail
267 _GLIBCXX_END_NAMESPACE_VERSION
268 } // namespace __detail
269 } // namespace std
270 
271 #include <bits/regex_scanner.tcc>
constexpr syntax_option_type grep
constexpr syntax_option_type egrep
syntax_option_type
This is a bitmask type indicating how to interpret the regex.
Scans an input range for regex tokens.
constexpr syntax_option_type basic
constexpr syntax_option_type awk
Primary class template ctype facet. This template class defines classification and conversion function...
constexpr syntax_option_type ECMAScript
constexpr syntax_option_type extended
Container class for localization functionality. The locale class is first a class wrapper for C librar...
ISO C++ entities toplevel namespace is std.