libstdc++
locale_conv.h
Go to the documentation of this file.
1 // wstring_convert implementation -*- C++ -*-
2 
3 // Copyright (C) 2015 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /** @file bits/locale_conv.h
26  * This is an internal header file, included by other library headers.
27  * Do not attempt to use it directly. @headername{locale}
28  */
29 
30 #ifndef _LOCALE_CONV_H
31 #define _LOCALE_CONV_H 1
32 
33 #if __cplusplus < 201103L
34 # include <bits/c++0x_warning.h>
35 #else
36 
37 #include <streambuf>
38 #include "stringfwd.h"
39 #include "allocator.h"
40 #include "codecvt.h"
41 #include "unique_ptr.h"
42 
43 namespace std _GLIBCXX_VISIBILITY(default)
44 {
45 _GLIBCXX_BEGIN_NAMESPACE_VERSION
46 
47  /**
48  * @addtogroup locales
49  * @{
50  */
51 
/**
 * Convert the range [__first, __last) by repeatedly calling the member
 * function @p __fn on @p __cvt, writing the result into @p __outstr.
 *
 * @param __first   Start of the input sequence.
 * @param __last    One past the end of the input sequence.
 * @param __outstr  String that receives the output; it is written from its
 *                  first element and trimmed to the converted length.
 * @param __cvt     Object providing max_length() and the member @p __fn.
 * @param __state   Conversion state handed through to @p __fn.
 * @param __count   Set to the number of input elements consumed, on
 *                  success and on failure alike.
 * @param __fn      Pointer to the converting member function of @p __cvt.
 * @return false if the conversion reported codecvt_base::error,
 *         true otherwise.
 */
template<typename _OutStr, typename _InChar, typename _Codecvt,
         typename _State, typename _Fn>
  bool
  __do_str_codecvt(const _InChar* __first, const _InChar* __last,
                   _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
                   size_t& __count, _Fn __fn)
  {
    // Nothing to convert: report an empty result.
    if (__first == __last)
      {
        __outstr.clear();
        __count = 0;
        return true;
      }

    size_t __outchars = 0;
    auto __next = __first;
    const auto __maxlen = __cvt.max_length() + 1;

    codecvt_base::result __result;
    do
      {
        // Grow the output by the worst case for the remaining input.
        __outstr.resize(__outstr.size() + (__last - __next) * __maxlen);
        auto __outnext = &__outstr.front() + __outchars;
        auto const __outlast = &__outstr.back() + 1;
        __result = (__cvt.*__fn)(__state, __next, __last, __next,
                                 __outnext, __outlast, __outnext);
        __outchars = __outnext - &__outstr.front();
      }
    // Retry only when the conversion stopped for lack of output space.
    while (__result == codecvt_base::partial && __next != __last
           && (__outstr.size() - __outchars) < __maxlen);

    if (__result == codecvt_base::error)
      {
        // Report how far the conversion got so that the caller does not
        // keep a stale count from an earlier call.
        __count = __next - __first;
        return false;
      }

    if (__result == codecvt_base::noconv)
      {
        // No conversion was needed: the output is a copy of the input.
        __outstr.assign(__first, __last);
        __count = __last - __first;
      }
    else
      {
        // Drop the unused tail of the over-allocated buffer.
        __outstr.resize(__outchars);
        __count = __next - __first;
      }

    return true;
  }
99 
100  // Convert narrow character string to wide.
101  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
102  inline bool
103  __str_codecvt_in(const char* __first, const char* __last,
104  basic_string<_CharT, _Traits, _Alloc>& __outstr,
105  const codecvt<_CharT, char, _State>& __cvt,
106  _State& __state, size_t& __count)
107  {
108  using _Codecvt = codecvt<_CharT, char, _State>;
109  using _ConvFn
110  = codecvt_base::result
111  (_Codecvt::*)(_State&, const char*, const char*, const char*&,
112  _CharT*, _CharT*, _CharT*&) const;
113  _ConvFn __fn = &codecvt<_CharT, char, _State>::in;
114  return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
115  __count, __fn);
116  }
117 
118  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
119  inline bool
120  __str_codecvt_in(const char* __first, const char* __last,
121  basic_string<_CharT, _Traits, _Alloc>& __outstr,
122  const codecvt<_CharT, char, _State>& __cvt)
123  {
124  _State __state = {};
125  size_t __n;
126  return __str_codecvt_in(__first, __last, __outstr, __cvt, __state, __n);
127  }
128 
129  // Convert wide character string to narrow.
130  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
131  inline bool
132  __str_codecvt_out(const _CharT* __first, const _CharT* __last,
133  basic_string<char, _Traits, _Alloc>& __outstr,
134  const codecvt<_CharT, char, _State>& __cvt,
135  _State& __state, size_t& __count)
136  {
137  using _Codecvt = codecvt<_CharT, char, _State>;
138  using _ConvFn
139  = codecvt_base::result
140  (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
141  char*, char*, char*&) const;
142  _ConvFn __fn = &codecvt<_CharT, char, _State>::out;
143  return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
144  __count, __fn);
145  }
146 
147  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
148  inline bool
149  __str_codecvt_out(const _CharT* __first, const _CharT* __last,
150  basic_string<char, _Traits, _Alloc>& __outstr,
151  const codecvt<_CharT, char, _State>& __cvt)
152  {
153  _State __state = {};
154  size_t __n;
155  return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
156  }
157 
158 #ifdef _GLIBCXX_USE_WCHAR_T
159 
160 _GLIBCXX_BEGIN_NAMESPACE_CXX11
161 
162  /// String conversions
163  template<typename _Codecvt, typename _Elem = wchar_t,
164  typename _Wide_alloc = allocator<_Elem>,
165  typename _Byte_alloc = allocator<char>>
167  {
168  public:
171  typedef typename _Codecvt::state_type state_type;
172  typedef typename wide_string::traits_type::int_type int_type;
173 
174  /** Default constructor.
175  *
176  * @param __pcvt The facet to use for conversions.
177  *
178  * Takes ownership of @p __pcvt and will delete it in the destructor.
179  */
180  explicit
181  wstring_convert(_Codecvt* __pcvt = new _Codecvt()) : _M_cvt(__pcvt)
182  {
183  if (!_M_cvt)
184  __throw_logic_error("wstring_convert");
185  }
186 
187  /** Construct with an initial converstion state.
188  *
189  * @param __pcvt The facet to use for conversions.
190  * @param __state Initial conversion state.
191  *
192  * Takes ownership of @p __pcvt and will delete it in the destructor.
193  * The object's conversion state will persist between conversions.
194  */
195  wstring_convert(_Codecvt* __pcvt, state_type __state)
196  : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true)
197  {
198  if (!_M_cvt)
199  __throw_logic_error("wstring_convert");
200  }
201 
202  /** Construct with error strings.
203  *
204  * @param __byte_err A string to return on failed conversions.
205  * @param __wide_err A wide string to return on failed conversions.
206  */
207  explicit
208  wstring_convert(const byte_string& __byte_err,
209  const wide_string& __wide_err = wide_string())
210  : _M_cvt(new _Codecvt),
211  _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
212  _M_with_strings(true)
213  {
214  if (!_M_cvt)
215  __throw_logic_error("wstring_convert");
216  }
217 
218  ~wstring_convert() = default;
219 
220  // _GLIBCXX_RESOLVE_LIB_DEFECTS
221  // 2176. Special members for wstring_convert and wbuffer_convert
222  wstring_convert(const wstring_convert&) = delete;
223  wstring_convert& operator=(const wstring_convert&) = delete;
224 
225  /// @{ Convert from bytes.
226  wide_string
227  from_bytes(char __byte)
228  {
229  char __bytes[2] = { __byte };
230  return from_bytes(__bytes, __bytes+1);
231  }
232 
233  wide_string
234  from_bytes(const char* __ptr)
235  { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); }
236 
237  wide_string
238  from_bytes(const byte_string& __str)
239  {
240  auto __ptr = __str.data();
241  return from_bytes(__ptr, __ptr + __str.size());
242  }
243 
244  wide_string
245  from_bytes(const char* __first, const char* __last)
246  {
247  if (!_M_with_cvtstate)
248  _M_state = state_type();
249  wide_string __out{ _M_wide_err_string.get_allocator() };
250  if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state,
251  _M_count))
252  return __out;
253  if (_M_with_strings)
254  return _M_wide_err_string;
255  __throw_range_error("wstring_convert::from_bytes");
256  }
257  /// @}
258 
259  /// @{ Convert to bytes.
260  byte_string
261  to_bytes(_Elem __wchar)
262  {
263  _Elem __wchars[2] = { __wchar };
264  return to_bytes(__wchars, __wchars+1);
265  }
266 
267  byte_string
268  to_bytes(const _Elem* __ptr)
269  {
270  return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr));
271  }
272 
273  byte_string
274  to_bytes(const wide_string& __wstr)
275  {
276  auto __ptr = __wstr.data();
277  return to_bytes(__ptr, __ptr + __wstr.size());
278  }
279 
280  byte_string
281  to_bytes(const _Elem* __first, const _Elem* __last)
282  {
283  if (!_M_with_cvtstate)
284  _M_state = state_type();
285  byte_string __out{ _M_byte_err_string.get_allocator() };
286  if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state,
287  _M_count))
288  return __out;
289  if (_M_with_strings)
290  return _M_byte_err_string;
291  __throw_range_error("wstring_convert::to_bytes");
292  }
293  /// @}
294 
295  // _GLIBCXX_RESOLVE_LIB_DEFECTS
296  // 2174. wstring_convert::converted() should be noexcept
297  /// The number of elements successfully converted in the last conversion.
298  size_t converted() const noexcept { return _M_count; }
299 
300  /// The final conversion state of the last conversion.
301  state_type state() const { return _M_state; }
302 
303  private:
304  unique_ptr<_Codecvt> _M_cvt;
305  byte_string _M_byte_err_string;
306  wide_string _M_wide_err_string;
307  state_type _M_state = state_type();
308  size_t _M_count = 0;
309  bool _M_with_cvtstate = false;
310  bool _M_with_strings = false;
311  };
312 
313 _GLIBCXX_END_NAMESPACE_CXX11
314 
/// Buffer conversions
///
/// A stream buffer of @c _Elem that reads from and writes to an
/// underlying byte stream buffer, converting with an owned @c _Codecvt
/// facet.
template<typename _Codecvt, typename _Elem = wchar_t,
         typename _Tr = char_traits<_Elem>>
  class wbuffer_convert : public basic_streambuf<_Elem, _Tr>
  {
    typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;

  public:
    typedef typename _Codecvt::state_type state_type;

    /** Default constructor.
     *
     * @param  __bytebuf The underlying byte stream buffer.
     * @param  __pcvt    The facet to use for conversions.
     * @param  __state   Initial conversion state.
     *
     * Takes ownership of @p __pcvt and will delete it in the destructor.
     * Throws std::logic_error if @p __pcvt is null.
     */
    explicit
    wbuffer_convert(streambuf* __bytebuf = 0, _Codecvt* __pcvt = new _Codecvt,
                    state_type __state = state_type())
    : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
    {
      if (!_M_cvt)
        __throw_logic_error("wbuffer_convert");

      _M_always_noconv = _M_cvt->always_noconv();

      if (_M_buf)
        {
          // The put area is the whole buffer; the get area starts out
          // empty, positioned just after the putback region.
          this->setp(_M_put_area, _M_put_area + _S_buffer_length);
          this->setg(_M_get_area + _S_putback_length,
                     _M_get_area + _S_putback_length,
                     _M_get_area + _S_putback_length);
        }
    }

    ~wbuffer_convert() = default;

    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 2176. Special members for wstring_convert and wbuffer_convert
    wbuffer_convert(const wbuffer_convert&) = delete;
    wbuffer_convert& operator=(const wbuffer_convert&) = delete;

    /// The underlying byte stream buffer.
    streambuf* rdbuf() const noexcept { return _M_buf; }

    /// Replace the underlying byte stream buffer, returning the old one.
    streambuf*
    rdbuf(streambuf *__bytebuf) noexcept
    {
      auto __prev = _M_buf;
      _M_buf = __bytebuf;
      return __prev;
    }

    /// The conversion state following the last conversion.
    state_type state() const noexcept { return _M_state; }

  protected:
    /// Convert and write the put area, then synchronize the byte buffer.
    /// Returns 0 on success and -1 on failure.
    int
    sync()
    {
      // pubsync() returns 0 on success, so it must be negated here.
      return _M_buf && _M_conv_put() && !_M_buf->pubsync() ? 0 : -1;
    }

    /// Convert and write the put area to make room, then store @p __out.
    typename _Wide_streambuf::int_type
    overflow(typename _Wide_streambuf::int_type __out)
    {
      if (!_M_buf || !_M_conv_put())
        return _Tr::eof();
      else if (!_Tr::eq_int_type(__out, _Tr::eof()))
        return this->sputc(__out);
      return _Tr::not_eof(__out);
    }

    /// Refill the get area from the byte buffer when it is exhausted and
    /// return the current character without consuming it.
    typename _Wide_streambuf::int_type
    underflow()
    {
      if (!_M_buf)
        return _Tr::eof();

      if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get()))
        return _Tr::to_int_type(*this->gptr());
      else
        return _Tr::eof();
    }

    /// Copy up to @p __n characters into the put area, converting and
    /// writing it out each time it fills.  Returns the number accepted.
    streamsize
    xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
    {
      if (!_M_buf || __n == 0)
        return 0;
      streamsize __done = 0;
      do
        {
          auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
                                           __n - __done);
          _Tr::copy(this->pptr(), __s + __done, __nn);
          this->pbump(__nn);
          __done += __nn;
        } while (__done < __n && _M_conv_put());
      return __done;
    }

  private:
    // fill the get area from converted contents of the byte stream buffer
    bool
    _M_conv_get()
    {
      // Keep up to _S_putback_length already-read characters in front of
      // the new data.
      const streamsize __pb1 = this->gptr() - this->eback();
      const streamsize __pb2 = _S_putback_length;
      const streamsize __npb = std::min(__pb1, __pb2);

      _Tr::move(_M_get_area + _S_putback_length - __npb,
                this->gptr() - __npb, __npb);

      streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
      __nbytes = std::min(__nbytes, _M_buf->in_avail());
      // Always request at least one byte: in_avail() may report nothing
      // even though the source can still produce data.
      if (__nbytes < 1)
        __nbytes = 1;
      __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
      if (__nbytes < 1)
        return false;
      __nbytes += _M_unconv;

      // convert _M_get_buf into _M_get_area

      _Elem* __outbuf = _M_get_area + _S_putback_length;
      _Elem* __outnext = __outbuf;
      const char* __bnext = _M_get_buf;

      codecvt_base::result __result;
      if (_M_always_noconv)
        __result = codecvt_base::noconv;
      else
        {
          _Elem* __outend = _M_get_area + _S_buffer_length;

          __result = _M_cvt->in(_M_state,
                                __bnext, __bnext + __nbytes, __bnext,
                                __outbuf, __outend, __outnext);
        }

      if (__result == codecvt_base::noconv)
        {
          // cast is safe because noconv means _Elem is same type as char
          // NOTE(review): this path returns without calling setg();
          // confirm the get area pointers are intended to stay unchanged.
          auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
          _Tr::copy(__outbuf, __get_buf, __nbytes);
          _M_unconv = 0;
          return true;
        }

      // Move any unconverted trailing bytes to the front for the next call.
      if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
        char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);

      this->setg(__outbuf, __outbuf, __outnext);

      return __result != codecvt_base::error;
    }

    // unused
    bool
    _M_put(...)
    { return false; }

    // write __n bytes to the byte stream buffer; true if all were written
    bool
    _M_put(const char* __p, streamsize __n)
    {
      if (_M_buf->sputn(__p, __n) < __n)
        return false;
      return true;
    }

    // convert the put area and write to the byte stream buffer
    bool
    _M_conv_put()
    {
      _Elem* const __first = this->pbase();
      const _Elem* const __last = this->pptr();
      const streamsize __pending = __last - __first;

      if (_M_always_noconv)
        return _M_put(__first, __pending);

      char __outbuf[2 * _S_buffer_length];

      const _Elem* __next = __first;
      const _Elem* __start;
      do
        {
          __start = __next;
          char* __outnext = __outbuf;
          char* const __outlast = __outbuf + sizeof(__outbuf);
          auto __result = _M_cvt->out(_M_state, __next, __last, __next,
                                      __outnext, __outlast, __outnext);
          if (__result == codecvt_base::error)
            return false;
          else if (__result == codecvt_base::noconv)
            return _M_put(__next, __pending);

          if (!_M_put(__outbuf, __outnext - __outbuf))
            return false;
        }
      // Stop when everything is converted or no progress was made.
      while (__next != __last && __next != __start);

      // Shift any unconverted characters to the start of the put area.
      if (__next != __last)
        _Tr::move(__first, __next, __last - __next);

      this->pbump(__first - __next);
      return __next != __first;
    }

    streambuf*            _M_buf;   // underlying byte stream buffer (not owned)
    unique_ptr<_Codecvt>  _M_cvt;   // owned conversion facet
    state_type            _M_state;

    static const streamsize _S_buffer_length = 32;
    static const streamsize _S_putback_length = 3;
    _Elem                 _M_put_area[_S_buffer_length];
    _Elem                 _M_get_area[_S_buffer_length];
    streamsize            _M_unconv = 0;  // unconverted bytes held in _M_get_buf
    char                  _M_get_buf[_S_buffer_length-_S_putback_length];
    bool                  _M_always_noconv;
  };
535 
536 #endif // _GLIBCXX_USE_WCHAR_T
537 
538  /// @} group locales
539 
540 _GLIBCXX_END_NAMESPACE_VERSION
541 } // namespace
542 
543 #endif // __cplusplus
544 
545 #endif /* _LOCALE_CONV_H */
wide_string from_bytes(const byte_string &__str)
Convert from bytes.
Definition: locale_conv.h:238
Basis for explicit traits specializations.
Definition: char_traits.h:227
state_type state() const noexcept
The conversion state following the last conversion.
Definition: locale_conv.h:370
wstring_convert(_Codecvt *__pcvt=new _Codecvt())
Definition: locale_conv.h:181
byte_string to_bytes(_Elem __wchar)
Convert to bytes.
Definition: locale_conv.h:261
The actual work of input and output (interface).
Definition: iosfwd:80
ptrdiff_t streamsize
Integral type for I/O operation counts and buffer sizes.
Definition: postypes.h:98
wide_string from_bytes(const char *__first, const char *__last)
Convert from bytes.
Definition: locale_conv.h:245
Managing sequences of characters and character-like objects.
_GLIBCXX14_CONSTEXPR const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.
Definition: stl_algobase.h:195
state_type state() const
The final conversion state of the last conversion.
Definition: locale_conv.h:301
wbuffer_convert(streambuf *__bytebuf=0, _Codecvt *__pcvt=new _Codecvt, state_type __state=state_type())
Definition: locale_conv.h:334
byte_string to_bytes(const wide_string &__wstr)
Convert to bytes.
Definition: locale_conv.h:274
wstring_convert(_Codecvt *__pcvt, state_type __state)
Definition: locale_conv.h:195
result out(state_type &__state, const intern_type *__from, const intern_type *__from_end, const intern_type *&__from_next, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const
Convert from internal to external character set.
Definition: codecvt.h:116
String conversions.
Definition: locale_conv.h:166
wide_string from_bytes(const char *__ptr)
Convert from bytes.
Definition: locale_conv.h:234
wstring_convert(const byte_string &__byte_err, const wide_string &__wide_err=wide_string())
Definition: locale_conv.h:208
traits_type::int_type int_type
Definition: streambuf:131
_Wide_streambuf::int_type underflow()
Fetches more data from the controlled sequence.
Definition: locale_conv.h:388
wide_string from_bytes(char __byte)
Convert from bytes.
Definition: locale_conv.h:227
allocator_type get_allocator() const noexcept
Return copy of allocator used to construct this string.
byte_string to_bytes(const _Elem *__first, const _Elem *__last)
Convert to bytes.
Definition: locale_conv.h:281
size_type size() const noexcept
Returns the number of characters in the string, not including any null-termination.
const _CharT * data() const noexcept
Return const pointer to contents.
ISO C++ entities toplevel namespace is std.
result in(state_type &__state, const extern_type *__from, const extern_type *__from_end, const extern_type *&__from_next, intern_type *__to, intern_type *__to_end, intern_type *&__to_next) const
Convert from external to internal character set.
Definition: codecvt.h:196
_Wide_streambuf::int_type overflow(typename _Wide_streambuf::int_type __out)
Consumes data from the buffer; writes to the controlled sequence.
Definition: locale_conv.h:378
byte_string to_bytes(const _Elem *__ptr)
Convert to bytes.
Definition: locale_conv.h:268
int sync()
Synchronizes the buffer arrays with the controlled sequences.
Definition: locale_conv.h:374
size_t converted() const noexcept
The number of elements successfully converted in the last conversion.
Definition: locale_conv.h:298
Buffer conversions.
Definition: locale_conv.h:318