libstdc++
codecvt_specializations.h
Go to the documentation of this file.
1 // Locale support (codecvt) -*- C++ -*-
2 
3 // Copyright (C) 2000-2013 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 //
26 // ISO C++ 14882: 22.2.1.5 Template class codecvt
27 //
28 
29 // Written by Benjamin Kosnik <[email protected]>
30 
31 /** @file ext/codecvt_specializations.h
32  * This file is a GNU extension to the Standard C++ Library.
33  */
34 
35 #ifndef _EXT_CODECVT_SPECIALIZATIONS_H
36 #define _EXT_CODECVT_SPECIALIZATIONS_H 1
37 
38 #include <bits/c++config.h>
39 #include <locale>
40 #include <iconv.h>
41 
42 namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
43 {
44 _GLIBCXX_BEGIN_NAMESPACE_VERSION
45 
/// Extension to use iconv for dealing with character encodings.
// This includes conversions and comparisons between various character
// sets.  This object encapsulates data that may need to be shared between
// char_traits, codecvt and ctype.
//
// An encoding_state owns up to two iconv conversion descriptors, one per
// direction, and closes them in its destructor.  Copying duplicates the
// encoding names, byte-order markers and character ratio, then opens
// fresh descriptors; the conversion state held inside the source's
// descriptors is not carried over.
class encoding_state
{
public:
  // Types:
  // NB: A conversion descriptor subsumes and enhances the
  // functionality of a simple state type such as mbstate_t.
  typedef iconv_t descriptor_type;

protected:
  // Name of internal character set encoding.
  std::string _M_int_enc;

  // Name of external character set encoding.
  std::string _M_ext_enc;

  // Conversion descriptor from external encoding to internal encoding.
  descriptor_type _M_in_desc;

  // Conversion descriptor from internal encoding to external encoding.
  descriptor_type _M_out_desc;

  // The byte-order marker for the external encoding, if necessary.
  int _M_ext_bom;

  // The byte-order marker for the internal encoding, if necessary.
  int _M_int_bom;

  // Number of external bytes needed to construct one complete
  // character in the internal encoding.
  // NB: -1 indicates variable, or stateful, encodings.
  int _M_bytes;

public:
  // Creates a state with empty encoding names and no descriptors.
  // good() is false for such an object.
  explicit
  encoding_state()
  : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
  { }

  // Creates a state for converting between the internal encoding __int
  // and the external encoding __ext, and opens both descriptors.
  // __ibom / __ebom are the internal / external byte-order markers
  // (0 means none) and __bytes is the character ratio.
  // Throws, via std::__throw_runtime_error, if a descriptor cannot be
  // created.
  explicit
  encoding_state(const char* __int, const char* __ext,
                 int __ibom = 0, int __ebom = 0, int __bytes = 1)
  : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0),
    _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes)
  { init(); }

  // 21.1.2 traits typedefs
  // p4
  // typedef STATE_T state_type
  // requires: state_type shall meet the requirements of
  // CopyConstructible types (20.1.3)
  // NB: This does not preserve the actual state of the conversion
  // descriptor member, but it does duplicate the encoding
  // information.
  encoding_state(const encoding_state& __obj)
  : _M_int_enc(__obj._M_int_enc), _M_ext_enc(__obj._M_ext_enc),
    _M_in_desc(0), _M_out_desc(0), _M_ext_bom(__obj._M_ext_bom),
    _M_int_bom(__obj._M_int_bom), _M_bytes(__obj._M_bytes)
  { init(); }

  // Need assignment operator as well.
  // Self-assignment is a no-op, so the live descriptors (and whatever
  // conversion state they hold) are not needlessly closed and reopened.
  encoding_state&
  operator=(const encoding_state& __obj)
  {
    if (this != &__obj)
      construct(__obj);
    return *this;
  }

  ~encoding_state()
  { destroy(); }

  // True when both descriptors are open, i.e. neither is null nor the
  // (iconv_t)-1 error value.
  bool
  good() const throw()
  {
    const descriptor_type __err = (iconv_t)(-1);
    bool __test = _M_in_desc && _M_in_desc != __err;
    __test &= _M_out_desc && _M_out_desc != __err;
    return __test;
  }

  int
  character_ratio() const
  { return _M_bytes; }

  const std::string
  internal_encoding() const
  { return _M_int_enc; }

  int
  internal_bom() const
  { return _M_int_bom; }

  const std::string
  external_encoding() const
  { return _M_ext_enc; }

  int
  external_bom() const
  { return _M_ext_bom; }

  // Descriptor converting external -> internal.
  const descriptor_type&
  in_descriptor() const
  { return _M_in_desc; }

  // Descriptor converting internal -> external.
  const descriptor_type&
  out_descriptor() const
  { return _M_out_desc; }

protected:
  // Opens whichever descriptor is still null, provided both encoding
  // names are non-empty.  If either iconv_open call fails, every
  // descriptor held by this object is released before the exception is
  // thrown: a constructor that throws never runs the destructor, so
  // anything left open here would leak.
  void
  init()
  {
    const descriptor_type __err = (iconv_t)(-1);
    const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size();
    if (!_M_in_desc && __have_encodings)
      {
        // iconv_open takes (tocode, fromcode).
        _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str());
        if (_M_in_desc == __err)
          {
            destroy();
            std::__throw_runtime_error(__N("encoding_state::_M_init "
                                "creating iconv input descriptor failed"));
          }
      }
    if (!_M_out_desc && __have_encodings)
      {
        _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str());
        if (_M_out_desc == __err)
          {
            destroy();
            std::__throw_runtime_error(__N("encoding_state::_M_init "
                                "creating iconv output descriptor failed"));
          }
      }
  }

  // Replaces this object's configuration with a copy of __obj's and
  // opens new descriptors for it.
  void
  construct(const encoding_state& __obj)
  {
    destroy();
    _M_int_enc = __obj._M_int_enc;
    _M_ext_enc = __obj._M_ext_enc;
    _M_ext_bom = __obj._M_ext_bom;
    _M_int_bom = __obj._M_int_bom;
    _M_bytes = __obj._M_bytes;
    init();
  }

  // Closes any open descriptor and resets both members to null.  The
  // reset is unconditional so that a leftover (iconv_t)-1 from a failed
  // open does not make a later init() skip reopening.
  void
  destroy() throw()
  {
    const descriptor_type __err = (iconv_t)(-1);
    if (_M_in_desc && _M_in_desc != __err)
      iconv_close(_M_in_desc);
    _M_in_desc = 0;
    if (_M_out_desc && _M_out_desc != __err)
      iconv_close(_M_out_desc);
    _M_out_desc = 0;
  }
};
204 
205  /// encoding_char_traits
206  // Custom traits type with encoding_state for the state type, and the
207  // associated fpos<encoding_state> for the position type, all other
208  // bits equivalent to the required char_traits instantiations.
209  template<typename _CharT>
210  struct encoding_char_traits : public std::char_traits<_CharT>
211  {
212  typedef encoding_state state_type;
213  typedef typename std::fpos<state_type> pos_type;
214  };
215 
216 _GLIBCXX_END_NAMESPACE_VERSION
217 } // namespace
218 
219 
220 namespace std _GLIBCXX_VISIBILITY(default)
221 {
222 _GLIBCXX_BEGIN_NAMESPACE_VERSION
223 
225 
226  /// codecvt<InternT, _ExternT, encoding_state> specialization.
227  // This partial specialization takes advantage of iconv to provide
228  // code conversions between a large number of character encodings.
229  template<typename _InternT, typename _ExternT>
230  class codecvt<_InternT, _ExternT, encoding_state>
231  : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state>
232  {
233  public:
234  // Types:
235  typedef codecvt_base::result result;
236  typedef _InternT intern_type;
237  typedef _ExternT extern_type;
238  typedef __gnu_cxx::encoding_state state_type;
239  typedef state_type::descriptor_type descriptor_type;
240 
241  // Data Members:
242  static locale::id id;
243 
244  explicit
245  codecvt(size_t __refs = 0)
247  { }
248 
249  explicit
250  codecvt(state_type& __enc, size_t __refs = 0)
252  { }
253 
254  protected:
255  virtual
256  ~codecvt() { }
257 
258  virtual result
259  do_out(state_type& __state, const intern_type* __from,
260  const intern_type* __from_end, const intern_type*& __from_next,
261  extern_type* __to, extern_type* __to_end,
262  extern_type*& __to_next) const;
263 
264  virtual result
265  do_unshift(state_type& __state, extern_type* __to,
266  extern_type* __to_end, extern_type*& __to_next) const;
267 
268  virtual result
269  do_in(state_type& __state, const extern_type* __from,
270  const extern_type* __from_end, const extern_type*& __from_next,
271  intern_type* __to, intern_type* __to_end,
272  intern_type*& __to_next) const;
273 
274  virtual int
275  do_encoding() const throw();
276 
277  virtual bool
278  do_always_noconv() const throw();
279 
280  virtual int
281  do_length(state_type&, const extern_type* __from,
282  const extern_type* __end, size_t __max) const;
283 
284  virtual int
285  do_max_length() const throw();
286  };
287 
288  template<typename _InternT, typename _ExternT>
289  locale::id
291 
// This adaptor works around the signature problems of the second
// argument to iconv():  SUSv2 and others use 'const char**', but glibc 2.2
// uses 'char**', which matches the POSIX 1003.1-2001 standard.
// Using this adaptor, g++ will do the work for us.
//
// _Tp is deduced from the type of the second parameter of __func.
// __inbuf is converted to it with const_cast, so the only conversion
// that can compile is a change of cv-qualification; any other
// parameter type is rejected at compile time instead of being
// silently reinterpreted.
// Returns whatever __func returns.
template<typename _Tp>
  inline size_t
  __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*),
                  iconv_t __cd, char** __inbuf, size_t* __inbytes,
                  char** __outbuf, size_t* __outbytes)
  {
    return __func(__cd, const_cast<_Tp>(__inbuf), __inbytes,
                  __outbuf, __outbytes);
  }
302 
303  template<typename _InternT, typename _ExternT>
304  codecvt_base::result
306  do_out(state_type& __state, const intern_type* __from,
307  const intern_type* __from_end, const intern_type*& __from_next,
308  extern_type* __to, extern_type* __to_end,
309  extern_type*& __to_next) const
310  {
311  result __ret = codecvt_base::error;
312  if (__state.good())
313  {
314  const descriptor_type& __desc = __state.out_descriptor();
315  const size_t __fmultiple = sizeof(intern_type);
316  size_t __fbytes = __fmultiple * (__from_end - __from);
317  const size_t __tmultiple = sizeof(extern_type);
318  size_t __tbytes = __tmultiple * (__to_end - __to);
319 
320  // Argument list for iconv specifies a byte sequence. Thus,
321  // all to/from arrays must be brutally casted to char*.
322  char* __cto = reinterpret_cast<char*>(__to);
323  char* __cfrom;
324  size_t __conv;
325 
326  // Some encodings need a byte order marker as the first item
327  // in the byte stream, to designate endian-ness. The default
328  // value for the byte order marker is NULL, so if this is
329  // the case, it's not necessary and we can just go on our
330  // merry way.
331  int __int_bom = __state.internal_bom();
332  if (__int_bom)
333  {
334  size_t __size = __from_end - __from;
335  intern_type* __cfixed = static_cast<intern_type*>
336  (__builtin_alloca(sizeof(intern_type) * (__size + 1)));
337  __cfixed[0] = static_cast<intern_type>(__int_bom);
338  char_traits<intern_type>::copy(__cfixed + 1, __from, __size);
339  __cfrom = reinterpret_cast<char*>(__cfixed);
340  __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
341  &__fbytes, &__cto, &__tbytes);
342  }
343  else
344  {
345  intern_type* __cfixed = const_cast<intern_type*>(__from);
346  __cfrom = reinterpret_cast<char*>(__cfixed);
347  __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes,
348  &__cto, &__tbytes);
349  }
350 
351  if (__conv != size_t(-1))
352  {
353  __from_next = reinterpret_cast<const intern_type*>(__cfrom);
354  __to_next = reinterpret_cast<extern_type*>(__cto);
355  __ret = codecvt_base::ok;
356  }
357  else
358  {
359  if (__fbytes < __fmultiple * (__from_end - __from))
360  {
361  __from_next = reinterpret_cast<const intern_type*>(__cfrom);
362  __to_next = reinterpret_cast<extern_type*>(__cto);
363  __ret = codecvt_base::partial;
364  }
365  else
366  __ret = codecvt_base::error;
367  }
368  }
369  return __ret;
370  }
371 
372  template<typename _InternT, typename _ExternT>
373  codecvt_base::result
375  do_unshift(state_type& __state, extern_type* __to,
376  extern_type* __to_end, extern_type*& __to_next) const
377  {
378  result __ret = codecvt_base::error;
379  if (__state.good())
380  {
381  const descriptor_type& __desc = __state.in_descriptor();
382  const size_t __tmultiple = sizeof(intern_type);
383  size_t __tlen = __tmultiple * (__to_end - __to);
384 
385  // Argument list for iconv specifies a byte sequence. Thus,
386  // all to/from arrays must be brutally casted to char*.
387  char* __cto = reinterpret_cast<char*>(__to);
388  size_t __conv = __iconv_adaptor(iconv,__desc, 0, 0,
389  &__cto, &__tlen);
390 
391  if (__conv != size_t(-1))
392  {
393  __to_next = reinterpret_cast<extern_type*>(__cto);
394  if (__tlen == __tmultiple * (__to_end - __to))
395  __ret = codecvt_base::noconv;
396  else if (__tlen == 0)
397  __ret = codecvt_base::ok;
398  else
399  __ret = codecvt_base::partial;
400  }
401  else
402  __ret = codecvt_base::error;
403  }
404  return __ret;
405  }
406 
407  template<typename _InternT, typename _ExternT>
408  codecvt_base::result
409  codecvt<_InternT, _ExternT, encoding_state>::
410  do_in(state_type& __state, const extern_type* __from,
411  const extern_type* __from_end, const extern_type*& __from_next,
412  intern_type* __to, intern_type* __to_end,
413  intern_type*& __to_next) const
414  {
415  result __ret = codecvt_base::error;
416  if (__state.good())
417  {
418  const descriptor_type& __desc = __state.in_descriptor();
419  const size_t __fmultiple = sizeof(extern_type);
420  size_t __flen = __fmultiple * (__from_end - __from);
421  const size_t __tmultiple = sizeof(intern_type);
422  size_t __tlen = __tmultiple * (__to_end - __to);
423 
424  // Argument list for iconv specifies a byte sequence. Thus,
425  // all to/from arrays must be brutally casted to char*.
426  char* __cto = reinterpret_cast<char*>(__to);
427  char* __cfrom;
428  size_t __conv;
429 
430  // Some encodings need a byte order marker as the first item
431  // in the byte stream, to designate endian-ness. The default
432  // value for the byte order marker is NULL, so if this is
433  // the case, it's not necessary and we can just go on our
434  // merry way.
435  int __ext_bom = __state.external_bom();
436  if (__ext_bom)
437  {
438  size_t __size = __from_end - __from;
439  extern_type* __cfixed = static_cast<extern_type*>
440  (__builtin_alloca(sizeof(extern_type) * (__size + 1)));
441  __cfixed[0] = static_cast<extern_type>(__ext_bom);
442  char_traits<extern_type>::copy(__cfixed + 1, __from, __size);
443  __cfrom = reinterpret_cast<char*>(__cfixed);
444  __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
445  &__flen, &__cto, &__tlen);
446  }
447  else
448  {
449  extern_type* __cfixed = const_cast<extern_type*>(__from);
450  __cfrom = reinterpret_cast<char*>(__cfixed);
451  __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
452  &__flen, &__cto, &__tlen);
453  }
454 
455 
456  if (__conv != size_t(-1))
457  {
458  __from_next = reinterpret_cast<const extern_type*>(__cfrom);
459  __to_next = reinterpret_cast<intern_type*>(__cto);
460  __ret = codecvt_base::ok;
461  }
462  else
463  {
464  if (__flen < static_cast<size_t>(__from_end - __from))
465  {
466  __from_next = reinterpret_cast<const extern_type*>(__cfrom);
467  __to_next = reinterpret_cast<intern_type*>(__cto);
468  __ret = codecvt_base::partial;
469  }
470  else
471  __ret = codecvt_base::error;
472  }
473  }
474  return __ret;
475  }
476 
477  template<typename _InternT, typename _ExternT>
478  int
479  codecvt<_InternT, _ExternT, encoding_state>::
480  do_encoding() const throw()
481  {
482  int __ret = 0;
483  if (sizeof(_ExternT) <= sizeof(_InternT))
484  __ret = sizeof(_InternT) / sizeof(_ExternT);
485  return __ret;
486  }
487 
488  template<typename _InternT, typename _ExternT>
489  bool
490  codecvt<_InternT, _ExternT, encoding_state>::
491  do_always_noconv() const throw()
492  { return false; }
493 
494  template<typename _InternT, typename _ExternT>
495  int
496  codecvt<_InternT, _ExternT, encoding_state>::
497  do_length(state_type&, const extern_type* __from,
498  const extern_type* __end, size_t __max) const
499  { return std::min(__max, static_cast<size_t>(__end - __from)); }
500 
501  // _GLIBCXX_RESOLVE_LIB_DEFECTS
502  // 74. Garbled text for codecvt::do_max_length
503  template<typename _InternT, typename _ExternT>
504  int
505  codecvt<_InternT, _ExternT, encoding_state>::
506  do_max_length() const throw()
507  { return 1; }
508 
509 _GLIBCXX_END_NAMESPACE_VERSION
510 } // namespace
511 
512 #endif
Class representing stream positions.
Definition: postypes.h:112
Basis for explicit traits specializations.
Definition: char_traits.h:227
size_type size() const noexcept
Returns the number of characters in the string, not including any null-termination.
Definition: basic_string.h:715
const _CharT * c_str() const noexcept
Return const pointer to null-terminated contents.
GNU extensions for public use.
ISO C++ entities toplevel namespace is std.
Facet ID class.The ID class provides facets with an index used to identify them. Every facet class mu...
Extension to use iconv for dealing with character encodings.
Primary class template codecvt.NB: Generic, mostly useless implementation.
Definition: codecvt.h:276
virtual result do_out(state_type &__state, const intern_type *__from, const intern_type *__from_end, const intern_type *&__from_next, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const
Convert from internal to external character set.
const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.
Definition: stl_algobase.h:193
Common base for codecvt functions.
Definition: codecvt.h:68