libstdc++
codecvt_specializations.h
Go to the documentation of this file.
1 // Locale support (codecvt) -*- C++ -*-
2 
3 // Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007,
4 // 2008, 2009, 2010
5 // Free Software Foundation, Inc.
6 //
7 // This file is part of the GNU ISO C++ Library. This library is free
8 // software; you can redistribute it and/or modify it under the
9 // terms of the GNU General Public License as published by the
10 // Free Software Foundation; either version 3, or (at your option)
11 // any later version.
12 
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 // GNU General Public License for more details.
17 
18 // Under Section 7 of GPL version 3, you are granted additional
19 // permissions described in the GCC Runtime Library Exception, version
20 // 3.1, as published by the Free Software Foundation.
21 
22 // You should have received a copy of the GNU General Public License and
23 // a copy of the GCC Runtime Library Exception along with this program;
24 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
25 // <http://www.gnu.org/licenses/>.
26 
27 //
28 // ISO C++ 14882: 22.2.1.5 Template class codecvt
29 //
30 
31 // Written by Benjamin Kosnik <[email protected]>
32 
33 /** @file ext/codecvt_specializations.h
34  * This file is a GNU extension to the Standard C++ Library.
35  */
36 
37 #ifndef _EXT_CODECVT_SPECIALIZATIONS_H
38 #define _EXT_CODECVT_SPECIALIZATIONS_H 1
39 
40 #include <bits/c++config.h>
41 #include <locale>
42 #include <iconv.h>
43 
44 namespace __gnu_cxx _GLIBCXX_VISIBILITY(default)
45 {
46 _GLIBCXX_BEGIN_NAMESPACE_VERSION
47 
/// Extension to use iconv for dealing with character encodings.
// This includes conversions and comparisons between various character
// sets.  This object encapsulates data that may need to be shared between
// char_traits, codecvt and ctype.
//
// An encoding_state stores the names of an internal and an external
// encoding plus one iconv conversion descriptor per direction.  The
// descriptors are opened by init() and closed by destroy(), which the
// destructor calls.
class encoding_state
{
public:
  // Types:
  // NB: A conversion descriptor subsumes and enhances the
  // functionality of a simple state type such as mbstate_t.
  typedef iconv_t descriptor_type;

protected:
  // Name of internal character set encoding.
  std::string _M_int_enc;

  // Name of external character set encoding.
  std::string _M_ext_enc;

  // Conversion descriptor between external encoding to internal encoding.
  descriptor_type _M_in_desc;

  // Conversion descriptor between internal encoding to external encoding.
  descriptor_type _M_out_desc;

  // The byte-order marker for the external encoding, if necessary.
  int _M_ext_bom;

  // The byte-order marker for the internal encoding, if necessary.
  int _M_int_bom;

  // Number of external bytes needed to construct one complete
  // character in the internal encoding.
  // NB: -1 indicates variable, or stateful, encodings.
  int _M_bytes;

public:
  // Creates a state with no encodings and no descriptors; good() is
  // false for such an object.
  explicit
  encoding_state()
  : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
  { }

  // Creates a state converting between the internal encoding __int and
  // the external encoding __ext, with optional byte-order markers and
  // the external-bytes-per-character ratio.  Opens both descriptors;
  // init() throws std::runtime_error if either cannot be created.
  explicit
  encoding_state(const char* __int, const char* __ext,
                 int __ibom = 0, int __ebom = 0, int __bytes = 1)
  : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0),
    _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes)
  { init(); }

  // 21.1.2 traits typedefs
  // p4
  // typedef STATE_T state_type
  // requires: state_type shall meet the requirements of
  // CopyConstructible types (20.1.3)
  // NB: This does not preserve the actual state of the conversion
  // descriptor member, but it does duplicate the encoding
  // information.
  encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0)
  { construct(__obj); }

  // Need assignment operator as well.  Like the copy constructor, this
  // closes the current descriptors and opens fresh ones for the copied
  // encoding names.
  encoding_state&
  operator=(const encoding_state& __obj)
  {
    construct(__obj);
    return *this;
  }

  ~encoding_state()
  { destroy(); }

  // True when both descriptors are non-null and neither is the
  // (iconv_t)(-1) error value.
  bool
  good() const throw()
  {
    const descriptor_type __err = (iconv_t)(-1);
    bool __test = _M_in_desc && _M_in_desc != __err;
    __test &= _M_out_desc && _M_out_desc != __err;
    return __test;
  }

  // Returns the stored external-bytes-per-character value (_M_bytes).
  int
  character_ratio() const
  { return _M_bytes; }

  const std::string
  internal_encoding() const
  { return _M_int_enc; }

  int
  internal_bom() const
  { return _M_int_bom; }

  const std::string
  external_encoding() const
  { return _M_ext_enc; }

  int
  external_bom() const
  { return _M_ext_bom; }

  // Descriptor opened as iconv_open(internal, external), i.e. converting
  // external to internal.
  const descriptor_type&
  in_descriptor() const
  { return _M_in_desc; }

  // Descriptor opened as iconv_open(external, internal), i.e. converting
  // internal to external.
  const descriptor_type&
  out_descriptor() const
  { return _M_out_desc; }

protected:
  // Opens whichever descriptor is still null, provided both encoding
  // names are non-empty.  Throws std::runtime_error on failure.
  void
  init()
  {
    const descriptor_type __err = (iconv_t)(-1);
    const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size();
    if (!_M_in_desc && __have_encodings)
      {
        _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str());
        if (_M_in_desc == __err)
          std::__throw_runtime_error(__N("encoding_state::_M_init "
                                  "creating iconv input descriptor failed"));
      }
    if (!_M_out_desc && __have_encodings)
      {
        _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str());
        if (_M_out_desc == __err)
          {
            // When init() runs from a constructor the destructor is never
            // invoked after a throw, so release the input descriptor
            // here; otherwise it would leak.
            destroy();
            std::__throw_runtime_error(__N("encoding_state::_M_init "
                                  "creating iconv output descriptor failed"));
          }
      }
  }

  // Replaces this object's encoding information with that of __obj and
  // reopens the descriptors; the conversion state of __obj's descriptors
  // is not copied.
  void
  construct(const encoding_state& __obj)
  {
    destroy();
    _M_int_enc = __obj._M_int_enc;
    _M_ext_enc = __obj._M_ext_enc;
    _M_ext_bom = __obj._M_ext_bom;
    _M_int_bom = __obj._M_int_bom;
    _M_bytes = __obj._M_bytes;
    init();
  }

  // Closes each descriptor that is neither null nor the error value and
  // resets it to null.
  void
  destroy() throw()
  {
    const descriptor_type __err = (iconv_t)(-1);
    if (_M_in_desc && _M_in_desc != __err)
      {
        iconv_close(_M_in_desc);
        _M_in_desc = 0;
      }
    if (_M_out_desc && _M_out_desc != __err)
      {
        iconv_close(_M_out_desc);
        _M_out_desc = 0;
      }
  }
};
206 
207  /// encoding_char_traits
208  // Custom traits type with encoding_state for the state type, and the
209  // associated fpos<encoding_state> for the position type, all other
210  // bits equivalent to the required char_traits instantiations.
211  template<typename _CharT>
212  struct encoding_char_traits : public std::char_traits<_CharT>
213  {
214  typedef encoding_state state_type;
215  typedef typename std::fpos<state_type> pos_type;
216  };
217 
218 _GLIBCXX_END_NAMESPACE_VERSION
219 } // namespace
220 
221 
222 namespace std _GLIBCXX_VISIBILITY(default)
223 {
224 _GLIBCXX_BEGIN_NAMESPACE_VERSION
225 
227 
228  /// codecvt<InternT, _ExternT, encoding_state> specialization.
229  // This partial specialization takes advantage of iconv to provide
230  // code conversions between a large number of character encodings.
231  template<typename _InternT, typename _ExternT>
232  class codecvt<_InternT, _ExternT, encoding_state>
233  : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state>
234  {
235  public:
236  // Types:
237  typedef codecvt_base::result result;
238  typedef _InternT intern_type;
239  typedef _ExternT extern_type;
241  typedef state_type::descriptor_type descriptor_type;
242 
243  // Data Members:
244  static locale::id id;
245 
246  explicit
247  codecvt(size_t __refs = 0)
249  { }
250 
251  explicit
252  codecvt(state_type& __enc, size_t __refs = 0)
254  { }
255 
256  protected:
257  virtual
258  ~codecvt() { }
259 
260  virtual result
261  do_out(state_type& __state, const intern_type* __from,
262  const intern_type* __from_end, const intern_type*& __from_next,
263  extern_type* __to, extern_type* __to_end,
264  extern_type*& __to_next) const;
265 
266  virtual result
267  do_unshift(state_type& __state, extern_type* __to,
268  extern_type* __to_end, extern_type*& __to_next) const;
269 
270  virtual result
271  do_in(state_type& __state, const extern_type* __from,
272  const extern_type* __from_end, const extern_type*& __from_next,
273  intern_type* __to, intern_type* __to_end,
274  intern_type*& __to_next) const;
275 
276  virtual int
277  do_encoding() const throw();
278 
279  virtual bool
280  do_always_noconv() const throw();
281 
282  virtual int
283  do_length(state_type&, const extern_type* __from,
284  const extern_type* __end, size_t __max) const;
285 
286  virtual int
287  do_max_length() const throw();
288  };
289 
290  template<typename _InternT, typename _ExternT>
291  locale::id
293 
// This adaptor works around the signature problems of the second
// argument to iconv(): SUSv2 and others use 'const char**', but glibc 2.2
// uses 'char**', which matches the POSIX 1003.1-2001 standard.
//
// _Tp is deduced from the second parameter of __func, and __inbuf is
// adjusted to that type before the call is forwarded.  A const_cast is
// used so that only a difference in cv-qualification is accepted; any
// other mismatch is a compile-time error instead of a silent
// reinterpretation.  The return value of __func is passed through.
template<typename _Tp>
  inline size_t
  __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*),
                  iconv_t __cd, char** __inbuf, size_t* __inbytes,
                  char** __outbuf, size_t* __outbytes)
  { return __func(__cd, const_cast<_Tp>(__inbuf), __inbytes, __outbuf, __outbytes); }
304 
305  template<typename _InternT, typename _ExternT>
306  codecvt_base::result
308  do_out(state_type& __state, const intern_type* __from,
309  const intern_type* __from_end, const intern_type*& __from_next,
310  extern_type* __to, extern_type* __to_end,
311  extern_type*& __to_next) const
312  {
313  result __ret = codecvt_base::error;
314  if (__state.good())
315  {
316  const descriptor_type& __desc = __state.out_descriptor();
317  const size_t __fmultiple = sizeof(intern_type);
318  size_t __fbytes = __fmultiple * (__from_end - __from);
319  const size_t __tmultiple = sizeof(extern_type);
320  size_t __tbytes = __tmultiple * (__to_end - __to);
321 
322  // Argument list for iconv specifies a byte sequence. Thus,
323  // all to/from arrays must be brutally casted to char*.
324  char* __cto = reinterpret_cast<char*>(__to);
325  char* __cfrom;
326  size_t __conv;
327 
328  // Some encodings need a byte order marker as the first item
329  // in the byte stream, to designate endian-ness. The default
330  // value for the byte order marker is NULL, so if this is
331  // the case, it's not necessary and we can just go on our
332  // merry way.
333  int __int_bom = __state.internal_bom();
334  if (__int_bom)
335  {
336  size_t __size = __from_end - __from;
337  intern_type* __cfixed = static_cast<intern_type*>
338  (__builtin_alloca(sizeof(intern_type) * (__size + 1)));
339  __cfixed[0] = static_cast<intern_type>(__int_bom);
340  char_traits<intern_type>::copy(__cfixed + 1, __from, __size);
341  __cfrom = reinterpret_cast<char*>(__cfixed);
342  __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
343  &__fbytes, &__cto, &__tbytes);
344  }
345  else
346  {
347  intern_type* __cfixed = const_cast<intern_type*>(__from);
348  __cfrom = reinterpret_cast<char*>(__cfixed);
349  __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes,
350  &__cto, &__tbytes);
351  }
352 
353  if (__conv != size_t(-1))
354  {
355  __from_next = reinterpret_cast<const intern_type*>(__cfrom);
356  __to_next = reinterpret_cast<extern_type*>(__cto);
357  __ret = codecvt_base::ok;
358  }
359  else
360  {
361  if (__fbytes < __fmultiple * (__from_end - __from))
362  {
363  __from_next = reinterpret_cast<const intern_type*>(__cfrom);
364  __to_next = reinterpret_cast<extern_type*>(__cto);
365  __ret = codecvt_base::partial;
366  }
367  else
368  __ret = codecvt_base::error;
369  }
370  }
371  return __ret;
372  }
373 
374  template<typename _InternT, typename _ExternT>
375  codecvt_base::result
377  do_unshift(state_type& __state, extern_type* __to,
378  extern_type* __to_end, extern_type*& __to_next) const
379  {
380  result __ret = codecvt_base::error;
381  if (__state.good())
382  {
383  const descriptor_type& __desc = __state.in_descriptor();
384  const size_t __tmultiple = sizeof(intern_type);
385  size_t __tlen = __tmultiple * (__to_end - __to);
386 
387  // Argument list for iconv specifies a byte sequence. Thus,
388  // all to/from arrays must be brutally casted to char*.
389  char* __cto = reinterpret_cast<char*>(__to);
390  size_t __conv = __iconv_adaptor(iconv,__desc, 0, 0,
391  &__cto, &__tlen);
392 
393  if (__conv != size_t(-1))
394  {
395  __to_next = reinterpret_cast<extern_type*>(__cto);
396  if (__tlen == __tmultiple * (__to_end - __to))
397  __ret = codecvt_base::noconv;
398  else if (__tlen == 0)
399  __ret = codecvt_base::ok;
400  else
401  __ret = codecvt_base::partial;
402  }
403  else
404  __ret = codecvt_base::error;
405  }
406  return __ret;
407  }
408 
409  template<typename _InternT, typename _ExternT>
410  codecvt_base::result
411  codecvt<_InternT, _ExternT, encoding_state>::
412  do_in(state_type& __state, const extern_type* __from,
413  const extern_type* __from_end, const extern_type*& __from_next,
414  intern_type* __to, intern_type* __to_end,
415  intern_type*& __to_next) const
416  {
417  result __ret = codecvt_base::error;
418  if (__state.good())
419  {
420  const descriptor_type& __desc = __state.in_descriptor();
421  const size_t __fmultiple = sizeof(extern_type);
422  size_t __flen = __fmultiple * (__from_end - __from);
423  const size_t __tmultiple = sizeof(intern_type);
424  size_t __tlen = __tmultiple * (__to_end - __to);
425 
426  // Argument list for iconv specifies a byte sequence. Thus,
427  // all to/from arrays must be brutally casted to char*.
428  char* __cto = reinterpret_cast<char*>(__to);
429  char* __cfrom;
430  size_t __conv;
431 
432  // Some encodings need a byte order marker as the first item
433  // in the byte stream, to designate endian-ness. The default
434  // value for the byte order marker is NULL, so if this is
435  // the case, it's not necessary and we can just go on our
436  // merry way.
437  int __ext_bom = __state.external_bom();
438  if (__ext_bom)
439  {
440  size_t __size = __from_end - __from;
441  extern_type* __cfixed = static_cast<extern_type*>
442  (__builtin_alloca(sizeof(extern_type) * (__size + 1)));
443  __cfixed[0] = static_cast<extern_type>(__ext_bom);
444  char_traits<extern_type>::copy(__cfixed + 1, __from, __size);
445  __cfrom = reinterpret_cast<char*>(__cfixed);
446  __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
447  &__flen, &__cto, &__tlen);
448  }
449  else
450  {
451  extern_type* __cfixed = const_cast<extern_type*>(__from);
452  __cfrom = reinterpret_cast<char*>(__cfixed);
453  __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
454  &__flen, &__cto, &__tlen);
455  }
456 
457 
458  if (__conv != size_t(-1))
459  {
460  __from_next = reinterpret_cast<const extern_type*>(__cfrom);
461  __to_next = reinterpret_cast<intern_type*>(__cto);
462  __ret = codecvt_base::ok;
463  }
464  else
465  {
466  if (__flen < static_cast<size_t>(__from_end - __from))
467  {
468  __from_next = reinterpret_cast<const extern_type*>(__cfrom);
469  __to_next = reinterpret_cast<intern_type*>(__cto);
470  __ret = codecvt_base::partial;
471  }
472  else
473  __ret = codecvt_base::error;
474  }
475  }
476  return __ret;
477  }
478 
479  template<typename _InternT, typename _ExternT>
480  int
481  codecvt<_InternT, _ExternT, encoding_state>::
482  do_encoding() const throw()
483  {
484  int __ret = 0;
485  if (sizeof(_ExternT) <= sizeof(_InternT))
486  __ret = sizeof(_InternT) / sizeof(_ExternT);
487  return __ret;
488  }
489 
490  template<typename _InternT, typename _ExternT>
491  bool
492  codecvt<_InternT, _ExternT, encoding_state>::
493  do_always_noconv() const throw()
494  { return false; }
495 
496  template<typename _InternT, typename _ExternT>
497  int
498  codecvt<_InternT, _ExternT, encoding_state>::
499  do_length(state_type&, const extern_type* __from,
500  const extern_type* __end, size_t __max) const
501  { return std::min(__max, static_cast<size_t>(__end - __from)); }
502 
503  // _GLIBCXX_RESOLVE_LIB_DEFECTS
504  // 74. Garbled text for codecvt::do_max_length
505  template<typename _InternT, typename _ExternT>
506  int
507  codecvt<_InternT, _ExternT, encoding_state>::
508  do_max_length() const throw()
509  { return 1; }
510 
511 _GLIBCXX_END_NAMESPACE_VERSION
512 } // namespace
513 
514 #endif