libstdc++
codecvt.h
Go to the documentation of this file.
1 // Locale support (codecvt) -*- C++ -*-
2 
3 // Copyright (C) 2000-2018 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /** @file bits/codecvt.h
26  * This is an internal header file, included by other library headers.
27  * Do not attempt to use it directly. @headername{locale}
28  */
29 
30 //
31 // ISO C++ 14882: 22.2.1.5 Template class codecvt
32 //
33 
34 // Written by Benjamin Kosnik <[email protected]>
35 
36 #ifndef _CODECVT_H
37 #define _CODECVT_H 1
38 
39 #pragma GCC system_header
40 
41 namespace std _GLIBCXX_VISIBILITY(default)
42 {
43 _GLIBCXX_BEGIN_NAMESPACE_VERSION
44 
/// Empty base class for codecvt facet [22.2.1.5].
///
/// Carries no data members or member functions; it only supplies the
/// @c result enumeration used as the return type of the conversion
/// functions.
class codecvt_base
{
public:
  /// Outcome of a conversion request.
  enum result
  {
    ok,       ///< All of the input was converted.
    partial,  ///< Input ended early or the output ran out of space.
    error,    ///< The conversion failed.
    noconv    ///< No conversion was necessary.
  };
};
57 
58  /**
59  * @brief Common base for codecvt functions.
60  *
61  * This template class provides implementations of the public functions
62  * that forward to the protected virtual functions.
63  *
64  * This template also provides abstract stubs for the protected virtual
65  * functions.
66  */
67  template<typename _InternT, typename _ExternT, typename _StateT>
69  : public locale::facet, public codecvt_base
70  {
71  public:
72  // Types:
73  typedef codecvt_base::result result;
74  typedef _InternT intern_type;
75  typedef _ExternT extern_type;
76  typedef _StateT state_type;
77 
78  // 22.2.1.5.1 codecvt members
79  /**
80  * @brief Convert from internal to external character set.
81  *
82  * Converts input string of intern_type to output string of
83  * extern_type. This is analogous to wcsrtombs. It does this by
84  * calling codecvt::do_out.
85  *
86  * The source and destination character sets are determined by the
87  * facet's locale, internal and external types.
88  *
89  * The characters in [from,from_end) are converted and written to
90  * [to,to_end). from_next and to_next are set to point to the
91  * character following the last successfully converted character,
92  * respectively. If the result needed no conversion, from_next and
93  * to_next are not affected.
94  *
95  * The @a state argument should be initialized if the input is at the
96  * beginning and carried from a previous call if continuing
97  * conversion. There are no guarantees about how @a state is used.
98  *
99  * The result returned is a member of codecvt_base::result. If
100  * all the input is converted, returns codecvt_base::ok. If no
101  * conversion is necessary, returns codecvt_base::noconv. If
102  * the input ends early or there is insufficient space in the
103  * output, returns codecvt_base::partial. Otherwise the
104  * conversion failed and codecvt_base::error is returned.
105  *
106  * @param __state Persistent conversion state data.
107  * @param __from Start of input.
108  * @param __from_end End of input.
109  * @param __from_next Returns start of unconverted data.
110  * @param __to Start of output buffer.
111  * @param __to_end End of output buffer.
112  * @param __to_next Returns start of unused output area.
113  * @return codecvt_base::result.
114  */
115  result
116  out(state_type& __state, const intern_type* __from,
117  const intern_type* __from_end, const intern_type*& __from_next,
118  extern_type* __to, extern_type* __to_end,
119  extern_type*& __to_next) const
120  {
121  return this->do_out(__state, __from, __from_end, __from_next,
122  __to, __to_end, __to_next);
123  }
124 
125  /**
126  * @brief Reset conversion state.
127  *
128  * Writes characters to output that would restore @a state to initial
129  * conditions. The idea is that if a partial conversion occurs, then
130  * the converting the characters written by this function would leave
131  * the state in initial conditions, rather than partial conversion
132  * state. It does this by calling codecvt::do_unshift().
133  *
134  * For example, if 4 external characters always converted to 1 internal
135  * character, and input to in() had 6 external characters with state
136  * saved, this function would write two characters to the output and
137  * set the state to initialized conditions.
138  *
139  * The source and destination character sets are determined by the
140  * facet's locale, internal and external types.
141  *
142  * The result returned is a member of codecvt_base::result. If the
143  * state could be reset and data written, returns codecvt_base::ok. If
144  * no conversion is necessary, returns codecvt_base::noconv. If the
145  * output has insufficient space, returns codecvt_base::partial.
146  * Otherwise the reset failed and codecvt_base::error is returned.
147  *
148  * @param __state Persistent conversion state data.
149  * @param __to Start of output buffer.
150  * @param __to_end End of output buffer.
151  * @param __to_next Returns start of unused output area.
152  * @return codecvt_base::result.
153  */
154  result
155  unshift(state_type& __state, extern_type* __to, extern_type* __to_end,
156  extern_type*& __to_next) const
157  { return this->do_unshift(__state, __to,__to_end,__to_next); }
158 
159  /**
160  * @brief Convert from external to internal character set.
161  *
162  * Converts input string of extern_type to output string of
163  * intern_type. This is analogous to mbsrtowcs. It does this by
164  * calling codecvt::do_in.
165  *
166  * The source and destination character sets are determined by the
167  * facet's locale, internal and external types.
168  *
169  * The characters in [from,from_end) are converted and written to
170  * [to,to_end). from_next and to_next are set to point to the
171  * character following the last successfully converted character,
172  * respectively. If the result needed no conversion, from_next and
173  * to_next are not affected.
174  *
175  * The @a state argument should be initialized if the input is at the
176  * beginning and carried from a previous call if continuing
177  * conversion. There are no guarantees about how @a state is used.
178  *
179  * The result returned is a member of codecvt_base::result. If
180  * all the input is converted, returns codecvt_base::ok. If no
181  * conversion is necessary, returns codecvt_base::noconv. If
182  * the input ends early or there is insufficient space in the
183  * output, returns codecvt_base::partial. Otherwise the
184  * conversion failed and codecvt_base::error is returned.
185  *
186  * @param __state Persistent conversion state data.
187  * @param __from Start of input.
188  * @param __from_end End of input.
189  * @param __from_next Returns start of unconverted data.
190  * @param __to Start of output buffer.
191  * @param __to_end End of output buffer.
192  * @param __to_next Returns start of unused output area.
193  * @return codecvt_base::result.
194  */
195  result
196  in(state_type& __state, const extern_type* __from,
197  const extern_type* __from_end, const extern_type*& __from_next,
198  intern_type* __to, intern_type* __to_end,
199  intern_type*& __to_next) const
200  {
201  return this->do_in(__state, __from, __from_end, __from_next,
202  __to, __to_end, __to_next);
203  }
204 
205  int
206  encoding() const throw()
207  { return this->do_encoding(); }
208 
209  bool
210  always_noconv() const throw()
211  { return this->do_always_noconv(); }
212 
213  int
214  length(state_type& __state, const extern_type* __from,
215  const extern_type* __end, size_t __max) const
216  { return this->do_length(__state, __from, __end, __max); }
217 
218  int
219  max_length() const throw()
220  { return this->do_max_length(); }
221 
222  protected:
223  explicit
224  __codecvt_abstract_base(size_t __refs = 0) : locale::facet(__refs) { }
225 
226  virtual
227  ~__codecvt_abstract_base() { }
228 
229  /**
230  * @brief Convert from internal to external character set.
231  *
232  * Converts input string of intern_type to output string of
233  * extern_type. This function is a hook for derived classes to change
234  * the value returned. @see out for more information.
235  */
236  virtual result
237  do_out(state_type& __state, const intern_type* __from,
238  const intern_type* __from_end, const intern_type*& __from_next,
239  extern_type* __to, extern_type* __to_end,
240  extern_type*& __to_next) const = 0;
241 
242  virtual result
243  do_unshift(state_type& __state, extern_type* __to,
244  extern_type* __to_end, extern_type*& __to_next) const = 0;
245 
246  virtual result
247  do_in(state_type& __state, const extern_type* __from,
248  const extern_type* __from_end, const extern_type*& __from_next,
249  intern_type* __to, intern_type* __to_end,
250  intern_type*& __to_next) const = 0;
251 
252  virtual int
253  do_encoding() const throw() = 0;
254 
255  virtual bool
256  do_always_noconv() const throw() = 0;
257 
258  virtual int
259  do_length(state_type&, const extern_type* __from,
260  const extern_type* __end, size_t __max) const = 0;
261 
262  virtual int
263  do_max_length() const throw() = 0;
264  };
265 
266  /**
267  * @brief Primary class template codecvt.
268  * @ingroup locales
269  *
270  * NB: Generic, mostly useless implementation.
271  *
272  */
273  template<typename _InternT, typename _ExternT, typename _StateT>
274  class codecvt
275  : public __codecvt_abstract_base<_InternT, _ExternT, _StateT>
276  {
277  public:
278  // Types:
279  typedef codecvt_base::result result;
280  typedef _InternT intern_type;
281  typedef _ExternT extern_type;
282  typedef _StateT state_type;
283 
284  protected:
285  __c_locale _M_c_locale_codecvt;
286 
287  public:
288  static locale::id id;
289 
290  explicit
291  codecvt(size_t __refs = 0)
293  _M_c_locale_codecvt(0)
294  { }
295 
296  explicit
297  codecvt(__c_locale __cloc, size_t __refs = 0);
298 
299  protected:
300  virtual
301  ~codecvt() { }
302 
303  virtual result
304  do_out(state_type& __state, const intern_type* __from,
305  const intern_type* __from_end, const intern_type*& __from_next,
306  extern_type* __to, extern_type* __to_end,
307  extern_type*& __to_next) const;
308 
309  virtual result
310  do_unshift(state_type& __state, extern_type* __to,
311  extern_type* __to_end, extern_type*& __to_next) const;
312 
313  virtual result
314  do_in(state_type& __state, const extern_type* __from,
315  const extern_type* __from_end, const extern_type*& __from_next,
316  intern_type* __to, intern_type* __to_end,
317  intern_type*& __to_next) const;
318 
319  virtual int
320  do_encoding() const throw();
321 
322  virtual bool
323  do_always_noconv() const throw();
324 
325  virtual int
326  do_length(state_type&, const extern_type* __from,
327  const extern_type* __end, size_t __max) const;
328 
329  virtual int
330  do_max_length() const throw();
331  };
332 
333  template<typename _InternT, typename _ExternT, typename _StateT>
335 
336  /// class codecvt<char, char, mbstate_t> specialization.
337  template<>
338  class codecvt<char, char, mbstate_t>
339  : public __codecvt_abstract_base<char, char, mbstate_t>
340  {
341  friend class messages<char>;
342 
343  public:
344  // Types:
345  typedef char intern_type;
346  typedef char extern_type;
347  typedef mbstate_t state_type;
348 
349  protected:
350  __c_locale _M_c_locale_codecvt;
351 
352  public:
353  static locale::id id;
354 
355  explicit
356  codecvt(size_t __refs = 0);
357 
358  explicit
359  codecvt(__c_locale __cloc, size_t __refs = 0);
360 
361  protected:
362  virtual
363  ~codecvt();
364 
365  virtual result
366  do_out(state_type& __state, const intern_type* __from,
367  const intern_type* __from_end, const intern_type*& __from_next,
368  extern_type* __to, extern_type* __to_end,
369  extern_type*& __to_next) const;
370 
371  virtual result
372  do_unshift(state_type& __state, extern_type* __to,
373  extern_type* __to_end, extern_type*& __to_next) const;
374 
375  virtual result
376  do_in(state_type& __state, const extern_type* __from,
377  const extern_type* __from_end, const extern_type*& __from_next,
378  intern_type* __to, intern_type* __to_end,
379  intern_type*& __to_next) const;
380 
381  virtual int
382  do_encoding() const throw();
383 
384  virtual bool
385  do_always_noconv() const throw();
386 
387  virtual int
388  do_length(state_type&, const extern_type* __from,
389  const extern_type* __end, size_t __max) const;
390 
391  virtual int
392  do_max_length() const throw();
393  };
394 
395 #ifdef _GLIBCXX_USE_WCHAR_T
396  /** @brief Class codecvt<wchar_t, char, mbstate_t> specialization.
397  *
398  * Converts between narrow and wide characters in the native character set
399  */
400  template<>
401  class codecvt<wchar_t, char, mbstate_t>
402  : public __codecvt_abstract_base<wchar_t, char, mbstate_t>
403  {
404  friend class messages<wchar_t>;
405 
406  public:
407  // Types:
408  typedef wchar_t intern_type;
409  typedef char extern_type;
410  typedef mbstate_t state_type;
411 
412  protected:
413  __c_locale _M_c_locale_codecvt;
414 
415  public:
416  static locale::id id;
417 
418  explicit
419  codecvt(size_t __refs = 0);
420 
421  explicit
422  codecvt(__c_locale __cloc, size_t __refs = 0);
423 
424  protected:
425  virtual
426  ~codecvt();
427 
428  virtual result
429  do_out(state_type& __state, const intern_type* __from,
430  const intern_type* __from_end, const intern_type*& __from_next,
431  extern_type* __to, extern_type* __to_end,
432  extern_type*& __to_next) const;
433 
434  virtual result
435  do_unshift(state_type& __state,
436  extern_type* __to, extern_type* __to_end,
437  extern_type*& __to_next) const;
438 
439  virtual result
440  do_in(state_type& __state,
441  const extern_type* __from, const extern_type* __from_end,
442  const extern_type*& __from_next,
443  intern_type* __to, intern_type* __to_end,
444  intern_type*& __to_next) const;
445 
446  virtual
447  int do_encoding() const throw();
448 
449  virtual
450  bool do_always_noconv() const throw();
451 
452  virtual
453  int do_length(state_type&, const extern_type* __from,
454  const extern_type* __end, size_t __max) const;
455 
456  virtual int
457  do_max_length() const throw();
458  };
459 #endif //_GLIBCXX_USE_WCHAR_T
460 
461 #if __cplusplus >= 201103L
462 #ifdef _GLIBCXX_USE_C99_STDINT_TR1
463  /** @brief Class codecvt<char16_t, char, mbstate_t> specialization.
464  *
465  * Converts between UTF-16 and UTF-8.
466  */
467  template<>
468  class codecvt<char16_t, char, mbstate_t>
469  : public __codecvt_abstract_base<char16_t, char, mbstate_t>
470  {
471  public:
472  // Types:
473  typedef char16_t intern_type;
474  typedef char extern_type;
475  typedef mbstate_t state_type;
476 
477  public:
478  static locale::id id;
479 
480  explicit
481  codecvt(size_t __refs = 0)
483 
484  protected:
485  virtual
486  ~codecvt();
487 
488  virtual result
489  do_out(state_type& __state, const intern_type* __from,
490  const intern_type* __from_end, const intern_type*& __from_next,
491  extern_type* __to, extern_type* __to_end,
492  extern_type*& __to_next) const;
493 
494  virtual result
495  do_unshift(state_type& __state,
496  extern_type* __to, extern_type* __to_end,
497  extern_type*& __to_next) const;
498 
499  virtual result
500  do_in(state_type& __state,
501  const extern_type* __from, const extern_type* __from_end,
502  const extern_type*& __from_next,
503  intern_type* __to, intern_type* __to_end,
504  intern_type*& __to_next) const;
505 
506  virtual
507  int do_encoding() const throw();
508 
509  virtual
510  bool do_always_noconv() const throw();
511 
512  virtual
513  int do_length(state_type&, const extern_type* __from,
514  const extern_type* __end, size_t __max) const;
515 
516  virtual int
517  do_max_length() const throw();
518  };
519 
520  /** @brief Class codecvt<char32_t, char, mbstate_t> specialization.
521  *
522  * Converts between UTF-32 and UTF-8.
523  */
524  template<>
525  class codecvt<char32_t, char, mbstate_t>
526  : public __codecvt_abstract_base<char32_t, char, mbstate_t>
527  {
528  public:
529  // Types:
530  typedef char32_t intern_type;
531  typedef char extern_type;
532  typedef mbstate_t state_type;
533 
534  public:
535  static locale::id id;
536 
537  explicit
538  codecvt(size_t __refs = 0)
540 
541  protected:
542  virtual
543  ~codecvt();
544 
545  virtual result
546  do_out(state_type& __state, const intern_type* __from,
547  const intern_type* __from_end, const intern_type*& __from_next,
548  extern_type* __to, extern_type* __to_end,
549  extern_type*& __to_next) const;
550 
551  virtual result
552  do_unshift(state_type& __state,
553  extern_type* __to, extern_type* __to_end,
554  extern_type*& __to_next) const;
555 
556  virtual result
557  do_in(state_type& __state,
558  const extern_type* __from, const extern_type* __from_end,
559  const extern_type*& __from_next,
560  intern_type* __to, intern_type* __to_end,
561  intern_type*& __to_next) const;
562 
563  virtual
564  int do_encoding() const throw();
565 
566  virtual
567  bool do_always_noconv() const throw();
568 
569  virtual
570  int do_length(state_type&, const extern_type* __from,
571  const extern_type* __end, size_t __max) const;
572 
573  virtual int
574  do_max_length() const throw();
575  };
576 
577 #endif // _GLIBCXX_USE_C99_STDINT_TR1
578 #endif // C++11
579 
580  /// class codecvt_byname [22.2.1.6].
581  template<typename _InternT, typename _ExternT, typename _StateT>
582  class codecvt_byname : public codecvt<_InternT, _ExternT, _StateT>
583  {
584  public:
585  explicit
586  codecvt_byname(const char* __s, size_t __refs = 0)
588  {
589  if (__builtin_strcmp(__s, "C") != 0
590  && __builtin_strcmp(__s, "POSIX") != 0)
591  {
592  this->_S_destroy_c_locale(this->_M_c_locale_codecvt);
593  this->_S_create_c_locale(this->_M_c_locale_codecvt, __s);
594  }
595  }
596 
597 #if __cplusplus >= 201103L
598  explicit
599  codecvt_byname(const string& __s, size_t __refs = 0)
600  : codecvt_byname(__s.c_str(), __refs) { }
601 #endif
602 
603  protected:
604  virtual
605  ~codecvt_byname() { }
606  };
607 
608 #if __cplusplus >= 201103L && defined(_GLIBCXX_USE_C99_STDINT_TR1)
609  template<>
610  class codecvt_byname<char16_t, char, mbstate_t>
611  : public codecvt<char16_t, char, mbstate_t>
612  {
613  public:
614  explicit
615  codecvt_byname(const char*, size_t __refs = 0)
616  : codecvt<char16_t, char, mbstate_t>(__refs) { }
617 
618  explicit
619  codecvt_byname(const string& __s, size_t __refs = 0)
620  : codecvt_byname(__s.c_str(), __refs) { }
621 
622  protected:
623  virtual
624  ~codecvt_byname() { }
625  };
626 
627  template<>
628  class codecvt_byname<char32_t, char, mbstate_t>
629  : public codecvt<char32_t, char, mbstate_t>
630  {
631  public:
632  explicit
633  codecvt_byname(const char*, size_t __refs = 0)
634  : codecvt<char32_t, char, mbstate_t>(__refs) { }
635 
636  explicit
637  codecvt_byname(const string& __s, size_t __refs = 0)
638  : codecvt_byname(__s.c_str(), __refs) { }
639 
640  protected:
641  virtual
642  ~codecvt_byname() { }
643  };
644 #endif
645 
646  // Inhibit implicit instantiations for required instantiations,
647  // which are defined via explicit instantiations elsewhere.
648 #if _GLIBCXX_EXTERN_TEMPLATE
649  extern template class codecvt_byname<char, char, mbstate_t>;
650 
651  extern template
652  const codecvt<char, char, mbstate_t>&
653  use_facet<codecvt<char, char, mbstate_t> >(const locale&);
654 
655  extern template
656  bool
657  has_facet<codecvt<char, char, mbstate_t> >(const locale&);
658 
659 #ifdef _GLIBCXX_USE_WCHAR_T
660  extern template class codecvt_byname<wchar_t, char, mbstate_t>;
661 
662  extern template
663  const codecvt<wchar_t, char, mbstate_t>&
664  use_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
665 
666  extern template
667  bool
668  has_facet<codecvt<wchar_t, char, mbstate_t> >(const locale&);
669 #endif
670 
671 #if __cplusplus >= 201103L && defined(_GLIBCXX_USE_C99_STDINT_TR1)
672  extern template class codecvt_byname<char16_t, char, mbstate_t>;
673  extern template class codecvt_byname<char32_t, char, mbstate_t>;
674 #endif
675 
676 #endif
677 
678 _GLIBCXX_END_NAMESPACE_VERSION
679 } // namespace std
680 
681 #endif // _CODECVT_H
result out(state_type &__state, const intern_type *__from, const intern_type *__from_end, const intern_type *&__from_next, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const
Convert from internal to external character set.
Definition: codecvt.h:116
Common base for codecvt functions.
Definition: codecvt.h:68
Localization functionality base class. The facet class is the base class for a localization feature,...
Primary class template messages. This facet encapsulates the code to retrieve messages from message ca...
result in(state_type &__state, const extern_type *__from, const extern_type *__from_end, const extern_type *&__from_next, intern_type *__to, intern_type *__to_end, intern_type *&__to_next) const
Convert from external to internal character set.
Definition: codecvt.h:196
facet(size_t __refs=0)
Facet constructor.
result unshift(state_type &__state, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const
Reset conversion state.
Definition: codecvt.h:155
virtual result do_out(state_type &__state, const intern_type *__from, const intern_type *__from_end, const intern_type *&__from_next, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const =0
Convert from internal to external character set.
ISO C++ entities toplevel namespace is std.
const _CharT * c_str() const noexcept
Return const pointer to null-terminated contents.
Empty base class for codecvt facet [22.2.1.5].
Definition: codecvt.h:46
Primary class template codecvt. NB: Generic, mostly useless implementation.
Definition: codecvt.h:274
Facet ID class. The ID class provides facets with an index used to identify them. Every facet class mu...
class codecvt_byname [22.2.1.6].
Definition: codecvt.h:582