25#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_X86_CONVERSIONS_H 
   26#define _GLIBCXX_EXPERIMENTAL_SIMD_X86_CONVERSIONS_H 
   28#if __cplusplus >= 201703L 
   32template <
typename _To, 
typename _V, 
typename _Traits>
 
   33  _GLIBCXX_SIMD_INTRINSIC _To
 
   36    static_assert(__is_vector_type_v<_V>);
 
   37    using _Tp = 
typename _Traits::value_type;
 
   38    constexpr size_t _Np = _Traits::_S_full_size;
 
   39    [[maybe_unused]] 
const auto __intrin = __to_intrin(__v);
 
   40    using _Up = 
typename _VectorTraits<_To>::value_type;
 
   41    constexpr size_t _M = _VectorTraits<_To>::_S_full_size;
 
   44    [[maybe_unused]] 
constexpr bool __x_to_x
 
   45      = 
sizeof(__v) <= 16 && 
sizeof(_To) <= 16;
 
   46    [[maybe_unused]] 
constexpr bool __x_to_y
 
   47      = 
sizeof(__v) <= 16 && 
sizeof(_To) == 32;
 
   48    [[maybe_unused]] 
constexpr bool __x_to_z
 
   49      = 
sizeof(__v) <= 16 && 
sizeof(_To) == 64;
 
   50    [[maybe_unused]] 
constexpr bool __y_to_x
 
   51      = 
sizeof(__v) == 32 && 
sizeof(_To) <= 16;
 
   52    [[maybe_unused]] 
constexpr bool __y_to_y
 
   53      = 
sizeof(__v) == 32 && 
sizeof(_To) == 32;
 
   54    [[maybe_unused]] 
constexpr bool __y_to_z
 
   55      = 
sizeof(__v) == 32 && 
sizeof(_To) == 64;
 
   56    [[maybe_unused]] 
constexpr bool __z_to_x
 
   57      = 
sizeof(__v) == 64 && 
sizeof(_To) <= 16;
 
   58    [[maybe_unused]] 
constexpr bool __z_to_y
 
   59      = 
sizeof(__v) == 64 && 
sizeof(_To) == 32;
 
   60    [[maybe_unused]] 
constexpr bool __z_to_z
 
   61      = 
sizeof(__v) == 64 && 
sizeof(_To) == 64;
 
   64    [[maybe_unused]] 
constexpr bool __i_to_i
 
   65      = is_integral_v<_Up> && is_integral_v<_Tp>;
 
   66    [[maybe_unused]] 
constexpr bool __i8_to_i16
 
   67      = __i_to_i && 
sizeof(_Tp) == 1 && 
sizeof(_Up) == 2;
 
   68    [[maybe_unused]] 
constexpr bool __i8_to_i32
 
   69      = __i_to_i && 
sizeof(_Tp) == 1 && 
sizeof(_Up) == 4;
 
   70    [[maybe_unused]] 
constexpr bool __i8_to_i64
 
   71      = __i_to_i && 
sizeof(_Tp) == 1 && 
sizeof(_Up) == 8;
 
   72    [[maybe_unused]] 
constexpr bool __i16_to_i8
 
   73      = __i_to_i && 
sizeof(_Tp) == 2 && 
sizeof(_Up) == 1;
 
   74    [[maybe_unused]] 
constexpr bool __i16_to_i32
 
   75      = __i_to_i && 
sizeof(_Tp) == 2 && 
sizeof(_Up) == 4;
 
   76    [[maybe_unused]] 
constexpr bool __i16_to_i64
 
   77      = __i_to_i && 
sizeof(_Tp) == 2 && 
sizeof(_Up) == 8;
 
   78    [[maybe_unused]] 
constexpr bool __i32_to_i8
 
   79      = __i_to_i && 
sizeof(_Tp) == 4 && 
sizeof(_Up) == 1;
 
   80    [[maybe_unused]] 
constexpr bool __i32_to_i16
 
   81      = __i_to_i && 
sizeof(_Tp) == 4 && 
sizeof(_Up) == 2;
 
   82    [[maybe_unused]] 
constexpr bool __i32_to_i64
 
   83      = __i_to_i && 
sizeof(_Tp) == 4 && 
sizeof(_Up) == 8;
 
   84    [[maybe_unused]] 
constexpr bool __i64_to_i8
 
   85      = __i_to_i && 
sizeof(_Tp) == 8 && 
sizeof(_Up) == 1;
 
   86    [[maybe_unused]] 
constexpr bool __i64_to_i16
 
   87      = __i_to_i && 
sizeof(_Tp) == 8 && 
sizeof(_Up) == 2;
 
   88    [[maybe_unused]] 
constexpr bool __i64_to_i32
 
   89      = __i_to_i && 
sizeof(_Tp) == 8 && 
sizeof(_Up) == 4;
 
   93    [[maybe_unused]] 
constexpr bool __s64_to_f32
 
   94      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 8
 
   95        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
   96    [[maybe_unused]] 
constexpr bool __s32_to_f32
 
   97      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 4
 
   98        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
   99    [[maybe_unused]] 
constexpr bool __s16_to_f32
 
  100      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 2
 
  101        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
  102    [[maybe_unused]] 
constexpr bool __s8_to_f32
 
  103      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 1
 
  104        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
  105    [[maybe_unused]] 
constexpr bool __u64_to_f32
 
  106      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 8
 
  107        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
  108    [[maybe_unused]] 
constexpr bool __u32_to_f32
 
  109      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 4
 
  110        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
  111    [[maybe_unused]] 
constexpr bool __u16_to_f32
 
  112      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 2
 
  113        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
  114    [[maybe_unused]] 
constexpr bool __u8_to_f32
 
  115      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 1
 
  116        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
  117    [[maybe_unused]] 
constexpr bool __s64_to_f64
 
  118      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 8
 
  119        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
  120    [[maybe_unused]] 
constexpr bool __s32_to_f64
 
  121      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 4
 
  122        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
  123    [[maybe_unused]] 
constexpr bool __u64_to_f64
 
  124      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 8
 
  125        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
  126    [[maybe_unused]] 
constexpr bool __u32_to_f64
 
  127      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 4
 
  128        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
  129    [[maybe_unused]] 
constexpr bool __f32_to_s64
 
  130      = is_integral_v<_Up> && is_signed_v<_Up> && 
sizeof(_Up) == 8
 
  131        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 4;
 
  132    [[maybe_unused]] 
constexpr bool __f32_to_s32
 
  133      = is_integral_v<_Up> && is_signed_v<_Up> && 
sizeof(_Up) == 4
 
  134        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 4;
 
  135    [[maybe_unused]] 
constexpr bool __f32_to_u64
 
  136      = is_integral_v<_Up> && is_unsigned_v<_Up> && 
sizeof(_Up) == 8
 
  137        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 4;
 
  138    [[maybe_unused]] 
constexpr bool __f32_to_u32
 
  139      = is_integral_v<_Up> && is_unsigned_v<_Up> && 
sizeof(_Up) == 4
 
  140        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 4;
 
  141    [[maybe_unused]] 
constexpr bool __f64_to_s64
 
  142      = is_integral_v<_Up> && is_signed_v<_Up> && 
sizeof(_Up) == 8
 
  143        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8;
 
  144    [[maybe_unused]] 
constexpr bool __f64_to_s32
 
  145      = is_integral_v<_Up> && is_signed_v<_Up> && 
sizeof(_Up) == 4
 
  146        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8;
 
  147    [[maybe_unused]] 
constexpr bool __f64_to_u64
 
  148      = is_integral_v<_Up> && is_unsigned_v<_Up> && 
sizeof(_Up) == 8
 
  149        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8;
 
  150    [[maybe_unused]] 
constexpr bool __f64_to_u32
 
  151      = is_integral_v<_Up> && is_unsigned_v<_Up> && 
sizeof(_Up) == 4
 
  152        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8;
 
  153    [[maybe_unused]] 
constexpr bool __ibw_to_f32
 
  154      = is_integral_v<_Tp> && 
sizeof(_Tp) <= 2
 
  155        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
  156    [[maybe_unused]] 
constexpr bool __ibw_to_f64
 
  157      = is_integral_v<_Tp> && 
sizeof(_Tp) <= 2
 
  158        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
  159    [[maybe_unused]] 
constexpr bool __f32_to_ibw
 
  160      = is_integral_v<_Up> && 
sizeof(_Up) <= 2
 
  161        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 4;
 
  162    [[maybe_unused]] 
constexpr bool __f64_to_ibw
 
  163      = is_integral_v<_Up> && 
sizeof(_Up) <= 2
 
  164        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8;
 
  165    [[maybe_unused]] 
constexpr bool __f32_to_f64
 
  166      = is_floating_point_v<_Tp> && 
sizeof(_Tp) == 4
 
  167        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
  168    [[maybe_unused]] 
constexpr bool __f64_to_f32
 
  169      = is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8
 
  170        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
  172    if constexpr (__i_to_i && __y_to_x && !__have_avx2) 
 
  173      return __convert_x86<_To>(__lo128(__v), __hi128(__v));
 
  174    else if constexpr (__i_to_i && __x_to_y && !__have_avx2) 
 
  175      return __concat(__convert_x86<__vector_type_t<_Up, _M / 2>>(__v),
 
  176                      __convert_x86<__vector_type_t<_Up, _M / 2>>(
 
  177                        __extract_part<1, _Np / _M * 2>(__v)));
 
  178    else if constexpr (__i_to_i) 
 
  180        static_assert(__x_to_x || __have_avx2,
 
  181                      "integral conversions with ymm registers require AVX2");
 
  182        static_assert(__have_avx512bw
 
  183                        || ((
sizeof(_Tp) >= 4 || 
sizeof(__v) < 64)
 
  184                            && (
sizeof(_Up) >= 4 || 
sizeof(_To) < 64)),
 
  185                      "8/16-bit integers in zmm registers require AVX512BW");
 
  186        static_assert((
sizeof(__v) < 64 && 
sizeof(_To) < 64) || __have_avx512f,
 
  187                      "integral conversions with ymm registers require AVX2");
 
  189    if constexpr (is_floating_point_v<_Tp> == is_floating_point_v<_Up> && 
 
  190                  sizeof(_Tp) == 
sizeof(_Up))
 
  193        if constexpr (_Np >= _M)
 
  194          return __intrin_bitcast<_To>(__v);
 
  196          return __zero_extend(__vector_bitcast<_Up>(__v));
 
  198    else if constexpr (_Np < _M && 
sizeof(_To) > 16) 
 
  200      return __zero_extend(
 
  201        __convert_x86<__vector_type_t<
 
  202          _Up, (16 / 
sizeof(_Up) > _Np) ? 16 / 
sizeof(_Up) : _Np>>(__v));
 
  203    else if constexpr (_Np > _M && 
sizeof(__v) > 16) 
 
  205      return __convert_x86<_To>(__extract_part<0, _Np / _M>(__v));
 
  206    else if constexpr (__i64_to_i32) 
 
  208        if constexpr (__x_to_x && __have_avx512vl)
 
  209          return __intrin_bitcast<_To>(_mm_cvtepi64_epi32(__intrin));
 
  210        else if constexpr (__x_to_x)
 
  211          return __auto_bitcast(
 
  212            _mm_shuffle_ps(__vector_bitcast<float>(__v), __m128(), 8));
 
  213        else if constexpr (__y_to_x && __have_avx512vl)
 
  214          return __intrin_bitcast<_To>(_mm256_cvtepi64_epi32(__intrin));
 
  215        else if constexpr (__y_to_x && __have_avx512f)
 
  216          return __intrin_bitcast<_To>(
 
  217            __lo128(_mm512_cvtepi64_epi32(__auto_bitcast(__v))));
 
  218        else if constexpr (__y_to_x)
 
  219          return __intrin_bitcast<_To>(
 
  220            __lo128(_mm256_permute4x64_epi64(_mm256_shuffle_epi32(__intrin, 8),
 
  222        else if constexpr (__z_to_y)
 
  223          return __intrin_bitcast<_To>(_mm512_cvtepi64_epi32(__intrin));
 
  225    else if constexpr (__i64_to_i16) 
 
  227        if constexpr (__x_to_x && __have_avx512vl)
 
  228          return __intrin_bitcast<_To>(_mm_cvtepi64_epi16(__intrin));
 
  229        else if constexpr (__x_to_x && __have_avx512f)
 
  230          return __intrin_bitcast<_To>(
 
  231            __lo128(_mm512_cvtepi64_epi16(__auto_bitcast(__v))));
 
  232        else if constexpr (__x_to_x && __have_ssse3)
 
  234            return __intrin_bitcast<_To>(
 
  235              _mm_shuffle_epi8(__intrin,
 
  236                               _mm_setr_epi8(0, 1, 8, 9, -0x80, -0x80, -0x80,
 
  237                                             -0x80, -0x80, -0x80, -0x80, -0x80,
 
  238                                             -0x80, -0x80, -0x80, -0x80)));
 
  241        else if constexpr (__y_to_x && __have_avx512vl)
 
  242          return __intrin_bitcast<_To>(_mm256_cvtepi64_epi16(__intrin));
 
  243        else if constexpr (__y_to_x && __have_avx512f)
 
  244          return __intrin_bitcast<_To>(
 
  245            __lo128(_mm512_cvtepi64_epi16(__auto_bitcast(__v))));
 
  246        else if constexpr (__y_to_x)
 
  248            const auto __a = _mm256_shuffle_epi8(
 
  250              _mm256_setr_epi8(0, 1, 8, 9, -0x80, -0x80, -0x80, -0x80, -0x80,
 
  251                               -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80,
 
  252                               -0x80, -0x80, -0x80, -0x80, 0, 1, 8, 9, -0x80,
 
  253                               -0x80, -0x80, -0x80, -0x80, -0x80, -0x80,
 
  255            return __intrin_bitcast<_To>(__lo128(__a) | __hi128(__a));
 
  257        else if constexpr (__z_to_x)
 
  258          return __intrin_bitcast<_To>(_mm512_cvtepi64_epi16(__intrin));
 
  260    else if constexpr (__i64_to_i8) 
 
  262        if constexpr (__x_to_x && __have_avx512vl)
 
  263          return __intrin_bitcast<_To>(_mm_cvtepi64_epi8(__intrin));
 
  264        else if constexpr (__x_to_x && __have_avx512f)
 
  265          return __intrin_bitcast<_To>(
 
  266            __lo128(_mm512_cvtepi64_epi8(__zero_extend(__intrin))));
 
  267        else if constexpr (__y_to_x && __have_avx512vl)
 
  268          return __intrin_bitcast<_To>(_mm256_cvtepi64_epi8(__intrin));
 
  269        else if constexpr (__y_to_x && __have_avx512f)
 
  270          return __intrin_bitcast<_To>(
 
  271            _mm512_cvtepi64_epi8(__zero_extend(__intrin)));
 
  272        else if constexpr (__z_to_x)
 
  273          return __intrin_bitcast<_To>(_mm512_cvtepi64_epi8(__intrin));
 
  275    else if constexpr (__i32_to_i64) 
 
  277        if constexpr (__have_sse4_1 && __x_to_x)
 
  278          return __intrin_bitcast<_To>(is_signed_v<_Tp>
 
  279                                         ? _mm_cvtepi32_epi64(__intrin)
 
  280                                         : _mm_cvtepu32_epi64(__intrin));
 
  281        else if constexpr (__x_to_x)
 
  283            return __intrin_bitcast<_To>(
 
  284              _mm_unpacklo_epi32(__intrin, is_signed_v<_Tp>
 
  285                                             ? _mm_srai_epi32(__intrin, 31)
 
  288        else if constexpr (__x_to_y)
 
  289          return __intrin_bitcast<_To>(is_signed_v<_Tp>
 
  290                                         ? _mm256_cvtepi32_epi64(__intrin)
 
  291                                         : _mm256_cvtepu32_epi64(__intrin));
 
  292        else if constexpr (__y_to_z)
 
  293          return __intrin_bitcast<_To>(is_signed_v<_Tp>
 
  294                                         ? _mm512_cvtepi32_epi64(__intrin)
 
  295                                         : _mm512_cvtepu32_epi64(__intrin));
 
  297    else if constexpr (__i32_to_i16) 
 
  299        if constexpr (__x_to_x && __have_avx512vl)
 
  300          return __intrin_bitcast<_To>(_mm_cvtepi32_epi16(__intrin));
 
  301        else if constexpr (__x_to_x && __have_avx512f)
 
  302          return __intrin_bitcast<_To>(
 
  303            __lo128(_mm512_cvtepi32_epi16(__auto_bitcast(__v))));
 
  304        else if constexpr (__x_to_x && __have_ssse3)
 
  305          return __intrin_bitcast<_To>(_mm_shuffle_epi8(
 
  306            __intrin, _mm_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80,
 
  307                                    -0x80, -0x80, -0x80, -0x80, -0x80, -0x80)));
 
  308        else if constexpr (__x_to_x)
 
  310            auto __a = _mm_unpacklo_epi16(__intrin, __m128i()); 
 
  311            auto __b = _mm_unpackhi_epi16(__intrin, __m128i()); 
 
  312            auto __c = _mm_unpacklo_epi16(__a, __b);            
 
  313            auto __d = _mm_unpackhi_epi16(__a, __b);            
 
  314            return __intrin_bitcast<_To>(
 
  315              _mm_unpacklo_epi16(__c, __d)); 
 
  317        else if constexpr (__y_to_x && __have_avx512vl)
 
  318          return __intrin_bitcast<_To>(_mm256_cvtepi32_epi16(__intrin));
 
  319        else if constexpr (__y_to_x && __have_avx512f)
 
  320          return __intrin_bitcast<_To>(
 
  321            __lo128(_mm512_cvtepi32_epi16(__auto_bitcast(__v))));
 
  322        else if constexpr (__y_to_x)
 
  324            auto __a = _mm256_shuffle_epi8(
 
  326              _mm256_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80, -0x80,
 
  327                               -0x80, -0x80, -0x80, -0x80, -0x80, 0, 1, 4, 5, 8,
 
  328                               9, 12, 13, -0x80, -0x80, -0x80, -0x80, -0x80,
 
  329                               -0x80, -0x80, -0x80));
 
  330            return __intrin_bitcast<_To>(__lo128(
 
  331              _mm256_permute4x64_epi64(__a,
 
  334        else if constexpr (__z_to_y)
 
  335          return __intrin_bitcast<_To>(_mm512_cvtepi32_epi16(__intrin));
 
  337    else if constexpr (__i32_to_i8) 
 
  339        if constexpr (__x_to_x && __have_avx512vl)
 
  340          return __intrin_bitcast<_To>(_mm_cvtepi32_epi8(__intrin));
 
  341        else if constexpr (__x_to_x && __have_avx512f)
 
  342          return __intrin_bitcast<_To>(
 
  343            __lo128(_mm512_cvtepi32_epi8(__zero_extend(__intrin))));
 
  344        else if constexpr (__x_to_x && __have_ssse3)
 
  346            return __intrin_bitcast<_To>(
 
  347              _mm_shuffle_epi8(__intrin,
 
  348                               _mm_setr_epi8(0, 4, 8, 12, -0x80, -0x80, -0x80,
 
  349                                             -0x80, -0x80, -0x80, -0x80, -0x80,
 
  350                                             -0x80, -0x80, -0x80, -0x80)));
 
  352        else if constexpr (__x_to_x)
 
  355              = _mm_unpacklo_epi8(__intrin, __intrin); 
 
  357              = _mm_unpackhi_epi8(__intrin, __intrin);    
 
  358            const auto __c = _mm_unpacklo_epi8(__a, __b); 
 
  359            const auto __d = _mm_unpackhi_epi8(__a, __b); 
 
  360            const auto __e = _mm_unpacklo_epi8(__c, __d); 
 
  361            return __intrin_bitcast<_To>(__e & _mm_cvtsi32_si128(-1));
 
  363        else if constexpr (__y_to_x && __have_avx512vl)
 
  364          return __intrin_bitcast<_To>(_mm256_cvtepi32_epi8(__intrin));
 
  365        else if constexpr (__y_to_x && __have_avx512f)
 
  366          return __intrin_bitcast<_To>(
 
  367            _mm512_cvtepi32_epi8(__zero_extend(__intrin)));
 
  368        else if constexpr (__z_to_x)
 
  369          return __intrin_bitcast<_To>(_mm512_cvtepi32_epi8(__intrin));
 
  371    else if constexpr (__i16_to_i64) 
 
  373        if constexpr (__x_to_x && __have_sse4_1)
 
  374          return __intrin_bitcast<_To>(is_signed_v<_Tp>
 
  375                                         ? _mm_cvtepi16_epi64(__intrin)
 
  376                                         : _mm_cvtepu16_epi64(__intrin));
 
  377        else if constexpr (__x_to_x && is_signed_v<_Tp>)
 
  379            auto __x = _mm_srai_epi16(__intrin, 15);
 
  380            auto __y = _mm_unpacklo_epi16(__intrin, __x);
 
  381            __x = _mm_unpacklo_epi16(__x, __x);
 
  382            return __intrin_bitcast<_To>(_mm_unpacklo_epi32(__y, __x));
 
  384        else if constexpr (__x_to_x)
 
  385          return __intrin_bitcast<_To>(
 
  386            _mm_unpacklo_epi32(_mm_unpacklo_epi16(__intrin, __m128i()),
 
  388        else if constexpr (__x_to_y)
 
  389          return __intrin_bitcast<_To>(is_signed_v<_Tp>
 
  390                                         ? _mm256_cvtepi16_epi64(__intrin)
 
  391                                         : _mm256_cvtepu16_epi64(__intrin));
 
  392        else if constexpr (__x_to_z)
 
  393          return __intrin_bitcast<_To>(is_signed_v<_Tp>
 
  394                                         ? _mm512_cvtepi16_epi64(__intrin)
 
  395                                         : _mm512_cvtepu16_epi64(__intrin));
 
  397    else if constexpr (__i16_to_i32) 
 
  399        if constexpr (__x_to_x && __have_sse4_1)
 
  400          return __intrin_bitcast<_To>(is_signed_v<_Tp>
 
  401                                         ? _mm_cvtepi16_epi32(__intrin)
 
  402                                         : _mm_cvtepu16_epi32(__intrin));
 
  403        else if constexpr (__x_to_x && is_signed_v<_Tp>)
 
  404          return __intrin_bitcast<_To>(
 
  405            _mm_srai_epi32(_mm_unpacklo_epi16(__intrin, __intrin), 16));
 
  406        else if constexpr (__x_to_x && is_unsigned_v<_Tp>)
 
  407          return __intrin_bitcast<_To>(_mm_unpacklo_epi16(__intrin, __m128i()));
 
  408        else if constexpr (__x_to_y)
 
  409          return __intrin_bitcast<_To>(is_signed_v<_Tp>
 
  410                                         ? _mm256_cvtepi16_epi32(__intrin)
 
  411                                         : _mm256_cvtepu16_epi32(__intrin));
 
  412        else if constexpr (__y_to_z)
 
  413          return __intrin_bitcast<_To>(is_signed_v<_Tp>
 
  414                                         ? _mm512_cvtepi16_epi32(__intrin)
 
  415                                         : _mm512_cvtepu16_epi32(__intrin));
 
  417    else if constexpr (__i16_to_i8) 
 
  419        if constexpr (__x_to_x && __have_avx512bw_vl)
 
  420          return __intrin_bitcast<_To>(_mm_cvtepi16_epi8(__intrin));
 
  421        else if constexpr (__x_to_x && __have_avx512bw)
 
  422          return __intrin_bitcast<_To>(
 
  423            __lo128(_mm512_cvtepi16_epi8(__zero_extend(__intrin))));
 
  424        else if constexpr (__x_to_x && __have_ssse3)
 
  425          return __intrin_bitcast<_To>(_mm_shuffle_epi8(
 
  426            __intrin, _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -0x80, -0x80,
 
  427                                    -0x80, -0x80, -0x80, -0x80, -0x80, -0x80)));
 
  428        else if constexpr (__x_to_x)
 
  431              = _mm_unpacklo_epi8(__intrin, __intrin); 
 
  433              = _mm_unpackhi_epi8(__intrin, __intrin); 
 
  434            auto __c = _mm_unpacklo_epi8(__a, __b);    
 
  435            auto __d = _mm_unpackhi_epi8(__a, __b);    
 
  436            auto __e = _mm_unpacklo_epi8(__c, __d);    
 
  437            auto __f = _mm_unpackhi_epi8(__c, __d);    
 
  438            return __intrin_bitcast<_To>(_mm_unpacklo_epi8(__e, __f));
 
  440        else if constexpr (__y_to_x && __have_avx512bw_vl)
 
  441          return __intrin_bitcast<_To>(_mm256_cvtepi16_epi8(__intrin));
 
  442        else if constexpr (__y_to_x && __have_avx512bw)
 
  443          return __intrin_bitcast<_To>(
 
  444            __lo256(_mm512_cvtepi16_epi8(__zero_extend(__intrin))));
 
  445        else if constexpr (__y_to_x)
 
  447            auto __a = _mm256_shuffle_epi8(
 
  449              _mm256_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, -0x80, -0x80, -0x80,
 
  450                               -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, -0x80,
 
  451                               -0x80, -0x80, -0x80, -0x80, -0x80, -0x80, 0, 2,
 
  452                               4, 6, 8, 10, 12, 14));
 
  453            return __intrin_bitcast<_To>(__lo128(__a) | __hi128(__a));
 
  455        else if constexpr (__z_to_y && __have_avx512bw)
 
  456          return __intrin_bitcast<_To>(_mm512_cvtepi16_epi8(__intrin));
 
  457        else if constexpr (__z_to_y)
 
  458          __assert_unreachable<_Tp>();
 
  460    else if constexpr (__i8_to_i64) 
 
  462        if constexpr (__x_to_x && __have_sse4_1)
 
  463          return __intrin_bitcast<_To>(is_signed_v<_Tp>
 
  464                                         ? _mm_cvtepi8_epi64(__intrin)
 
  465                                         : _mm_cvtepu8_epi64(__intrin));
 
  466        else if constexpr (__x_to_x && is_signed_v<_Tp>)
 
  468            if constexpr (__have_ssse3)
 
  470                auto __dup = _mm_unpacklo_epi8(__intrin, __intrin);
 
  471                auto __epi16 = _mm_srai_epi16(__dup, 8);
 
  472                _mm_shuffle_epi8(__epi16,
 
  473                                 _mm_setr_epi8(0, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3,
 
  478                auto __x = _mm_unpacklo_epi8(__intrin, __intrin);
 
  479                __x = _mm_unpacklo_epi16(__x, __x);
 
  480                return __intrin_bitcast<_To>(
 
  481                  _mm_unpacklo_epi32(_mm_srai_epi32(__x, 24),
 
  482                                     _mm_srai_epi32(__x, 31)));
 
  485        else if constexpr (__x_to_x)
 
  487            return __intrin_bitcast<_To>(_mm_unpacklo_epi32(
 
  488              _mm_unpacklo_epi16(_mm_unpacklo_epi8(__intrin, __m128i()),
 
  492        else if constexpr (__x_to_y)
 
  493          return __intrin_bitcast<_To>(is_signed_v<_Tp>
 
  494                                         ? _mm256_cvtepi8_epi64(__intrin)
 
  495                                         : _mm256_cvtepu8_epi64(__intrin));
 
  496        else if constexpr (__x_to_z)
 
  497          return __intrin_bitcast<_To>(is_signed_v<_Tp>
 
  498                                         ? _mm512_cvtepi8_epi64(__intrin)
 
  499                                         : _mm512_cvtepu8_epi64(__intrin));
 
  501    else if constexpr (__i8_to_i32) 
 
  503        if constexpr (__x_to_x && __have_sse4_1)
 
  504          return __intrin_bitcast<_To>(is_signed_v<_Tp>
 
  505                                         ? _mm_cvtepi8_epi32(__intrin)
 
  506                                         : _mm_cvtepu8_epi32(__intrin));
 
  507        else if constexpr (__x_to_x && is_signed_v<_Tp>)
 
  509            const auto __x = _mm_unpacklo_epi8(__intrin, __intrin);
 
  510            return __intrin_bitcast<_To>(
 
  511              _mm_srai_epi32(_mm_unpacklo_epi16(__x, __x), 24));
 
  513        else if constexpr (__x_to_x && is_unsigned_v<_Tp>)
 
  514          return __intrin_bitcast<_To>(
 
  515            _mm_unpacklo_epi16(_mm_unpacklo_epi8(__intrin, __m128i()),
 
  517        else if constexpr (__x_to_y)
 
  518          return __intrin_bitcast<_To>(is_signed_v<_Tp>
 
  519                                         ? _mm256_cvtepi8_epi32(__intrin)
 
  520                                         : _mm256_cvtepu8_epi32(__intrin));
 
  521        else if constexpr (__x_to_z)
 
  522          return __intrin_bitcast<_To>(is_signed_v<_Tp>
 
  523                                         ? _mm512_cvtepi8_epi32(__intrin)
 
  524                                         : _mm512_cvtepu8_epi32(__intrin));
 
  526    else if constexpr (__i8_to_i16) 
 
  528        if constexpr (__x_to_x && __have_sse4_1)
 
  529          return __intrin_bitcast<_To>(is_signed_v<_Tp>
 
  530                                         ? _mm_cvtepi8_epi16(__intrin)
 
  531                                         : _mm_cvtepu8_epi16(__intrin));
 
  532        else if constexpr (__x_to_x && is_signed_v<_Tp>)
 
  533          return __intrin_bitcast<_To>(
 
  534            _mm_srai_epi16(_mm_unpacklo_epi8(__intrin, __intrin), 8));
 
  535        else if constexpr (__x_to_x && is_unsigned_v<_Tp>)
 
  536          return __intrin_bitcast<_To>(_mm_unpacklo_epi8(__intrin, __m128i()));
 
  537        else if constexpr (__x_to_y)
 
  538          return __intrin_bitcast<_To>(is_signed_v<_Tp>
 
  539                                         ? _mm256_cvtepi8_epi16(__intrin)
 
  540                                         : _mm256_cvtepu8_epi16(__intrin));
 
  541        else if constexpr (__y_to_z && __have_avx512bw)
 
  542          return __intrin_bitcast<_To>(is_signed_v<_Tp>
 
  543                                         ? _mm512_cvtepi8_epi16(__intrin)
 
  544                                         : _mm512_cvtepu8_epi16(__intrin));
 
  545        else if constexpr (__y_to_z)
 
  546          __assert_unreachable<_Tp>();
 
  548    else if constexpr (__f32_to_s64) 
 
  550        if constexpr (__have_avx512dq_vl && __x_to_x)
 
  551          return __intrin_bitcast<_To>(_mm_cvttps_epi64(__intrin));
 
  552        else if constexpr (__have_avx512dq_vl && __x_to_y)
 
  553          return __intrin_bitcast<_To>(_mm256_cvttps_epi64(__intrin));
 
  554        else if constexpr (__have_avx512dq && __y_to_z)
 
  555          return __intrin_bitcast<_To>(_mm512_cvttps_epi64(__intrin));
 
  558    else if constexpr (__f32_to_u64) 
 
  560        if constexpr (__have_avx512dq_vl && __x_to_x)
 
  561          return __intrin_bitcast<_To>(_mm_cvttps_epu64(__intrin));
 
  562        else if constexpr (__have_avx512dq_vl && __x_to_y)
 
  563          return __intrin_bitcast<_To>(_mm256_cvttps_epu64(__intrin));
 
  564        else if constexpr (__have_avx512dq && __y_to_z)
 
  565          return __intrin_bitcast<_To>(_mm512_cvttps_epu64(__intrin));
 
  568    else if constexpr (__f32_to_s32) 
 
  570        if constexpr (__x_to_x || __y_to_y || __z_to_z)
 
  575          __assert_unreachable<_Tp>();
 
  577    else if constexpr (__f32_to_u32) 
 
  579        if constexpr (__have_avx512vl && __x_to_x)
 
  580          return __auto_bitcast(_mm_cvttps_epu32(__intrin));
 
  581        else if constexpr (__have_avx512f && __x_to_x)
 
  582          return __auto_bitcast(
 
  583            __lo128(_mm512_cvttps_epu32(__auto_bitcast(__v))));
 
  584        else if constexpr (__have_avx512vl && __y_to_y)
 
  585          return __vector_bitcast<_Up>(_mm256_cvttps_epu32(__intrin));
 
  586        else if constexpr (__have_avx512f && __y_to_y)
 
  587          return __vector_bitcast<_Up>(
 
  588            __lo256(_mm512_cvttps_epu32(__auto_bitcast(__v))));
 
  589        else if constexpr (__x_to_x || __y_to_y || __z_to_z)
 
  596          __assert_unreachable<_Tp>();
 
  598    else if constexpr (__f32_to_ibw) 
 
  599      return __convert_x86<_To>(__convert_x86<__vector_type_t<int, _Np>>(__v));
 
  600    else if constexpr (__f64_to_s64) 
 
  602        if constexpr (__have_avx512dq_vl && __x_to_x)
 
  603          return __intrin_bitcast<_To>(_mm_cvttpd_epi64(__intrin));
 
  604        else if constexpr (__have_avx512dq_vl && __y_to_y)
 
  605          return __intrin_bitcast<_To>(_mm256_cvttpd_epi64(__intrin));
 
  606        else if constexpr (__have_avx512dq && __z_to_z)
 
  607          return __intrin_bitcast<_To>(_mm512_cvttpd_epi64(__intrin));
 
  610    else if constexpr (__f64_to_u64) 
 
  612        if constexpr (__have_avx512dq_vl && __x_to_x)
 
  613          return __intrin_bitcast<_To>(_mm_cvttpd_epu64(__intrin));
 
  614        else if constexpr (__have_avx512dq_vl && __y_to_y)
 
  615          return __intrin_bitcast<_To>(_mm256_cvttpd_epu64(__intrin));
 
  616        else if constexpr (__have_avx512dq && __z_to_z)
 
  617          return __intrin_bitcast<_To>(_mm512_cvttpd_epu64(__intrin));
 
  620    else if constexpr (__f64_to_s32) 
 
  622        if constexpr (__x_to_x)
 
  623          return __intrin_bitcast<_To>(_mm_cvttpd_epi32(__intrin));
 
  624        else if constexpr (__y_to_x)
 
  625          return __intrin_bitcast<_To>(_mm256_cvttpd_epi32(__intrin));
 
  626        else if constexpr (__z_to_y)
 
  627          return __intrin_bitcast<_To>(_mm512_cvttpd_epi32(__intrin));
 
  629    else if constexpr (__f64_to_u32) 
 
  631        if constexpr (__have_avx512vl && __x_to_x)
 
  632          return __intrin_bitcast<_To>(_mm_cvttpd_epu32(__intrin));
 
  633        else if constexpr (__have_sse4_1 && __x_to_x)
 
  634          return __vector_bitcast<_Up, _M>(
 
  635                   _mm_cvttpd_epi32(_mm_floor_pd(__intrin) - 0x8000'0000u))
 
  637        else if constexpr (__x_to_x)
 
  642        else if constexpr (__have_avx512vl && __y_to_x)
 
  643          return __intrin_bitcast<_To>(_mm256_cvttpd_epu32(__intrin));
 
  644        else if constexpr (__y_to_x)
 
  646            return __intrin_bitcast<_To>(
 
  647              __vector_bitcast<_Up>(
 
  648                _mm256_cvttpd_epi32(_mm256_floor_pd(__intrin) - 0x8000'0000u))
 
  651        else if constexpr (__z_to_y)
 
  652          return __intrin_bitcast<_To>(_mm512_cvttpd_epu32(__intrin));
 
  654    else if constexpr (__f64_to_ibw) 
 
  656        return __convert_x86<_To>(
 
  657          __convert_x86<__vector_type_t<
int, (_Np < 4 ? 4 : _Np)>>(__v));
 
  659    else if constexpr (__s64_to_f32) 
 
  661        if constexpr (__x_to_x && __have_avx512dq_vl)
 
  662          return __intrin_bitcast<_To>(_mm_cvtepi64_ps(__intrin));
 
  663        else if constexpr (__y_to_x && __have_avx512dq_vl)
 
  664          return __intrin_bitcast<_To>(_mm256_cvtepi64_ps(__intrin));
 
  665        else if constexpr (__z_to_y && __have_avx512dq)
 
  666          return __intrin_bitcast<_To>(_mm512_cvtepi64_ps(__intrin));
 
  667        else if constexpr (__z_to_y)
 
  668          return __intrin_bitcast<_To>(
 
  669            _mm512_cvtpd_ps(__convert_x86<__vector_type_t<double, 8>>(__v)));
 
  671    else if constexpr (__u64_to_f32) 
 
  673        if constexpr (__x_to_x && __have_avx512dq_vl)
 
  674          return __intrin_bitcast<_To>(_mm_cvtepu64_ps(__intrin));
 
  675        else if constexpr (__y_to_x && __have_avx512dq_vl)
 
  676          return __intrin_bitcast<_To>(_mm256_cvtepu64_ps(__intrin));
 
  677        else if constexpr (__z_to_y && __have_avx512dq)
 
  678          return __intrin_bitcast<_To>(_mm512_cvtepu64_ps(__intrin));
 
  679        else if constexpr (__z_to_y)
 
  681            return __intrin_bitcast<_To>(
 
  682              __lo256(_mm512_cvtepu32_ps(__auto_bitcast(
 
  683                _mm512_cvtepi64_epi32(_mm512_srai_epi64(__intrin, 32)))))
 
  685              + __lo256(_mm512_cvtepu32_ps(
 
  686                __auto_bitcast(_mm512_cvtepi64_epi32(__intrin)))));
 
  689    else if constexpr (__s32_to_f32) 
 
  693    else if constexpr (__u32_to_f32) 
 
  695        if constexpr (__x_to_x && __have_avx512vl)
 
  699        else if constexpr (__x_to_x && __have_avx512f)
 
  700          return __intrin_bitcast<_To>(
 
  701            __lo128(_mm512_cvtepu32_ps(__auto_bitcast(__v))));
 
  702        else if constexpr (__x_to_x && (__have_fma || __have_fma4))
 
  704          return __auto_bitcast(0x10000
 
  705                                  * _mm_cvtepi32_ps(__to_intrin(__v >> 16))
 
  706                                + _mm_cvtepi32_ps(__to_intrin(__v & 0xffff)));
 
  707        else if constexpr (__y_to_y && __have_avx512vl)
 
  711        else if constexpr (__y_to_y && __have_avx512f)
 
  712          return __intrin_bitcast<_To>(
 
  713            __lo256(_mm512_cvtepu32_ps(__auto_bitcast(__v))));
 
  714        else if constexpr (__y_to_y)
 
  716          return 0x10000 * _mm256_cvtepi32_ps(__to_intrin(__v >> 16))
 
  717                 + _mm256_cvtepi32_ps(__to_intrin(__v & 0xffff));
 
  720    else if constexpr (__ibw_to_f32) 
 
  722        if constexpr (_M <= 4 || __have_avx2)
 
  723          return __convert_x86<_To>(
 
  724            __convert_x86<__vector_type_t<int, _M>>(__v));
 
  727            static_assert(__x_to_y);
 
  729            if constexpr (__have_sse4_1)
 
  731                __a = 
sizeof(_Tp) == 2
 
  732                        ? (is_signed_v<_Tp> ? _mm_cvtepi16_epi32(__intrin)
 
  733                                            : _mm_cvtepu16_epi32(__intrin))
 
  734                        : (is_signed_v<_Tp> ? _mm_cvtepi8_epi32(__intrin)
 
  735                                            : _mm_cvtepu8_epi32(__intrin));
 
  737                  = _mm_shuffle_epi32(__intrin, 
sizeof(_Tp) == 2 ? 0xee : 0xe9);
 
  738                __b = 
sizeof(_Tp) == 2
 
  739                        ? (is_signed_v<_Tp> ? _mm_cvtepi16_epi32(__w)
 
  740                                            : _mm_cvtepu16_epi32(__w))
 
  741                        : (is_signed_v<_Tp> ? _mm_cvtepi8_epi32(__w)
 
  742                                            : _mm_cvtepu8_epi32(__w));
 
  747                if constexpr (
sizeof(_Tp) == 1)
 
  749                    __tmp = is_signed_v<_Tp>
 
  750                              ? _mm_srai_epi16(_mm_unpacklo_epi8(__intrin,
 
  753                              : _mm_unpacklo_epi8(__intrin, __m128i());
 
  757                    static_assert(
sizeof(_Tp) == 2);
 
  760                __a = is_signed_v<_Tp>
 
  761                        ? _mm_srai_epi32(_mm_unpacklo_epi16(__tmp, __tmp), 16)
 
  762                        : _mm_unpacklo_epi16(__tmp, __m128i());
 
  763                __b = is_signed_v<_Tp>
 
  764                        ? _mm_srai_epi32(_mm_unpackhi_epi16(__tmp, __tmp), 16)
 
  765                        : _mm_unpackhi_epi16(__tmp, __m128i());
 
  767            return __convert_x86<_To>(__vector_bitcast<int>(__a),
 
  768                                      __vector_bitcast<int>(__b));
 
  771    else if constexpr (__s64_to_f64) 
 
  773        if constexpr (__x_to_x && __have_avx512dq_vl)
 
  774          return __intrin_bitcast<_To>(_mm_cvtepi64_pd(__intrin));
 
  775        else if constexpr (__y_to_y && __have_avx512dq_vl)
 
  776          return __intrin_bitcast<_To>(_mm256_cvtepi64_pd(__intrin));
 
  777        else if constexpr (__z_to_z && __have_avx512dq)
 
  778          return __intrin_bitcast<_To>(_mm512_cvtepi64_pd(__intrin));
 
  779        else if constexpr (__z_to_z)
 
  781            return __intrin_bitcast<_To>(
 
  782              _mm512_cvtepi32_pd(_mm512_cvtepi64_epi32(__to_intrin(__v >> 32)))
 
  784              + _mm512_cvtepu32_pd(_mm512_cvtepi64_epi32(__intrin)));
 
  787    else if constexpr (__u64_to_f64) 
 
  789        if constexpr (__x_to_x && __have_avx512dq_vl)
 
  790          return __intrin_bitcast<_To>(_mm_cvtepu64_pd(__intrin));
 
  791        else if constexpr (__y_to_y && __have_avx512dq_vl)
 
  792          return __intrin_bitcast<_To>(_mm256_cvtepu64_pd(__intrin));
 
  793        else if constexpr (__z_to_z && __have_avx512dq)
 
  794          return __intrin_bitcast<_To>(_mm512_cvtepu64_pd(__intrin));
 
  795        else if constexpr (__z_to_z)
 
  797            return __intrin_bitcast<_To>(
 
  798              _mm512_cvtepu32_pd(_mm512_cvtepi64_epi32(__to_intrin(__v >> 32)))
 
  800              + _mm512_cvtepu32_pd(_mm512_cvtepi64_epi32(__intrin)));
 
  803    else if constexpr (__s32_to_f64) 
 
  805        if constexpr (__x_to_x)
 
  806          return __intrin_bitcast<_To>(_mm_cvtepi32_pd(__intrin));
 
  807        else if constexpr (__x_to_y)
 
  808          return __intrin_bitcast<_To>(_mm256_cvtepi32_pd(__intrin));
 
  809        else if constexpr (__y_to_z)
 
  810          return __intrin_bitcast<_To>(_mm512_cvtepi32_pd(__intrin));
 
  812    else if constexpr (__u32_to_f64) 
 
  814        if constexpr (__x_to_x && __have_avx512vl)
 
  815          return __intrin_bitcast<_To>(_mm_cvtepu32_pd(__intrin));
 
  816        else if constexpr (__x_to_x && __have_avx512f)
 
  817          return __intrin_bitcast<_To>(
 
  818            __lo128(_mm512_cvtepu32_pd(__auto_bitcast(__v))));
 
  819        else if constexpr (__x_to_x)
 
  820          return __intrin_bitcast<_To>(
 
  821            _mm_cvtepi32_pd(__to_intrin(__v ^ 0x8000'0000u)) + 0x8000'0000u);
 
  822        else if constexpr (__x_to_y && __have_avx512vl)
 
  823          return __intrin_bitcast<_To>(_mm256_cvtepu32_pd(__intrin));
 
  824        else if constexpr (__x_to_y && __have_avx512f)
 
  825          return __intrin_bitcast<_To>(
 
  826            __lo256(_mm512_cvtepu32_pd(__auto_bitcast(__v))));
 
  827        else if constexpr (__x_to_y)
 
  828          return __intrin_bitcast<_To>(
 
  829            _mm256_cvtepi32_pd(__to_intrin(__v ^ 0x8000'0000u)) + 0x8000'0000u);
 
  830        else if constexpr (__y_to_z)
 
  831          return __intrin_bitcast<_To>(_mm512_cvtepu32_pd(__intrin));
 
  833    else if constexpr (__ibw_to_f64) 
 
  835        return __convert_x86<_To>(
 
  836          __convert_x86<__vector_type_t<
int, 
std::max(
size_t(4), _M)>>(__v));
 
  838    else if constexpr (__f32_to_f64) 
 
  840        if constexpr (__x_to_x)
 
  841          return __intrin_bitcast<_To>(_mm_cvtps_pd(__intrin));
 
  842        else if constexpr (__x_to_y)
 
  843          return __intrin_bitcast<_To>(_mm256_cvtps_pd(__intrin));
 
  844        else if constexpr (__y_to_z)
 
  845          return __intrin_bitcast<_To>(_mm512_cvtps_pd(__intrin));
 
  847    else if constexpr (__f64_to_f32) 
 
  849        if constexpr (__x_to_x)
 
  850          return __intrin_bitcast<_To>(_mm_cvtpd_ps(__intrin));
 
  851        else if constexpr (__y_to_x)
 
  852          return __intrin_bitcast<_To>(_mm256_cvtpd_ps(__intrin));
 
  853        else if constexpr (__z_to_y)
 
  854          return __intrin_bitcast<_To>(_mm512_cvtpd_ps(__intrin));
 
  857      __assert_unreachable<_Tp>();
 
  860    return __vector_convert<_To>(__v, make_index_sequence<
std::min(_M, _Np)>());
 
  866template <
typename _To, 
typename _V, 
typename _Traits>
 
  867  _GLIBCXX_SIMD_INTRINSIC _To
 
  868  __convert_x86(_V __v0, _V __v1)
 
  870    static_assert(__is_vector_type_v<_V>);
 
  871    using _Tp = 
typename _Traits::value_type;
 
  872    constexpr size_t _Np = _Traits::_S_full_size;
 
  873    [[maybe_unused]] 
const auto __i0 = __to_intrin(__v0);
 
  874    [[maybe_unused]] 
const auto __i1 = __to_intrin(__v1);
 
  875    using _Up = 
typename _VectorTraits<_To>::value_type;
 
  876    constexpr size_t _M = _VectorTraits<_To>::_S_full_size;
 
  878    static_assert(2 * _Np <= _M,
 
  879                  "__v1 would be discarded; use the one-argument " 
  880                  "__convert_x86 overload instead");
 
  883    [[maybe_unused]] 
constexpr bool __x_to_x
 
  884      = 
sizeof(__v0) <= 16 && 
sizeof(_To) <= 16;
 
  885    [[maybe_unused]] 
constexpr bool __x_to_y
 
  886      = 
sizeof(__v0) <= 16 && 
sizeof(_To) == 32;
 
  887    [[maybe_unused]] 
constexpr bool __x_to_z
 
  888      = 
sizeof(__v0) <= 16 && 
sizeof(_To) == 64;
 
  889    [[maybe_unused]] 
constexpr bool __y_to_x
 
  890      = 
sizeof(__v0) == 32 && 
sizeof(_To) <= 16;
 
  891    [[maybe_unused]] 
constexpr bool __y_to_y
 
  892      = 
sizeof(__v0) == 32 && 
sizeof(_To) == 32;
 
  893    [[maybe_unused]] 
constexpr bool __y_to_z
 
  894      = 
sizeof(__v0) == 32 && 
sizeof(_To) == 64;
 
  895    [[maybe_unused]] 
constexpr bool __z_to_x
 
  896      = 
sizeof(__v0) == 64 && 
sizeof(_To) <= 16;
 
  897    [[maybe_unused]] 
constexpr bool __z_to_y
 
  898      = 
sizeof(__v0) == 64 && 
sizeof(_To) == 32;
 
  899    [[maybe_unused]] 
constexpr bool __z_to_z
 
  900      = 
sizeof(__v0) == 64 && 
sizeof(_To) == 64;
 
  903    [[maybe_unused]] 
constexpr bool __i_to_i
 
  904      = is_integral_v<_Up> && is_integral_v<_Tp>;
 
  905    [[maybe_unused]] 
constexpr bool __i8_to_i16
 
  906      = __i_to_i && 
sizeof(_Tp) == 1 && 
sizeof(_Up) == 2;
 
  907    [[maybe_unused]] 
constexpr bool __i8_to_i32
 
  908      = __i_to_i && 
sizeof(_Tp) == 1 && 
sizeof(_Up) == 4;
 
  909    [[maybe_unused]] 
constexpr bool __i8_to_i64
 
  910      = __i_to_i && 
sizeof(_Tp) == 1 && 
sizeof(_Up) == 8;
 
  911    [[maybe_unused]] 
constexpr bool __i16_to_i8
 
  912      = __i_to_i && 
sizeof(_Tp) == 2 && 
sizeof(_Up) == 1;
 
  913    [[maybe_unused]] 
constexpr bool __i16_to_i32
 
  914      = __i_to_i && 
sizeof(_Tp) == 2 && 
sizeof(_Up) == 4;
 
  915    [[maybe_unused]] 
constexpr bool __i16_to_i64
 
  916      = __i_to_i && 
sizeof(_Tp) == 2 && 
sizeof(_Up) == 8;
 
  917    [[maybe_unused]] 
constexpr bool __i32_to_i8
 
  918      = __i_to_i && 
sizeof(_Tp) == 4 && 
sizeof(_Up) == 1;
 
  919    [[maybe_unused]] 
constexpr bool __i32_to_i16
 
  920      = __i_to_i && 
sizeof(_Tp) == 4 && 
sizeof(_Up) == 2;
 
  921    [[maybe_unused]] 
constexpr bool __i32_to_i64
 
  922      = __i_to_i && 
sizeof(_Tp) == 4 && 
sizeof(_Up) == 8;
 
  923    [[maybe_unused]] 
constexpr bool __i64_to_i8
 
  924      = __i_to_i && 
sizeof(_Tp) == 8 && 
sizeof(_Up) == 1;
 
  925    [[maybe_unused]] 
constexpr bool __i64_to_i16
 
  926      = __i_to_i && 
sizeof(_Tp) == 8 && 
sizeof(_Up) == 2;
 
  927    [[maybe_unused]] 
constexpr bool __i64_to_i32
 
  928      = __i_to_i && 
sizeof(_Tp) == 8 && 
sizeof(_Up) == 4;
 
  932    [[maybe_unused]] 
constexpr bool __i64_to_f32
 
  933      = is_integral_v<_Tp> && 
sizeof(_Tp) == 8
 
  934        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
  935    [[maybe_unused]] 
constexpr bool __s32_to_f32
 
  936      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 4
 
  937        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
  938    [[maybe_unused]] 
constexpr bool __s16_to_f32
 
  939      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 2
 
  940        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
  941    [[maybe_unused]] 
constexpr bool __s8_to_f32
 
  942      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 1
 
  943        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
  944    [[maybe_unused]] 
constexpr bool __u32_to_f32
 
  945      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 4
 
  946        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
  947    [[maybe_unused]] 
constexpr bool __u16_to_f32
 
  948      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 2
 
  949        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
  950    [[maybe_unused]] 
constexpr bool __u8_to_f32
 
  951      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 1
 
  952        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
  953    [[maybe_unused]] 
constexpr bool __s64_to_f64
 
  954      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 8
 
  955        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
  956    [[maybe_unused]] 
constexpr bool __s32_to_f64
 
  957      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 4
 
  958        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
  959    [[maybe_unused]] 
constexpr bool __s16_to_f64
 
  960      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 2
 
  961        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
  962    [[maybe_unused]] 
constexpr bool __s8_to_f64
 
  963      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 1
 
  964        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
  965    [[maybe_unused]] 
constexpr bool __u64_to_f64
 
  966      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 8
 
  967        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
  968    [[maybe_unused]] 
constexpr bool __u32_to_f64
 
  969      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 4
 
  970        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
  971    [[maybe_unused]] 
constexpr bool __u16_to_f64
 
  972      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 2
 
  973        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
  974    [[maybe_unused]] 
constexpr bool __u8_to_f64
 
  975      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 1
 
  976        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
  977    [[maybe_unused]] 
constexpr bool __f32_to_s64
 
  978      = is_integral_v<_Up> && is_signed_v<_Up> && 
sizeof(_Up) == 8
 
  979        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 4;
 
  980    [[maybe_unused]] 
constexpr bool __f32_to_s32
 
  981      = is_integral_v<_Up> && is_signed_v<_Up> && 
sizeof(_Up) == 4
 
  982        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 4;
 
  983    [[maybe_unused]] 
constexpr bool __f32_to_u64
 
  984      = is_integral_v<_Up> && is_unsigned_v<_Up> && 
sizeof(_Up) == 8
 
  985        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 4;
 
  986    [[maybe_unused]] 
constexpr bool __f32_to_u32
 
  987      = is_integral_v<_Up> && is_unsigned_v<_Up> && 
sizeof(_Up) == 4
 
  988        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 4;
 
  989    [[maybe_unused]] 
constexpr bool __f64_to_s64
 
  990      = is_integral_v<_Up> && is_signed_v<_Up> && 
sizeof(_Up) == 8
 
  991        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8;
 
  992    [[maybe_unused]] 
constexpr bool __f64_to_s32
 
  993      = is_integral_v<_Up> && is_signed_v<_Up> && 
sizeof(_Up) == 4
 
  994        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8;
 
  995    [[maybe_unused]] 
constexpr bool __f64_to_u64
 
  996      = is_integral_v<_Up> && is_unsigned_v<_Up> && 
sizeof(_Up) == 8
 
  997        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8;
 
  998    [[maybe_unused]] 
constexpr bool __f64_to_u32
 
  999      = is_integral_v<_Up> && is_unsigned_v<_Up> && 
sizeof(_Up) == 4
 
 1000        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8;
 
 1001    [[maybe_unused]] 
constexpr bool __f32_to_ibw
 
 1002      = is_integral_v<_Up> && 
sizeof(_Up) <= 2
 
 1003        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 4;
 
 1004    [[maybe_unused]] 
constexpr bool __f64_to_ibw
 
 1005      = is_integral_v<_Up> && 
sizeof(_Up) <= 2
 
 1006        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8;
 
 1007    [[maybe_unused]] 
constexpr bool __f32_to_f64
 
 1008      = is_floating_point_v<_Tp> && 
sizeof(_Tp) == 4
 
 1009        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
 1010    [[maybe_unused]] 
constexpr bool __f64_to_f32
 
 1011      = is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8
 
 1012        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
 1014    if constexpr (__i_to_i && __y_to_x && !__have_avx2) 
 
 1016      return __convert_x86<_To>(__lo128(__v0), __hi128(__v0), __lo128(__v1),
 
 1018    else if constexpr (__i_to_i) 
 
 1020        static_assert(__x_to_x || __have_avx2,
 
 1021                      "integral conversions with ymm registers require AVX2");
 
 1022        static_assert(__have_avx512bw
 
 1023                        || ((
sizeof(_Tp) >= 4 || 
sizeof(__v0) < 64)
 
 1024                            && (
sizeof(_Up) >= 4 || 
sizeof(_To) < 64)),
 
 1025                      "8/16-bit integers in zmm registers require AVX512BW");
 
 1026        static_assert((
sizeof(__v0) < 64 && 
sizeof(_To) < 64) || __have_avx512f,
 
 1027                      "integral conversions with ymm registers require AVX2");
 
 1030    if constexpr (
sizeof(__v0) < 16 || (
sizeof(__v0) == 16 && __have_avx2)
 
 1031                  || (
sizeof(__v0) == 16 && __have_avx
 
 1032                      && is_floating_point_v<_Tp>)
 
 1033                  || (
sizeof(__v0) == 32 && __have_avx512f
 
 1034                      && (
sizeof(_Tp) >= 4 || __have_avx512bw)))
 
 1038        return __convert_x86<_To>(__concat(__v0, __v1));
 
 1045          !(is_floating_point_v<
 
 1046              _Tp> == is_floating_point_v<_Up> && 
sizeof(_Tp) == 
sizeof(_Up)));
 
 1048        if constexpr (2 * _Np < _M && 
sizeof(_To) > 16)
 
 1050            constexpr size_t Min = 16 / 
sizeof(_Up);
 
 1051            return __zero_extend(
 
 1053                __vector_type_t<_Up, (Min > 2 * _Np) ? Min : 2 * _Np>>(__v0,
 
 1056        else if constexpr (__i64_to_i32) 
 
 1058            if constexpr (__x_to_x)
 
 1059              return __auto_bitcast(_mm_shuffle_ps(__auto_bitcast(__v0),
 
 1060                                                   __auto_bitcast(__v1), 0x88));
 
 1061            else if constexpr (__y_to_y)
 
 1064                return __auto_bitcast(
 
 1065                  __xzyw(_mm256_shuffle_ps(__auto_bitcast(__v0),
 
 1066                                           __auto_bitcast(__v1), 0x88)));
 
 1074            else if constexpr (__z_to_z)
 
 1075              return __intrin_bitcast<_To>(
 
 1076                __concat(_mm512_cvtepi64_epi32(__i0),
 
 1077                         _mm512_cvtepi64_epi32(__i1)));
 
 1079        else if constexpr (__i64_to_i16) 
 
 1081            if constexpr (__x_to_x)
 
 1084                if constexpr (__have_sse4_1)
 
 1086                    return __intrin_bitcast<_To>(_mm_shuffle_epi8(
 
 1087                      _mm_blend_epi16(__i0, _mm_slli_si128(__i1, 4), 0x44),
 
 1088                      _mm_setr_epi8(0, 1, 8, 9, 4, 5, 12, 13, -0x80, -0x80,
 
 1089                                    -0x80, -0x80, -0x80, -0x80, -0x80, -0x80)));
 
 1093                    return __vector_type_t<_Up, _M>{_Up(__v0[0]), _Up(__v0[1]),
 
 1094                                                    _Up(__v1[0]), _Up(__v1[1])};
 
 1097            else if constexpr (__y_to_x)
 
 1100                  = _mm256_unpacklo_epi16(__i0, __i1); 
 
 1102                  = _mm256_unpackhi_epi16(__i0, __i1); 
 
 1104                  = _mm256_unpacklo_epi16(__a, __b); 
 
 1105                return __intrin_bitcast<_To>(
 
 1106                  _mm_unpacklo_epi32(__lo128(__c), __hi128(__c))); 
 
 1108            else if constexpr (__z_to_y)
 
 1109              return __intrin_bitcast<_To>(
 
 1110                __concat(_mm512_cvtepi64_epi16(__i0),
 
 1111                         _mm512_cvtepi64_epi16(__i1)));
 
 1113        else if constexpr (__i64_to_i8) 
 
 1115            if constexpr (__x_to_x && __have_sse4_1)
 
 1117                return __intrin_bitcast<_To>(_mm_shuffle_epi8(
 
 1118                  _mm_blend_epi16(__i0, _mm_slli_si128(__i1, 4), 0x44),
 
 1119                  _mm_setr_epi8(0, 8, 4, 12, -0x80, -0x80, -0x80, -0x80, -0x80,
 
 1120                                -0x80, -0x80, -0x80, -0x80, -0x80, -0x80,
 
 1123            else if constexpr (__x_to_x && __have_ssse3)
 
 1125                return __intrin_bitcast<_To>(_mm_unpacklo_epi16(
 
 1127                    __i0, _mm_setr_epi8(0, 8, -0x80, -0x80, -0x80, -0x80, -0x80,
 
 1128                                        -0x80, -0x80, -0x80, -0x80, -0x80,
 
 1129                                        -0x80, -0x80, -0x80, -0x80)),
 
 1131                    __i1, _mm_setr_epi8(0, 8, -0x80, -0x80, -0x80, -0x80, -0x80,
 
 1132                                        -0x80, -0x80, -0x80, -0x80, -0x80,
 
 1133                                        -0x80, -0x80, -0x80, -0x80))));
 
 1135            else if constexpr (__x_to_x)
 
 1137                return __vector_type_t<_Up, _M>{_Up(__v0[0]), _Up(__v0[1]),
 
 1138                                                _Up(__v1[0]), _Up(__v1[1])};
 
 1140            else if constexpr (__y_to_x)
 
 1142                const auto __a = _mm256_shuffle_epi8(
 
 1143                  _mm256_blend_epi32(__i0, _mm256_slli_epi64(__i1, 32), 0xAA),
 
 1144                  _mm256_setr_epi8(0, 8, -0x80, -0x80, 4, 12, -0x80, -0x80,
 
 1145                                   -0x80, -0x80, -0x80, -0x80, -0x80, -0x80,
 
 1146                                   -0x80, -0x80, -0x80, -0x80, 0, 8, -0x80,
 
 1147                                   -0x80, 4, 12, -0x80, -0x80, -0x80, -0x80,
 
 1148                                   -0x80, -0x80, -0x80, -0x80));
 
 1149                return __intrin_bitcast<_To>(__lo128(__a) | __hi128(__a));
 
 1152        else if constexpr (__i32_to_i16) 
 
 1154            if constexpr (__x_to_x)
 
 1157                if constexpr (__have_sse4_1)
 
 1159                    return __intrin_bitcast<_To>(_mm_shuffle_epi8(
 
 1160                      _mm_blend_epi16(__i0, _mm_slli_si128(__i1, 2), 0xaa),
 
 1161                      _mm_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10,
 
 1164                else if constexpr (__have_ssse3)
 
 1166                    return __intrin_bitcast<_To>(
 
 1167                      _mm_hadd_epi16(__to_intrin(__v0 << 16),
 
 1168                                     __to_intrin(__v1 << 16)));
 
 1179                    auto __a = _mm_unpacklo_epi16(__i0, __i1); 
 
 1180                    auto __b = _mm_unpackhi_epi16(__i0, __i1); 
 
 1181                    auto __c = _mm_unpacklo_epi16(__a, __b);   
 
 1182                    auto __d = _mm_unpackhi_epi16(__a, __b);   
 
 1183                    return __intrin_bitcast<_To>(
 
 1184                      _mm_unpacklo_epi16(__c, __d)); 
 
 1187            else if constexpr (__y_to_y)
 
 1190                  = _mm256_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80,
 
 1191                                     -0x80, -0x80, -0x80, -0x80, -0x80, -0x80,
 
 1192                                     0, 1, 4, 5, 8, 9, 12, 13, -0x80, -0x80,
 
 1193                                     -0x80, -0x80, -0x80, -0x80, -0x80, -0x80);
 
 1194                auto __a = _mm256_shuffle_epi8(__i0, __shuf);
 
 1195                auto __b = _mm256_shuffle_epi8(__i1, __shuf);
 
 1196                return __intrin_bitcast<_To>(
 
 1197                  __xzyw(_mm256_unpacklo_epi64(__a, __b)));
 
 1200        else if constexpr (__i32_to_i8) 
 
 1202            if constexpr (__x_to_x && __have_ssse3)
 
 1205                  = _mm_setr_epi8(0, 4, 8, 12, -0x80, -0x80, -0x80, -0x80,
 
 1206                                  -0x80, -0x80, -0x80, -0x80, -0x80, -0x80,
 
 1208                return __intrin_bitcast<_To>(
 
 1209                  _mm_unpacklo_epi32(_mm_shuffle_epi8(__i0, shufmask),
 
 1210                                     _mm_shuffle_epi8(__i1, shufmask)));
 
 1212            else if constexpr (__x_to_x)
 
 1214                auto __a = _mm_unpacklo_epi8(__i0, __i1); 
 
 1215                auto __b = _mm_unpackhi_epi8(__i0, __i1); 
 
 1216                auto __c = _mm_unpacklo_epi8(__a, __b);   
 
 1217                auto __d = _mm_unpackhi_epi8(__a, __b);   
 
 1218                auto __e = _mm_unpacklo_epi8(__c, __d);   
 
 1219                return __intrin_bitcast<_To>(__e & __m128i{-1, 0});
 
 1221            else if constexpr (__y_to_x)
 
 1223                const auto __a = _mm256_shuffle_epi8(
 
 1224                  _mm256_blend_epi16(__i0, _mm256_slli_epi32(__i1, 16), 0xAA),
 
 1225                  _mm256_setr_epi8(0, 4, 8, 12, -0x80, -0x80, -0x80, -0x80, 2,
 
 1226                                   6, 10, 14, -0x80, -0x80, -0x80, -0x80, -0x80,
 
 1227                                   -0x80, -0x80, -0x80, 0, 4, 8, 12, -0x80,
 
 1228                                   -0x80, -0x80, -0x80, 2, 6, 10, 14));
 
 1229                return __intrin_bitcast<_To>(__lo128(__a) | __hi128(__a));
 
 1232        else if constexpr (__i16_to_i8) 
 
 1234            if constexpr (__x_to_x && __have_ssse3)
 
 1236                const auto __shuf = 
reinterpret_cast<__m128i
>(
 
 1237                  __vector_type_t<_UChar, 16>{0, 2, 4, 6, 8, 10, 12, 14, 0x80,
 
 1238                                              0x80, 0x80, 0x80, 0x80, 0x80,
 
 1240                return __intrin_bitcast<_To>(
 
 1241                  _mm_unpacklo_epi64(_mm_shuffle_epi8(__i0, __shuf),
 
 1242                                     _mm_shuffle_epi8(__i1, __shuf)));
 
 1244            else if constexpr (__x_to_x)
 
 1246                auto __a = _mm_unpacklo_epi8(__i0, __i1); 
 
 1247                auto __b = _mm_unpackhi_epi8(__i0, __i1); 
 
 1248                auto __c = _mm_unpacklo_epi8(__a, __b);   
 
 1249                auto __d = _mm_unpackhi_epi8(__a, __b);   
 
 1250                auto __e = _mm_unpacklo_epi8(__c, __d);   
 
 1251                auto __f = _mm_unpackhi_epi8(__c, __d);   
 
 1252                return __intrin_bitcast<_To>(_mm_unpacklo_epi8(__e, __f));
 
 1254            else if constexpr (__y_to_y)
 
 1256                return __intrin_bitcast<_To>(__xzyw(_mm256_shuffle_epi8(
 
 1257                  (__to_intrin(__v0) & _mm256_set1_epi32(0x00ff00ff))
 
 1258                    | _mm256_slli_epi16(__i1, 8),
 
 1259                  _mm256_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11,
 
 1260                                   13, 15, 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5,
 
 1261                                   7, 9, 11, 13, 15))));
 
 1264        else if constexpr (__i64_to_f32) 
 
 1266            if constexpr (__x_to_x)
 
 1267              return __make_wrapper<float>(__v0[0], __v0[1], __v1[0], __v1[1]);
 
 1268            else if constexpr (__y_to_y)
 
 1270                static_assert(__y_to_y && __have_avx2);
 
 1271                const auto __a = _mm256_unpacklo_epi32(__i0, __i1); 
 
 1272                const auto __b = _mm256_unpackhi_epi32(__i0, __i1); 
 
 1274                  = _mm256_unpacklo_epi32(__a, __b); 
 
 1275                const auto __hi32 = __vector_bitcast<
 
 1276                  conditional_t<is_signed_v<_Tp>, int, _UInt>>(
 
 1277                  _mm256_unpackhi_epi32(__a, __b)); 
 
 1280                    * __convert_x86<__vector_type_t<float, 8>>(__hi32);
 
 1282                  = 0x10000 * _mm256_cvtepi32_ps(_mm256_srli_epi32(__lo32, 16));
 
 1284                  = _mm256_cvtepi32_ps(_mm256_set1_epi32(0x0000ffffu) & __lo32);
 
 1285                return __xzyw((__hi + __mid) + __lo);
 
 1287            else if constexpr (__z_to_z && __have_avx512dq)
 
 1289                return is_signed_v<_Tp> ? __concat(_mm512_cvtepi64_ps(__i0),
 
 1290                                                   _mm512_cvtepi64_ps(__i1))
 
 1291                                        : __concat(_mm512_cvtepu64_ps(__i0),
 
 1292                                                   _mm512_cvtepu64_ps(__i1));
 
 1294            else if constexpr (__z_to_z && is_signed_v<_Tp>)
 
 1296                const __m512 __hi32 = _mm512_cvtepi32_ps(
 
 1297                  __concat(_mm512_cvtepi64_epi32(__to_intrin(__v0 >> 32)),
 
 1298                           _mm512_cvtepi64_epi32(__to_intrin(__v1 >> 32))));
 
 1299                const __m512i __lo32 = __concat(_mm512_cvtepi64_epi32(__i0),
 
 1300                                                _mm512_cvtepi64_epi32(__i1));
 
 1306                  = _mm512_cvtepu32_ps(_mm512_set1_epi32(0xffff0000u) & __lo32);
 
 1308                  = _mm512_cvtepi32_ps(_mm512_set1_epi32(0x0000ffffu) & __lo32);
 
 1309                return (__hi32 * 0x100000000LL + __hi16) + __lo16;
 
 1311            else if constexpr (__z_to_z && is_unsigned_v<_Tp>)
 
 1313                return __intrin_bitcast<_To>(
 
 1314                  _mm512_cvtepu32_ps(__concat(
 
 1315                    _mm512_cvtepi64_epi32(_mm512_srai_epi64(__i0, 32)),
 
 1316                    _mm512_cvtepi64_epi32(_mm512_srai_epi64(__i1, 32))))
 
 1318                  + _mm512_cvtepu32_ps(__concat(_mm512_cvtepi64_epi32(__i0),
 
 1319                                                _mm512_cvtepi64_epi32(__i1))));
 
 1322        else if constexpr (__f64_to_s32) 
 
 1326        else if constexpr (__f64_to_u32) 
 
 1328            if constexpr (__x_to_x && __have_sse4_1)
 
 1330                return __vector_bitcast<_Up, _M>(_mm_unpacklo_epi64(
 
 1331                         _mm_cvttpd_epi32(_mm_floor_pd(__i0) - 0x8000'0000u),
 
 1332                         _mm_cvttpd_epi32(_mm_floor_pd(__i1) - 0x8000'0000u)))
 
 1337            else if constexpr (__y_to_y)
 
 1339                return __vector_bitcast<_Up>(
 
 1340                         __concat(_mm256_cvttpd_epi32(_mm256_floor_pd(__i0)
 
 1342                                  _mm256_cvttpd_epi32(_mm256_floor_pd(__i1)
 
 1347        else if constexpr (__f64_to_ibw) 
 
 1354            return __convert_x86<_To>(
 
 1355              __convert_x86<__vector_type_t<int, _Np * 2>>(__v0, __v1));
 
 1358        else if constexpr (__f32_to_ibw) 
 
 1360            return __convert_x86<_To>(
 
 1361              __convert_x86<__vector_type_t<int, _Np>>(__v0),
 
 1362              __convert_x86<__vector_type_t<int, _Np>>(__v1));
 
 1366        if constexpr (
sizeof(_To) >= 32)
 
 1368          return __concat(__convert_x86<__vector_type_t<_Up, _M / 2>>(__v0),
 
 1369                          __convert_x86<__vector_type_t<_Up, _M / 2>>(__v1));
 
 1370        else if constexpr (
sizeof(_To) == 16)
 
 1372            const auto __lo = __to_intrin(__convert_x86<_To>(__v0));
 
 1373            const auto __hi = __to_intrin(__convert_x86<_To>(__v1));
 
 1374            if constexpr (
sizeof(_Up) * _Np == 8)
 
 1376                if constexpr (is_floating_point_v<_Up>)
 
 1377                  return __auto_bitcast(
 
 1378                    _mm_unpacklo_pd(__vector_bitcast<double>(__lo),
 
 1379                                    __vector_bitcast<double>(__hi)));
 
 1381                  return __intrin_bitcast<_To>(_mm_unpacklo_epi64(__lo, __hi));
 
 1383            else if constexpr (
sizeof(_Up) * _Np == 4)
 
 1385                if constexpr (is_floating_point_v<_Up>)
 
 1386                  return __auto_bitcast(
 
 1387                    _mm_unpacklo_ps(__vector_bitcast<float>(__lo),
 
 1388                                    __vector_bitcast<float>(__hi)));
 
 1390                  return __intrin_bitcast<_To>(_mm_unpacklo_epi32(__lo, __hi));
 
 1392            else if constexpr (
sizeof(_Up) * _Np == 2)
 
 1393              return __intrin_bitcast<_To>(_mm_unpacklo_epi16(__lo, __hi));
 
 1395              __assert_unreachable<_Tp>();
 
 1398          return __vector_convert<_To>(__v0, __v1, make_index_sequence<_Np>());
 
 // Four-argument overload of __convert_x86: converts the elements of the four
 // input vectors __v0..__v3 (all of type _V) and returns them in one vector of
 // type _To. The implementation is picked at compile time from the operand
 // sizes, the element types and the available instruction-set extensions.
 // NOTE(review): this text is a rendered source listing; the listing numbers
 // are fused into the code and lines holding only braces/comments (plus some
 // continuation lines) are absent, so several statements below are incomplete.
 1405template <
typename _To, 
typename _V, 
typename _Traits>
 
 1406  _GLIBCXX_SIMD_INTRINSIC _To
 
 1407  __convert_x86(_V __v0, _V __v1, _V __v2, _V __v3)
 
 1409    static_assert(__is_vector_type_v<_V>);
 
     // _Tp / _Np: element type and full element count of one input vector,
     // both taken from _Traits.
 1410    using _Tp = 
typename _Traits::value_type;
 
 1411    constexpr size_t _Np = _Traits::_S_full_size;
 
     // __i0..__i3: the inputs after __to_intrin, i.e. the values handed to the
     // _mm_* / _mm256_* intrinsics further down.
 1412    [[maybe_unused]] 
const auto __i0 = __to_intrin(__v0);
 
 1413    [[maybe_unused]] 
const auto __i1 = __to_intrin(__v1);
 
 1414    [[maybe_unused]] 
const auto __i2 = __to_intrin(__v2);
 
 1415    [[maybe_unused]] 
const auto __i3 = __to_intrin(__v3);
 
     // _Up / _M: element type and full element count of the destination.
 1416    using _Up = 
typename _VectorTraits<_To>::value_type;
 
 1417    constexpr size_t _M = _VectorTraits<_To>::_S_full_size;
 
     // All 4 * _Np input elements must fit into the destination vector.
 1419    static_assert(4 * _Np <= _M,
 
 1420                  "__v2/__v3 would be discarded; use the two/one-argument " 
 1421                  "__convert_x86 overload instead");
 
     // Size classes of source (__v0) and destination (_To) in bytes:
     // x means <= 16, y means 32, z means 64. (The static_assert messages
     // below speak of ymm/zmm registers for the 32/64-byte cases.)
 1424    [[maybe_unused]] 
constexpr bool __x_to_x
 
 1425      = 
sizeof(__v0) <= 16 && 
sizeof(_To) <= 16;
 
 1426    [[maybe_unused]] 
constexpr bool __x_to_y
 
 1427      = 
sizeof(__v0) <= 16 && 
sizeof(_To) == 32;
 
 1428    [[maybe_unused]] 
constexpr bool __x_to_z
 
 1429      = 
sizeof(__v0) <= 16 && 
sizeof(_To) == 64;
 
 1430    [[maybe_unused]] 
constexpr bool __y_to_x
 
 1431      = 
sizeof(__v0) == 32 && 
sizeof(_To) <= 16;
 
 1432    [[maybe_unused]] 
constexpr bool __y_to_y
 
 1433      = 
sizeof(__v0) == 32 && 
sizeof(_To) == 32;
 
 1434    [[maybe_unused]] 
constexpr bool __y_to_z
 
 1435      = 
sizeof(__v0) == 32 && 
sizeof(_To) == 64;
 
 1436    [[maybe_unused]] 
constexpr bool __z_to_x
 
 1437      = 
sizeof(__v0) == 64 && 
sizeof(_To) <= 16;
 
 1438    [[maybe_unused]] 
constexpr bool __z_to_y
 
 1439      = 
sizeof(__v0) == 64 && 
sizeof(_To) == 32;
 
 1440    [[maybe_unused]] 
constexpr bool __z_to_z
 
 1441      = 
sizeof(__v0) == 64 && 
sizeof(_To) == 64;
 
     // Integer-to-integer conversions, classified by the element size of the
     // source (_Tp) and of the destination (_Up): 1/2/4/8 bytes = i8/i16/i32/i64.
 1444    [[maybe_unused]] 
constexpr bool __i_to_i
 
 1445      = is_integral_v<_Up> && is_integral_v<_Tp>;
 
 1446    [[maybe_unused]] 
constexpr bool __i8_to_i16
 
 1447      = __i_to_i && 
sizeof(_Tp) == 1 && 
sizeof(_Up) == 2;
 
 1448    [[maybe_unused]] 
constexpr bool __i8_to_i32
 
 1449      = __i_to_i && 
sizeof(_Tp) == 1 && 
sizeof(_Up) == 4;
 
 1450    [[maybe_unused]] 
constexpr bool __i8_to_i64
 
 1451      = __i_to_i && 
sizeof(_Tp) == 1 && 
sizeof(_Up) == 8;
 
 1452    [[maybe_unused]] 
constexpr bool __i16_to_i8
 
 1453      = __i_to_i && 
sizeof(_Tp) == 2 && 
sizeof(_Up) == 1;
 
 1454    [[maybe_unused]] 
constexpr bool __i16_to_i32
 
 1455      = __i_to_i && 
sizeof(_Tp) == 2 && 
sizeof(_Up) == 4;
 
 1456    [[maybe_unused]] 
constexpr bool __i16_to_i64
 
 1457      = __i_to_i && 
sizeof(_Tp) == 2 && 
sizeof(_Up) == 8;
 
 1458    [[maybe_unused]] 
constexpr bool __i32_to_i8
 
 1459      = __i_to_i && 
sizeof(_Tp) == 4 && 
sizeof(_Up) == 1;
 
 1460    [[maybe_unused]] 
constexpr bool __i32_to_i16
 
 1461      = __i_to_i && 
sizeof(_Tp) == 4 && 
sizeof(_Up) == 2;
 
 1462    [[maybe_unused]] 
constexpr bool __i32_to_i64
 
 1463      = __i_to_i && 
sizeof(_Tp) == 4 && 
sizeof(_Up) == 8;
 
 1464    [[maybe_unused]] 
constexpr bool __i64_to_i8
 
 1465      = __i_to_i && 
sizeof(_Tp) == 8 && 
sizeof(_Up) == 1;
 
 1466    [[maybe_unused]] 
constexpr bool __i64_to_i16
 
 1467      = __i_to_i && 
sizeof(_Tp) == 8 && 
sizeof(_Up) == 2;
 
 1468    [[maybe_unused]] 
constexpr bool __i64_to_i32
 
 1469      = __i_to_i && 
sizeof(_Tp) == 8 && 
sizeof(_Up) == 4;
 
     // Integer-to-floating-point conversions. The s/u prefix requires a
     // signed/unsigned source; __i64_to_f32 accepts either signedness.
 1473    [[maybe_unused]] 
constexpr bool __i64_to_f32
 
 1474      = is_integral_v<_Tp> && 
sizeof(_Tp) == 8
 
 1475        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
 1476    [[maybe_unused]] 
constexpr bool __s32_to_f32
 
 1477      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 4
 
 1478        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
 1479    [[maybe_unused]] 
constexpr bool __s16_to_f32
 
 1480      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 2
 
 1481        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
 1482    [[maybe_unused]] 
constexpr bool __s8_to_f32
 
 1483      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 1
 
 1484        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
 1485    [[maybe_unused]] 
constexpr bool __u32_to_f32
 
 1486      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 4
 
 1487        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
 1488    [[maybe_unused]] 
constexpr bool __u16_to_f32
 
 1489      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 2
 
 1490        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
 1491    [[maybe_unused]] 
constexpr bool __u8_to_f32
 
 1492      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 1
 
 1493        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
 1494    [[maybe_unused]] 
constexpr bool __s64_to_f64
 
 1495      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 8
 
 1496        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
 1497    [[maybe_unused]] 
constexpr bool __s32_to_f64
 
 1498      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 4
 
 1499        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
 1500    [[maybe_unused]] 
constexpr bool __s16_to_f64
 
 1501      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 2
 
 1502        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
 1503    [[maybe_unused]] 
constexpr bool __s8_to_f64
 
 1504      = is_integral_v<_Tp> && is_signed_v<_Tp> && 
sizeof(_Tp) == 1
 
 1505        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
 1506    [[maybe_unused]] 
constexpr bool __u64_to_f64
 
 1507      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 8
 
 1508        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
 1509    [[maybe_unused]] 
constexpr bool __u32_to_f64
 
 1510      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 4
 
 1511        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
 1512    [[maybe_unused]] 
constexpr bool __u16_to_f64
 
 1513      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 2
 
 1514        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
 1515    [[maybe_unused]] 
constexpr bool __u8_to_f64
 
 1516      = is_integral_v<_Tp> && is_unsigned_v<_Tp> && 
sizeof(_Tp) == 1
 
 1517        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
     // Floating-point-to-integer conversions; here s/u is the signedness of
     // the destination element type _Up.
 1518    [[maybe_unused]] 
constexpr bool __f32_to_s64
 
 1519      = is_integral_v<_Up> && is_signed_v<_Up> && 
sizeof(_Up) == 8
 
 1520        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 4;
 
 1521    [[maybe_unused]] 
constexpr bool __f32_to_s32
 
 1522      = is_integral_v<_Up> && is_signed_v<_Up> && 
sizeof(_Up) == 4
 
 1523        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 4;
 
 1524    [[maybe_unused]] 
constexpr bool __f32_to_u64
 
 1525      = is_integral_v<_Up> && is_unsigned_v<_Up> && 
sizeof(_Up) == 8
 
 1526        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 4;
 
 1527    [[maybe_unused]] 
constexpr bool __f32_to_u32
 
 1528      = is_integral_v<_Up> && is_unsigned_v<_Up> && 
sizeof(_Up) == 4
 
 1529        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 4;
 
 1530    [[maybe_unused]] 
constexpr bool __f64_to_s64
 
 1531      = is_integral_v<_Up> && is_signed_v<_Up> && 
sizeof(_Up) == 8
 
 1532        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8;
 
 1533    [[maybe_unused]] 
constexpr bool __f64_to_s32
 
 1534      = is_integral_v<_Up> && is_signed_v<_Up> && 
sizeof(_Up) == 4
 
 1535        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8;
 
 1536    [[maybe_unused]] 
constexpr bool __f64_to_u64
 
 1537      = is_integral_v<_Up> && is_unsigned_v<_Up> && 
sizeof(_Up) == 8
 
 1538        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8;
 
 1539    [[maybe_unused]] 
constexpr bool __f64_to_u32
 
 1540      = is_integral_v<_Up> && is_unsigned_v<_Up> && 
sizeof(_Up) == 4
 
 1541        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8;
 
     // "ibw": floating-point source converted to an integral destination of
     // at most 2 bytes per element.
 1542    [[maybe_unused]] 
constexpr bool __f32_to_ibw
 
 1543      = is_integral_v<_Up> && 
sizeof(_Up) <= 2
 
 1544        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 4;
 
 1545    [[maybe_unused]] 
constexpr bool __f64_to_ibw
 
 1546      = is_integral_v<_Up> && 
sizeof(_Up) <= 2
 
 1547        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8;
 
     // Floating-point width changes (4-byte <-> 8-byte elements).
 1548    [[maybe_unused]] 
constexpr bool __f32_to_f64
 
 1549      = is_floating_point_v<_Tp> && 
sizeof(_Tp) == 4
 
 1550        && is_floating_point_v<_Up> && 
sizeof(_Up) == 8;
 
 1551    [[maybe_unused]] 
constexpr bool __f64_to_f32
 
 1552      = is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8
 
 1553        && is_floating_point_v<_Up> && 
sizeof(_Up) == 4;
 
     // Integer conversion from 32-byte inputs to a <= 16-byte result without
     // AVX2: hand the __lo128/__hi128 parts of every input to the
     // eight-argument overload.
 1555    if constexpr (__i_to_i && __y_to_x && !__have_avx2) 
 
 1558        return __convert_x86<_To>(__lo128(__v0), __hi128(__v0), __lo128(__v1),
 
 1559                                  __hi128(__v1), __lo128(__v2), __hi128(__v2),
 
 1560                                  __lo128(__v3), __hi128(__v3));
 
     // Compile-time sanity checks for all other integer conversions.
 1562    else if constexpr (__i_to_i) 
 
 1564        static_assert(__x_to_x || __have_avx2,
 
 1565                      "integral conversions with ymm registers require AVX2");
 
 1566        static_assert(__have_avx512bw
 
 1567                        || ((
sizeof(_Tp) >= 4 || 
sizeof(__v0) < 64)
 
 1568                            && (
sizeof(_Up) >= 4 || 
sizeof(_To) < 64)),
 
 1569                      "8/16-bit integers in zmm registers require AVX512BW");
 
         // NOTE(review): this condition requires __have_avx512f for 64-byte
         // operands, yet the message repeats the ymm/AVX2 text of the first
         // check -- looks like a copy-paste slip in the message; confirm.
 1570        static_assert((
sizeof(__v0) < 64 && 
sizeof(_To) < 64) || __have_avx512f,
 
 1571                      "integral conversions with ymm registers require AVX2");
 
     // Inputs smaller than 16 bytes, 16-byte inputs with AVX2 (or with AVX when
     // the source is floating-point), or 32-byte inputs with AVX512F: join the
     // inputs pairwise with __concat and use the two-argument overload.
 1574    if constexpr (
sizeof(__v0) < 16 || (
sizeof(__v0) == 16 && __have_avx2)
 
 1575                  || (
sizeof(__v0) == 16 && __have_avx
 
 1576                      && is_floating_point_v<_Tp>)
 
 1577                  || (
sizeof(__v0) == 32 && __have_avx512f))
 
 1581        return __convert_x86<_To>(__concat(__v0, __v1), __concat(__v2, __v3));
 
         // NOTE(review): the head of the next statement (listing line 1587) is
         // missing; presumably a static_assert that source and destination
         // differ in floating-point-ness or in element size -- confirm.
 1588          !(is_floating_point_v<
 
 1589              _Tp> == is_floating_point_v<_Up> && 
sizeof(_Tp) == 
sizeof(_Up)));
 
         // Destination holds more than 4 * _Np elements and is wider than 16
         // bytes: convert into a vector of max(16 / sizeof(_Up), 4 * _Np)
         // elements and pass that result to __zero_extend.
 1591        if constexpr (4 * _Np < _M && 
sizeof(_To) > 16)
 
 1593            constexpr size_t Min = 16 / 
sizeof(_Up);
 
 1594            return __zero_extend(
 
 1596                __vector_type_t<_Up, (Min > 4 * _Np) ? Min : 4 * _Np>>(
 
 1597                __v0, __v1, __v2, __v3));
 
         // 64-bit -> 16-bit integers: blend/shuffle sequences chosen by size
         // class and ISA (SSE4.1 for x_to_x, AVX2 for y_to_y). Parts of both
         // expressions are on listing lines that are missing here.
 1599        else if constexpr (__i64_to_i16) 
 
 1601            if constexpr (__x_to_x && __have_sse4_1)
 
 1603                return __intrin_bitcast<_To>(_mm_shuffle_epi8(
 
 1605                    _mm_blend_epi16(__i0, _mm_slli_si128(__i1, 2), 0x22),
 
 1606                    _mm_blend_epi16(_mm_slli_si128(__i2, 4),
 
 1607                                    _mm_slli_si128(__i3, 6), 0x88),
 
 1609                  _mm_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7,
 
 1612            else if constexpr (__y_to_y && __have_avx2)
 
 1614                return __intrin_bitcast<_To>(_mm256_shuffle_epi8(
 
 1615                  __xzyw(_mm256_blend_epi16(
 
 1617                      _mm256_shuffle_ps(__vector_bitcast<float>(__v0),
 
 1618                                        __vector_bitcast<float>(__v2),
 
 1620                    __to_intrin(__vector_bitcast<int>(_mm256_shuffle_ps(
 
 1621                                  __vector_bitcast<float>(__v1),
 
 1622                                  __vector_bitcast<float>(__v3), 0x88))
 
 1626                  _mm256_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11,
 
 1627                                   14, 15, 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7,
 
         // 64-bit -> 8-bit integers. The x_to_x body and most of the y_to_x
         // body are on listing lines that are missing here.
 1648        else if constexpr (__i64_to_i8) 
 
 1650            if constexpr (__x_to_x)
 
 1654            else if constexpr (__y_to_x)
 
 1657                  = _mm256_srli_epi32(_mm256_slli_epi32(__i0, 24), 24)
 
 1658                    | _mm256_srli_epi32(_mm256_slli_epi32(__i1, 24), 16)
 
 1659                    | _mm256_srli_epi32(_mm256_slli_epi32(__i2, 24), 8)
 
 1660                    | _mm256_slli_epi32(
 
 1666                auto __b = _mm256_unpackhi_epi64(
 
 1668                auto __c = _mm256_unpacklo_epi8(
 
 1670                return __intrin_bitcast<_To>(
 
 1671                  _mm_unpacklo_epi16(__lo128(__c),
 
         // 32-bit -> 8-bit integers.
 1675        else if constexpr (__i32_to_i8) 
 
 1677            if constexpr (__x_to_x)
 
                 // SSSE3: keep the low byte of each element (& 0xff), OR the
                 // four inputs into one register (__x3 is shifted left by 24;
                 // the shifts applied to __x1/__x2 are on missing lines), then
                 // reorder the bytes with _mm_shuffle_epi8. The code after that
                 // return is a chain of _mm_unpack{lo,hi}_epi8 steps; the
                 // branch keyword introducing it is not visible here.
 1679                if constexpr (__have_ssse3)
 
 1681                    const auto __x0 = __vector_bitcast<_UInt>(__v0) & 0xff;
 
 1682                    const auto __x1 = (__vector_bitcast<_UInt>(__v1) & 0xff)
 
 1684                    const auto __x2 = (__vector_bitcast<_UInt>(__v2) & 0xff)
 
 1686                    const auto __x3 = __vector_bitcast<_UInt>(__v3) << 24;
 
 1687                    return __intrin_bitcast<_To>(
 
 1688                      _mm_shuffle_epi8(__to_intrin(__x0 | __x1 | __x2 | __x3),
 
 1689                                       _mm_setr_epi8(0, 4, 8, 12, 1, 5, 9, 13,
 
 1690                                                     2, 6, 10, 14, 3, 7, 11,
 
 1696                      = _mm_unpacklo_epi8(__i0, __i2); 
 
 1698                      = _mm_unpackhi_epi8(__i0, __i2); 
 
 1700                      = _mm_unpacklo_epi8(__i1, __i3); 
 
 1702                      = _mm_unpackhi_epi8(__i1, __i3); 
 
 1704                      = _mm_unpacklo_epi8(__a, __c); 
 
 1706                      = _mm_unpackhi_epi8(__a, __c); 
 
 1708                      = _mm_unpacklo_epi8(__b, __d); 
 
 1710                      = _mm_unpackhi_epi8(__b, __d); 
 
 1711                    return __intrin_bitcast<_To>(_mm_unpacklo_epi8(
 
 1712                      _mm_unpacklo_epi8(__e, __g), 
 
 1713                      _mm_unpacklo_epi8(__f, __h)  
 
             // 32-byte source and destination: blend pairs of inputs, shuffle
             // bytes within the register, then reorder 32-bit elements with
             // _mm256_permutevar8x32_epi32.
 1717            else if constexpr (__y_to_y)
 
 1719                const auto __a = _mm256_shuffle_epi8(
 
 1720                  __to_intrin((__vector_bitcast<_UShort>(_mm256_blend_epi16(
 
 1721                                 __i0, _mm256_slli_epi32(__i1, 16), 0xAA))
 
 1723                              | (__vector_bitcast<_UShort>(_mm256_blend_epi16(
 
 1724                                   __i2, _mm256_slli_epi32(__i3, 16), 0xAA))
 
 1726                  _mm256_setr_epi8(0, 4, 8, 12, 2, 6, 10, 14, 1, 5, 9, 13, 3, 7,
 
 1727                                   11, 15, 0, 4, 8, 12, 2, 6, 10, 14, 1, 5, 9,
 
 1729                return __intrin_bitcast<_To>(_mm256_permutevar8x32_epi32(
 
 1730                  __a, _mm256_setr_epi32(0, 4, 1, 5, 2, 6, 3, 7)));
 
         // 64-bit integers -> float.
 1733        else if constexpr (__i64_to_f32) 
 
             // The first return builds the result element by element via
             // __make_wrapper<float>. The statements after it regroup the
             // 32-bit parts of the inputs with unpacks, convert the parts
             // separately and return (__hi + __mid) + __lo.
             // NOTE(review): the condition selecting between the two paths and
             // the scale factors of __hi/__mid are on missing lines.
 1737            if constexpr (__x_to_y)
 
 1739                return __make_wrapper<float>(__v0[0], __v0[1], __v1[0], __v1[1],
 
 1740                                             __v2[0], __v2[1], __v3[0],
 
 1743                const auto __a = _mm_unpacklo_epi32(__i0, __i1);   
 
 1744                const auto __b = _mm_unpackhi_epi32(__i0, __i1);   
 
 1745                const auto __c = _mm_unpacklo_epi32(__i2, __i3);   
 
 1746                const auto __d = _mm_unpackhi_epi32(__i2, __i3);   
 
 1747                const auto __lo32a = _mm_unpacklo_epi32(__a, __b); 
 
 1748                const auto __lo32b = _mm_unpacklo_epi32(__c, __d); 
 
 1749                const auto __hi32 = __vector_bitcast<
 
 1750                  conditional_t<is_signed_v<_Tp>, int, _UInt>>(
 
 1751                  __concat(_mm_unpackhi_epi32(__a, __b),
 
 1752                           _mm_unpackhi_epi32(__c, __d))); 
 
 1755                    * __convert_x86<__vector_type_t<float, 8>>(__hi32);
 
 1758                    * _mm256_cvtepi32_ps(__concat(_mm_srli_epi32(__lo32a, 16),
 
 1759                                                  _mm_srli_epi32(__lo32b, 16)));
 
 1760                const auto __lo = _mm256_cvtepi32_ps(
 
 1761                  __concat(_mm_set1_epi32(0x0000ffffu) & __lo32a,
 
 1762                           _mm_set1_epi32(0x0000ffffu) & __lo32b));
 
 1763                return (__hi + __mid) + __lo;
 
         // double -> 8/16-bit integers: first convert the pairs (__v0, __v1)
         // and (__v2, __v3) to int vectors, then convert those to _To.
 1766        else if constexpr (__f64_to_ibw) 
 
 1768            return __convert_x86<_To>(
 
 1769              __convert_x86<__vector_type_t<int, _Np * 2>>(__v0, __v1),
 
 1770              __convert_x86<__vector_type_t<int, _Np * 2>>(__v2, __v3));
 
         // float -> 8/16-bit integers: convert every input to an int vector,
         // then convert the four int vectors to _To.
 1772        else if constexpr (__f32_to_ibw) 
 
 1774            return __convert_x86<_To>(
 
 1775              __convert_x86<__vector_type_t<int, _Np>>(__v0),
 
 1776              __convert_x86<__vector_type_t<int, _Np>>(__v1),
 
 1777              __convert_x86<__vector_type_t<int, _Np>>(__v2),
 
 1778              __convert_x86<__vector_type_t<int, _Np>>(__v3));
 
     // Generic fallbacks.
     // Destination of 32 bytes or more: produce two vectors of _M / 2 elements
     // and __concat them (the second argument of each inner call is on a
     // missing line).
 1782        if constexpr (
sizeof(_To) >= 32)
 
 1784          return __concat(__convert_x86<__vector_type_t<_Up, _M / 2>>(__v0,
 
 1786                          __convert_x86<__vector_type_t<_Up, _M / 2>>(__v2,
 
     // 16-byte destination: convert (__v0, __v1) and (__v2, __v3) separately
     // and join the low parts of both results with an unpacklo intrinsic whose
     // width is selected from sizeof(_Up) * _Np * 2 (8 or 4 bytes).
 1788        else if constexpr (
sizeof(_To) == 16)
 
 1790            const auto __lo = __to_intrin(__convert_x86<_To>(__v0, __v1));
 
 1791            const auto __hi = __to_intrin(__convert_x86<_To>(__v2, __v3));
 
 1792            if constexpr (
sizeof(_Up) * _Np * 2 == 8)
 
 1794                if constexpr (is_floating_point_v<_Up>)
 
 1795                  return __auto_bitcast(_mm_unpacklo_pd(__lo, __hi));
 
 1797                  return __intrin_bitcast<_To>(_mm_unpacklo_epi64(__lo, __hi));
 
 1799            else if constexpr (
sizeof(_Up) * _Np * 2 == 4)
 
 1801                if constexpr (is_floating_point_v<_Up>)
 
 1802                  return __auto_bitcast(_mm_unpacklo_ps(__lo, __hi));
 
 1804                  return __intrin_bitcast<_To>(_mm_unpacklo_epi32(__lo, __hi));
 
 1807              __assert_unreachable<_Tp>();
 
     // Any other destination size: generic conversion via __vector_convert.
 1810          return __vector_convert<_To>(__v0, __v1, __v2, __v3,
 
 1811                                       make_index_sequence<_Np>());
 
 // Eight-argument overload of __convert_x86: converts the elements of the
 // input vectors __v0..__v7 (all of type _V) and returns them in one vector of
 // type _To. Only 64-bit-integer -> 8-bit-integer and double -> 8-bit-integer
 // conversions get a dedicated path; other cases end in the generic fallbacks.
 // NOTE(review): this text is a rendered source listing; the listing numbers
 // are fused into the code and some lines are absent (e.g. the end of the
 // parameter list, which presumably declares __v7, and all brace-only lines).
 1818template <
typename _To, 
typename _V, 
typename _Traits>
 
 1819  _GLIBCXX_SIMD_INTRINSIC _To
 
 1820  __convert_x86(_V __v0, _V __v1, _V __v2, _V __v3, _V __v4, _V __v5, _V __v6,
 
 1823    static_assert(__is_vector_type_v<_V>);
 
     // _Tp / _Np: element type and full element count of one input vector,
     // both taken from _Traits.
 1824    using _Tp = 
typename _Traits::value_type;
 
 1825    constexpr size_t _Np = _Traits::_S_full_size;
 
     // __i0..__i7: the inputs after __to_intrin, i.e. the values handed to the
     // _mm_* intrinsics further down.
 1826    [[maybe_unused]] 
const auto __i0 = __to_intrin(__v0);
 
 1827    [[maybe_unused]] 
const auto __i1 = __to_intrin(__v1);
 
 1828    [[maybe_unused]] 
const auto __i2 = __to_intrin(__v2);
 
 1829    [[maybe_unused]] 
const auto __i3 = __to_intrin(__v3);
 
 1830    [[maybe_unused]] 
const auto __i4 = __to_intrin(__v4);
 
 1831    [[maybe_unused]] 
const auto __i5 = __to_intrin(__v5);
 
 1832    [[maybe_unused]] 
const auto __i6 = __to_intrin(__v6);
 
 1833    [[maybe_unused]] 
const auto __i7 = __to_intrin(__v7);
 
     // _Up / _M: element type and full element count of the destination.
 1834    using _Up = 
typename _VectorTraits<_To>::value_type;
 
 1835    constexpr size_t _M = _VectorTraits<_To>::_S_full_size;
 
     // All 8 * _Np input elements must fit into the destination vector.
 1837    static_assert(8 * _Np <= _M,
 
 1838                  "__v4-__v7 would be discarded; use the four/two/one-argument " 
 1839                  "__convert_x86 overload instead");
 
     // Size classes of source (__v0) and destination (_To) in bytes:
     // x means <= 16, y means 32, z means 64.
 1842    [[maybe_unused]] 
constexpr bool __x_to_x
 
 1843      = 
sizeof(__v0) <= 16 && 
sizeof(_To) <= 16;
 
 1844    [[maybe_unused]] 
constexpr bool __x_to_y
 
 1845      = 
sizeof(__v0) <= 16 && 
sizeof(_To) == 32;
 
 1846    [[maybe_unused]] 
constexpr bool __x_to_z
 
 1847      = 
sizeof(__v0) <= 16 && 
sizeof(_To) == 64;
 
 1848    [[maybe_unused]] 
constexpr bool __y_to_x
 
 1849      = 
sizeof(__v0) == 32 && 
sizeof(_To) <= 16;
 
 1850    [[maybe_unused]] 
constexpr bool __y_to_y
 
 1851      = 
sizeof(__v0) == 32 && 
sizeof(_To) == 32;
 
 1852    [[maybe_unused]] 
constexpr bool __y_to_z
 
 1853      = 
sizeof(__v0) == 32 && 
sizeof(_To) == 64;
 
 1854    [[maybe_unused]] 
constexpr bool __z_to_x
 
 1855      = 
sizeof(__v0) == 64 && 
sizeof(_To) <= 16;
 
 1856    [[maybe_unused]] 
constexpr bool __z_to_y
 
 1857      = 
sizeof(__v0) == 64 && 
sizeof(_To) == 32;
 
 1858    [[maybe_unused]] 
constexpr bool __z_to_z
 
 1859      = 
sizeof(__v0) == 64 && 
sizeof(_To) == 64;
 
     // Conversion kinds with a dedicated path in this overload: 8-byte
     // integer or 8-byte floating-point source to a 1-byte integral destination.
 1862    [[maybe_unused]] 
constexpr bool __i_to_i
 
 1863      = is_integral_v<_Up> && is_integral_v<_Tp>;
 
 1864    [[maybe_unused]] 
constexpr bool __i64_to_i8
 
 1865      = __i_to_i && 
sizeof(_Tp) == 8 && 
sizeof(_Up) == 1;
 
 1866    [[maybe_unused]] 
constexpr bool __f64_to_i8
 
 1867      = is_integral_v<_Up> && 
sizeof(_Up) == 1
 
 1868        && is_floating_point_v<_Tp> && 
sizeof(_Tp) == 8;
 
     // Compile-time sanity checks for integer conversions.
 1870    if constexpr (__i_to_i) 
 
 1872        static_assert(__x_to_x || __have_avx2,
 
 1873                      "integral conversions with ymm registers require AVX2");
 
 1874        static_assert(__have_avx512bw
 
 1875                        || ((
sizeof(_Tp) >= 4 || 
sizeof(__v0) < 64)
 
 1876                            && (
sizeof(_Up) >= 4 || 
sizeof(_To) < 64)),
 
 1877                      "8/16-bit integers in zmm registers require AVX512BW");
 
         // NOTE(review): this condition requires __have_avx512f for 64-byte
         // operands, yet the message repeats the ymm/AVX2 text of the first
         // check -- looks like a copy-paste slip in the message; confirm.
 1878        static_assert((
sizeof(__v0) < 64 && 
sizeof(_To) < 64) || __have_avx512f,
 
 1879                      "integral conversions with ymm registers require AVX2");
 
     // Inputs smaller than 16 bytes, 16-byte inputs with AVX2 (or with AVX when
     // the source is floating-point), or 32-byte inputs with AVX512F: join the
     // inputs pairwise with __concat and use the four-argument overload.
 1882    if constexpr (
sizeof(__v0) < 16 || (
sizeof(__v0) == 16 && __have_avx2)
 
 1883                  || (
sizeof(__v0) == 16 && __have_avx
 
 1884                      && is_floating_point_v<_Tp>)
 
 1885                  || (
sizeof(__v0) == 32 && __have_avx512f))
 
 1889        return __convert_x86<_To>(__concat(__v0, __v1), __concat(__v2, __v3),
 
 1890                                  __concat(__v4, __v5), __concat(__v6, __v7));
 
         // NOTE(review): the head of the next statement (listing line 1896) is
         // missing; presumably a static_assert that source and destination
         // differ in floating-point-ness or in element size -- confirm.
 1897          !(is_floating_point_v<
 
 1898              _Tp> == is_floating_point_v<_Up> && 
sizeof(_Tp) == 
sizeof(_Up)));
 
 1899        static_assert(!(8 * _Np < _M && 
sizeof(_To) > 16),
 
 1900                      "zero extension should be impossible");
 
         // 64-bit -> 8-bit integers.
 1901        if constexpr (__i64_to_i8) 
 
             // SSSE3, <= 16-byte operands: mask each input to its low byte,
             // shift input k left by 8 * k bits (__v7 is shifted by 56 without
             // masking), OR everything together and reorder the bytes with
             // _mm_shuffle_epi8.
 1903            if constexpr (__x_to_x && __have_ssse3)
 
 1906                return __intrin_bitcast<_To>(_mm_shuffle_epi8(
 
 1908                    (((__v0 & 0xff) | ((__v1 & 0xff) << 8))
 
 1909                     | (((__v2 & 0xff) << 16) | ((__v3 & 0xff) << 24)))
 
 1910                    | ((((__v4 & 0xff) << 32) | ((__v5 & 0xff) << 40))
 
 1911                       | (((__v6 & 0xff) << 48) | (__v7 << 56)))),
 
 1912                  _mm_setr_epi8(0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14,
 
             // <= 16-byte operands without SSSE3: interleave the inputs with
             // successive epi8 / epi32 / epi64 unpack steps.
 1915            else if constexpr (__x_to_x)
 
 1917                const auto __a = _mm_unpacklo_epi8(__i0, __i1); 
 
 1918                const auto __b = _mm_unpackhi_epi8(__i0, __i1); 
 
 1919                const auto __c = _mm_unpacklo_epi8(__i2, __i3); 
 
 1920                const auto __d = _mm_unpackhi_epi8(__i2, __i3); 
 
 1921                const auto __e = _mm_unpacklo_epi8(__i4, __i5); 
 
 1922                const auto __f = _mm_unpackhi_epi8(__i4, __i5); 
 
 1923                const auto __g = _mm_unpacklo_epi8(__i6, __i7); 
 
 1924                const auto __h = _mm_unpackhi_epi8(__i6, __i7); 
 
 1925                return __intrin_bitcast<_To>(_mm_unpacklo_epi64(
 
 1926                  _mm_unpacklo_epi32(_mm_unpacklo_epi8(__a, __b),  
 
 1927                                     _mm_unpacklo_epi8(__c, __d)), 
 
 1928                  _mm_unpacklo_epi32(_mm_unpacklo_epi8(__e, __f),  
 
 1929                                     _mm_unpacklo_epi8(__g, __h))  
 
             // 32-byte operands: same mask/shift/OR merge as the SSSE3 path,
             // followed by two _mm256_shuffle_epi8 passes. The statements
             // defining __a and __c are on listing lines that are missing.
 1932            else if constexpr (__y_to_y)
 
 1936                    (((__v0 & 0xff) | ((__v1 & 0xff) << 8))
 
 1937                     | (((__v2 & 0xff) << 16) | ((__v3 & 0xff) << 24)))
 
 1938                    | ((((__v4 & 0xff) << 32) | ((__v5 & 0xff) << 40))
 
 1939                       | (((__v6 & 0xff) << 48) | ((__v7 << 56)))));
 
 1950                auto __b = _mm256_shuffle_epi8( 
 
 1952                  __a, _mm256_setr_epi8(0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13,
 
 1953                                        6, 14, 7, 15, 0, 8, 1, 9, 2, 10, 3, 11,
 
 1954                                        4, 12, 5, 13, 6, 14, 7, 15));
 
 1957                return __intrin_bitcast<_To>(_mm256_shuffle_epi8(
 
 1958                  __c, _mm256_setr_epi8(0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13,
 
 1959                                        6, 7, 14, 15, 0, 1, 8, 9, 2, 3, 10, 11,
 
 1960                                        4, 5, 12, 13, 6, 7, 14, 15)));
 
             // 64-byte operands: two four-argument conversions producing
             // _M / 2 elements each (how they are combined is on missing
             // lines; presumably via __concat -- confirm).
 1962            else if constexpr (__z_to_z)
 
 1965                  __convert_x86<__vector_type_t<_Up, _M / 2>>(__v0, __v1, __v2,
 
 1967                  __convert_x86<__vector_type_t<_Up, _M / 2>>(__v4, __v5, __v6,
 
         // double -> 8-bit integers: first convert each pair of inputs to an
         // int vector, then convert the four int vectors to _To.
 1971        else if constexpr (__f64_to_i8) 
 
 1973            return __convert_x86<_To>(
 
 1974              __convert_x86<__vector_type_t<int, _Np * 2>>(__v0, __v1),
 
 1975              __convert_x86<__vector_type_t<int, _Np * 2>>(__v2, __v3),
 
 1976              __convert_x86<__vector_type_t<int, _Np * 2>>(__v4, __v5),
 
 1977              __convert_x86<__vector_type_t<int, _Np * 2>>(__v6, __v7));
 
 1980          __assert_unreachable<_Tp>();
 
     // Generic fallbacks.
     // Destination of 32 bytes or more: two four-argument conversions of
     // _M / 2 elements each (the enclosing call is on a missing line).
 1984        if constexpr (
sizeof(_To) >= 32)
 
 1987            __convert_x86<__vector_type_t<_Up, _M / 2>>(__v0, __v1, __v2, __v3),
 
 1988            __convert_x86<__vector_type_t<_Up, _M / 2>>(__v4, __v5, __v6,
 
     // 16-byte destination: convert each group of four inputs on its own and
     // join the low 64 bits of both results; the static_assert restricts this
     // path to 1-byte destination elements with _Np == 2.
 1990        else if constexpr (
sizeof(_To) == 16)
 
 1993              = __to_intrin(__convert_x86<_To>(__v0, __v1, __v2, __v3));
 
 1995              = __to_intrin(__convert_x86<_To>(__v4, __v5, __v6, __v7));
 
 1996            static_assert(
sizeof(_Up) == 1 && _Np == 2);
 
 1997            return __intrin_bitcast<_To>(_mm_unpacklo_epi64(__lo, __hi));
 
 2001            __assert_unreachable<_Tp>();
 
 2011template <
typename _To, 
typename _V, 
typename _Traits>
 
 2012  _GLIBCXX_SIMD_INTRINSIC _To
 
 2013  __convert_x86(_V __v0, _V __v1, _V __v2, _V __v3, _V __v4, _V __v5, _V __v6,
 
 2014                _V __v7, _V __v8, _V __v9, _V __v10, _V __v11, _V __v12,
 
 2015                _V __v13, _V __v14, _V __v15)
 
 2018    return __convert_x86<_To>(__concat(__v0, __v1), __concat(__v2, __v3),
 
 2019                              __concat(__v4, __v5), __concat(__v6, __v7),
 
 2020                              __concat(__v8, __v9), __concat(__v10, __v11),
 
 2021                              __concat(__v12, __v13), __concat(__v14, __v15));
 
constexpr const _Tp & max(const _Tp &, const _Tp &)
This does what you think it does.
 
constexpr const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.