// libstdc++
// simd_detail.h
// Internal macros for the simd implementation -*- C++ -*-

// Copyright (C) 2020-2022 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library. This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
// <http://www.gnu.org/licenses/>.

25#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
26#define _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
27
28#if __cplusplus >= 201703L
29
30#include <cstddef>
31#include <cstdint>
32
33/// @cond undocumented
34
35#define _GLIBCXX_SIMD_BEGIN_NAMESPACE \
36 namespace std _GLIBCXX_VISIBILITY(default) \
37 { \
38 _GLIBCXX_BEGIN_NAMESPACE_VERSION \
39 namespace experimental { \
40 inline namespace parallelism_v2 {
41#define _GLIBCXX_SIMD_END_NAMESPACE \
42 } \
43 } \
44 _GLIBCXX_END_NAMESPACE_VERSION \
45 }
46
// ISA extension detection. The following sections define all the
// _GLIBCXX_SIMD_HAVE_XXX macros. Each of them is always defined and expands
// to either 1 or 0, so it can be used directly in #if.
// ARM{{{
// NEON: the compiler predefines __ARM_NEON.
#if defined __ARM_NEON
#define _GLIBCXX_SIMD_HAVE_NEON 1
#else
#define _GLIBCXX_SIMD_HAVE_NEON 0
#endif
// NEON_A32: NEON together with __ARM_ARCH >= 8 or an AArch64 target.
// __ARM_ARCH is only evaluated after __ARM_NEON was found to be defined.
#if defined __ARM_NEON && (__ARM_ARCH >= 8 || defined __aarch64__)
#define _GLIBCXX_SIMD_HAVE_NEON_A32 1
#else
#define _GLIBCXX_SIMD_HAVE_NEON_A32 0
#endif
// NEON_A64: NEON on an AArch64 target (__aarch64__ is predefined).
#if defined __ARM_NEON && defined __aarch64__
#define _GLIBCXX_SIMD_HAVE_NEON_A64 1
#else
#define _GLIBCXX_SIMD_HAVE_NEON_A64 0
#endif
//}}}
// x86{{{
// One _GLIBCXX_SIMD_HAVE_<ext> macro per x86 ISA extension. Each macro is set
// to 1 when the compiler predefines the corresponding __<EXT>__ macro and to
// 0 otherwise, so all of them are always defined.
#ifdef __MMX__
#define _GLIBCXX_SIMD_HAVE_MMX 1
#else
#define _GLIBCXX_SIMD_HAVE_MMX 0
#endif
// SSE and SSE2 are also reported as available whenever __x86_64__ is
// defined, independent of __SSE__ / __SSE2__.
#if defined __SSE__ || defined __x86_64__
#define _GLIBCXX_SIMD_HAVE_SSE 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE 0
#endif
#if defined __SSE2__ || defined __x86_64__
#define _GLIBCXX_SIMD_HAVE_SSE2 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE2 0
#endif
#ifdef __SSE3__
#define _GLIBCXX_SIMD_HAVE_SSE3 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE3 0
#endif
#ifdef __SSSE3__
#define _GLIBCXX_SIMD_HAVE_SSSE3 1
#else
#define _GLIBCXX_SIMD_HAVE_SSSE3 0
#endif
#ifdef __SSE4_1__
#define _GLIBCXX_SIMD_HAVE_SSE4_1 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE4_1 0
#endif
#ifdef __SSE4_2__
#define _GLIBCXX_SIMD_HAVE_SSE4_2 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE4_2 0
#endif
#ifdef __XOP__
#define _GLIBCXX_SIMD_HAVE_XOP 1
#else
#define _GLIBCXX_SIMD_HAVE_XOP 0
#endif
#ifdef __AVX__
#define _GLIBCXX_SIMD_HAVE_AVX 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX 0
#endif
#ifdef __AVX2__
#define _GLIBCXX_SIMD_HAVE_AVX2 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX2 0
#endif
// Note the name difference: the compiler macro is __BMI__, the simd macro
// is ..._HAVE_BMI1.
#ifdef __BMI__
#define _GLIBCXX_SIMD_HAVE_BMI1 1
#else
#define _GLIBCXX_SIMD_HAVE_BMI1 0
#endif
#ifdef __BMI2__
#define _GLIBCXX_SIMD_HAVE_BMI2 1
#else
#define _GLIBCXX_SIMD_HAVE_BMI2 0
#endif
#ifdef __LZCNT__
#define _GLIBCXX_SIMD_HAVE_LZCNT 1
#else
#define _GLIBCXX_SIMD_HAVE_LZCNT 0
#endif
#ifdef __SSE4A__
#define _GLIBCXX_SIMD_HAVE_SSE4A 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE4A 0
#endif
#ifdef __FMA__
#define _GLIBCXX_SIMD_HAVE_FMA 1
#else
#define _GLIBCXX_SIMD_HAVE_FMA 0
#endif
#ifdef __FMA4__
#define _GLIBCXX_SIMD_HAVE_FMA4 1
#else
#define _GLIBCXX_SIMD_HAVE_FMA4 0
#endif
#ifdef __F16C__
#define _GLIBCXX_SIMD_HAVE_F16C 1
#else
#define _GLIBCXX_SIMD_HAVE_F16C 0
#endif
#ifdef __POPCNT__
#define _GLIBCXX_SIMD_HAVE_POPCNT 1
#else
#define _GLIBCXX_SIMD_HAVE_POPCNT 0
#endif
// AVX-512 foundation and its sub-extensions.
#ifdef __AVX512F__
#define _GLIBCXX_SIMD_HAVE_AVX512F 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512F 0
#endif
#ifdef __AVX512DQ__
#define _GLIBCXX_SIMD_HAVE_AVX512DQ 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512DQ 0
#endif
#ifdef __AVX512VL__
#define _GLIBCXX_SIMD_HAVE_AVX512VL 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512VL 0
#endif
#ifdef __AVX512BW__
#define _GLIBCXX_SIMD_HAVE_AVX512BW 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512BW 0
#endif
#ifdef __AVX512BITALG__
#define _GLIBCXX_SIMD_HAVE_AVX512BITALG 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512BITALG 0
#endif
#ifdef __AVX512VBMI2__
#define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512VBMI2 0
#endif
#ifdef __AVX512VBMI__
#define _GLIBCXX_SIMD_HAVE_AVX512VBMI 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512VBMI 0
#endif
#ifdef __AVX512IFMA__
#define _GLIBCXX_SIMD_HAVE_AVX512IFMA 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512IFMA 0
#endif
#ifdef __AVX512CD__
#define _GLIBCXX_SIMD_HAVE_AVX512CD 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512CD 0
#endif
#ifdef __AVX512VNNI__
#define _GLIBCXX_SIMD_HAVE_AVX512VNNI 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512VNNI 0
#endif
#ifdef __AVX512VPOPCNTDQ__
#define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512VPOPCNTDQ 0
#endif
#ifdef __AVX512VP2INTERSECT__
#define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512VP2INTERSECT 0
#endif

// ABI availability, derived from the feature macros above. For each register
// width there is a base macro and a FULL macro:
//   SSE_ABI    follows SSE,     FULL_SSE_ABI    follows SSE2
//   AVX_ABI    follows AVX,     FULL_AVX_ABI    follows AVX2
//   AVX512_ABI follows AVX512F, FULL_AVX512_ABI follows AVX512BW
#if _GLIBCXX_SIMD_HAVE_SSE
#define _GLIBCXX_SIMD_HAVE_SSE_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_SSE_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_SSE2
#define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_FULL_SSE_ABI 0
#endif

#if _GLIBCXX_SIMD_HAVE_AVX
#define _GLIBCXX_SIMD_HAVE_AVX_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_AVX2
#define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_FULL_AVX_ABI 0
#endif

#if _GLIBCXX_SIMD_HAVE_AVX512F
#define _GLIBCXX_SIMD_HAVE_AVX512_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_AVX512_ABI 0
#endif
#if _GLIBCXX_SIMD_HAVE_AVX512BW
#define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 1
#else
#define _GLIBCXX_SIMD_HAVE_FULL_AVX512_ABI 0
#endif

// Sanity check for 64-bit x86 targets.
// NOTE(review): _GLIBCXX_SIMD_HAVE_SSE2 is forced to 1 above whenever
// __x86_64__ is defined, so this condition cannot become true as written.
#if defined __x86_64__ && !_GLIBCXX_SIMD_HAVE_SSE2
#error "Use of SSE2 is required on AMD64"
#endif
//}}}

// _GLIBCXX_SIMD_NORMAL_MATH: function attribute that compiles the annotated
// function with the "finite-math-only,no-signed-zeros" optimize options.
// Expands to nothing when __clang__ is defined.
#ifdef __clang__
#define _GLIBCXX_SIMD_NORMAL_MATH
#else
#define _GLIBCXX_SIMD_NORMAL_MATH \
  [[__gnu__::__optimize__("finite-math-only,no-signed-zeros")]]
#endif
// Attribute shorthands for function declarations. _GLIBCXX_SIMD_INTRINSIC and
// _GLIBCXX_SIMD_ALWAYS_INLINE already contain the `inline` keyword; both are
// redefined to plain `inline` further down in this header when
// _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined.
#define _GLIBCXX_SIMD_NEVER_INLINE [[__gnu__::__noinline__]]
#define _GLIBCXX_SIMD_INTRINSIC \
  [[__gnu__::__always_inline__, __gnu__::__artificial__]] inline
#define _GLIBCXX_SIMD_ALWAYS_INLINE [[__gnu__::__always_inline__]] inline
// Branch prediction hints: forward __x to __builtin_expect with an expected
// value of 0 (unlikely) or 1 (likely). The result is the value of __x.
#define _GLIBCXX_SIMD_IS_UNLIKELY(__x) __builtin_expect(__x, 0)
#define _GLIBCXX_SIMD_IS_LIKELY(__x) __builtin_expect(__x, 1)

// constexpr selection depending on strict ANSI mode. When __STRICT_ANSI__ is
// defined and nonzero:
//   _GLIBCXX_SIMD_CONSTEXPR          expands to nothing
//   _GLIBCXX_SIMD_USE_CONSTEXPR_API  expands to `const`
// otherwise both expand to `constexpr`.
#if defined __STRICT_ANSI__ && __STRICT_ANSI__
#define _GLIBCXX_SIMD_CONSTEXPR
#define _GLIBCXX_SIMD_USE_CONSTEXPR_API const
#else
#define _GLIBCXX_SIMD_CONSTEXPR constexpr
#define _GLIBCXX_SIMD_USE_CONSTEXPR_API constexpr
#endif

// _GLIBCXX_SIMD_USE_CONSTEXPR is selected by compiler rather than by language
// mode: `const` when __clang__ is defined, `constexpr` otherwise.
#if defined __clang__
#define _GLIBCXX_SIMD_USE_CONSTEXPR const
#else
#define _GLIBCXX_SIMD_USE_CONSTEXPR constexpr
#endif

// Operator lists. Each _GLIBCXX_SIMD_LIST_* macro takes the name of a
// function-like macro and invokes it once per operator token, in the order
// written here:
//   BINARY:      |  &  ^
//   SHIFTS:      << >>
//   ARITHMETICS: +  -  *  /  %
#define _GLIBCXX_SIMD_LIST_BINARY(__macro) __macro(|) __macro(&) __macro(^)
#define _GLIBCXX_SIMD_LIST_SHIFTS(__macro) __macro(<<) __macro(>>)
#define _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) \
  __macro(+) __macro(-) __macro(*) __macro(/) __macro(%)

// Same lists followed by `static_assert(true)` without a terminating
// semicolon -- presumably so that an invocation can be written with a
// trailing `;` like an ordinary declaration.
#define _GLIBCXX_SIMD_ALL_BINARY(__macro) \
  _GLIBCXX_SIMD_LIST_BINARY(__macro) static_assert(true)
#define _GLIBCXX_SIMD_ALL_SHIFTS(__macro) \
  _GLIBCXX_SIMD_LIST_SHIFTS(__macro) static_assert(true)
#define _GLIBCXX_SIMD_ALL_ARITHMETICS(__macro) \
  _GLIBCXX_SIMD_LIST_ARITHMETICS(__macro) static_assert(true)

// Opt-out switch: when _GLIBCXX_SIMD_NO_ALWAYS_INLINE is defined, replace the
// attribute-carrying definitions of _GLIBCXX_SIMD_ALWAYS_INLINE and
// _GLIBCXX_SIMD_INTRINSIC with plain `inline`. Nothing is touched when the
// switch is not defined.
#ifdef _GLIBCXX_SIMD_NO_ALWAYS_INLINE
#undef _GLIBCXX_SIMD_ALWAYS_INLINE
#define _GLIBCXX_SIMD_ALWAYS_INLINE inline
#undef _GLIBCXX_SIMD_INTRINSIC
#define _GLIBCXX_SIMD_INTRINSIC inline
#endif

// _GLIBCXX_SIMD_X86INTRIN is 1 when _GLIBCXX_SIMD_HAVE_SSE or
// _GLIBCXX_SIMD_HAVE_MMX is nonzero and 0 otherwise. It gates the
// x86-specific workaround macros that follow in this header.
#if _GLIBCXX_SIMD_HAVE_SSE || _GLIBCXX_SIMD_HAVE_MMX
#define _GLIBCXX_SIMD_X86INTRIN 1
#else
#define _GLIBCXX_SIMD_X86INTRIN 0
#endif

// workaround macros {{{
// Switches for compiler and specification workarounds. The unconditional
// ones are defined to 1; the conditional ones are defined to 1 when their
// condition holds and are left undefined (not 0) otherwise.

// Use aliasing loads to help GCC understand the data accesses better.
// This also seems to hide a miscompilation on swap(x[i], x[i + 1]) with
// fixed_size_simd<float, 16> x.
#define _GLIBCXX_SIMD_USE_ALIASING_LOADS 1

// Vector conversions on x86 are not optimized.
#if _GLIBCXX_SIMD_X86INTRIN
#define _GLIBCXX_SIMD_WORKAROUND_PR85048 1
#endif

// Integer division is not optimized (skipped when __clang__ is defined).
#ifndef __clang__
#define _GLIBCXX_SIMD_WORKAROUND_PR90993 1
#endif

// Very bad codegen for extraction and concatenation of 128/256 "subregisters"
// with sizeof(element type) < 8: https://godbolt.org/g/mqUsgM
#if _GLIBCXX_SIMD_X86INTRIN
#define _GLIBCXX_SIMD_WORKAROUND_XXX_1 1
#endif

// Bad codegen for 8 Byte memcpy to __vector_type_t<char, 16>.
#define _GLIBCXX_SIMD_WORKAROUND_PR90424 1

// Bad codegen for zero-extend using simple concat(__x, 0).
#if _GLIBCXX_SIMD_X86INTRIN
#define _GLIBCXX_SIMD_WORKAROUND_XXX_3 1
#endif

// https://github.com/cplusplus/parallelism-ts/issues/65 (incorrect return type
// of static_simd_cast)
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE65 1

// https://github.com/cplusplus/parallelism-ts/issues/66 (incorrect SFINAE
// constraint on (static)_simd_cast)
#define _GLIBCXX_SIMD_FIX_P2TS_ISSUE66 1
// }}}

346/// @endcond
347
348#endif // __cplusplus >= 201703L
349#endif // _GLIBCXX_EXPERIMENTAL_SIMD_DETAIL_H_
350
351// vim: foldmethod=marker