1// <barrier> -*- C++ -*-
 
    3// Copyright (C) 2020-2021 Free Software Foundation, Inc.
 
    5// This file is part of the GNU ISO C++ Library.  This library is free
 
    6// software; you can redistribute it and/or modify it under the
 
    7// terms of the GNU General Public License as published by the
 
    8// Free Software Foundation; either version 3, or (at your option)
 
   11// This library is distributed in the hope that it will be useful,
 
   12// but WITHOUT ANY WARRANTY; without even the implied warranty of
 
   13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
   14// GNU General Public License for more details.
 
   16// Under Section 7 of GPL version 3, you are granted additional
 
   17// permissions described in the GCC Runtime Library Exception, version
 
   18// 3.1, as published by the Free Software Foundation.
 
   20// You should have received a copy of the GNU General Public License and
 
   21// a copy of the GCC Runtime Library Exception along with this program;
 
   22// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 
   23// <http://www.gnu.org/licenses/>.
 
   25// This implementation is based on libcxx/include/barrier
 
   26//===-- barrier.h --------------------------------------------------===//
 
   28// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 
   29// See https://llvm.org/LICENSE.txt for license information.
 
   30// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
   32//===---------------------------------------------------------------===//
 
   34/** @file include/barrier
 
   35 *  This is a Standard C++ Library header.
 
   38#ifndef _GLIBCXX_BARRIER
 
   39#define _GLIBCXX_BARRIER 1
 
   41#pragma GCC system_header
 
   43#if __cplusplus > 201703L
 
   44#include <bits/atomic_base.h>
 
   45#if __cpp_lib_atomic_wait && __cpp_aligned_new
 
   46#include <bits/std_thread.h>
 
   47#include <bits/unique_ptr.h>
 
   51#define __cpp_lib_barrier 201907L
 
   53namespace std _GLIBCXX_VISIBILITY(default)
 
   55_GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   57  struct __empty_completion
 
   59    _GLIBCXX_ALWAYS_INLINE void
 
   66The default implementation of __tree_barrier is a classic tree barrier.
 
   68It looks different from literature pseudocode for two main reasons:
 
   69 1. Threads that call into std::barrier functions do not provide indices,
 
   70    so a numbering step is added before the actual barrier algorithm,
 
   71    appearing as an N+1 round to the N rounds of the tree barrier.
 
   72 2. A great deal of attention has been paid to avoid cache line thrashing
 
   73    by flattening the tree structure into cache-line sized arrays, that
 
   74    are indexed in an efficient way.
 
   78  enum class __barrier_phase_t : unsigned char { };
 
   80  template<typename _CompletionF>
 
   83      using __atomic_phase_ref_t = std::__atomic_ref<__barrier_phase_t>;
 
   84      using __atomic_phase_const_ref_t = std::__atomic_ref<const __barrier_phase_t>;
 
   85      static constexpr auto __phase_alignment =
 
   86                      __atomic_phase_ref_t::required_alignment;
 
   88      using __tickets_t = std::array<__barrier_phase_t, 64>;
 
   89      struct alignas(64) /* naturally-align the heap state */ __state_t
 
   91        alignas(__phase_alignment) __tickets_t __tickets;
 
   94      ptrdiff_t _M_expected;
 
   95      unique_ptr<__state_t[]> _M_state;
 
   96      __atomic_base<ptrdiff_t> _M_expected_adjustment;
 
   97      _CompletionF _M_completion;
 
   99      alignas(__phase_alignment) __barrier_phase_t  _M_phase;
 
  102      _M_arrive(__barrier_phase_t __old_phase, size_t __current)
 
  104        const auto __old_phase_val = static_cast<unsigned char>(__old_phase);
 
  105        const auto __half_step =
 
  106                           static_cast<__barrier_phase_t>(__old_phase_val + 1);
 
  107        const auto __full_step =
 
  108                           static_cast<__barrier_phase_t>(__old_phase_val + 2);
 
  110        size_t __current_expected = _M_expected;
 
  111        __current %= ((_M_expected + 1) >> 1);
 
  113        for (int __round = 0; ; ++__round)
 
  115            if (__current_expected <= 1)
 
  117            size_t const __end_node = ((__current_expected + 1) >> 1),
 
  118                         __last_node = __end_node - 1;
 
  119            for ( ; ; ++__current)
 
  121                if (__current == __end_node)
 
  123                auto __expect = __old_phase;
 
  124                __atomic_phase_ref_t __phase(_M_state[__current]
 
  125                                                .__tickets[__round]);
 
  126                if (__current == __last_node && (__current_expected & 1))
 
  128                    if (__phase.compare_exchange_strong(__expect, __full_step,
 
  129                                                        memory_order_acq_rel))
 
  130                      break;     // I'm 1 in 1, go to next __round
 
  132                else if (__phase.compare_exchange_strong(__expect, __half_step,
 
  133                                                         memory_order_acq_rel))
 
  135                    return false; // I'm 1 in 2, done with arrival
 
  137                else if (__expect == __half_step)
 
  139                    if (__phase.compare_exchange_strong(__expect, __full_step,
 
  140                                                        memory_order_acq_rel))
 
  141                      break;    // I'm 2 in 2, go to next __round
 
  144            __current_expected = __last_node + 1;
 
  150      using arrival_token = __barrier_phase_t;
 
  152      static constexpr ptrdiff_t
 
  154      { return __PTRDIFF_MAX__; }
 
  156      __tree_barrier(ptrdiff_t __expected, _CompletionF __completion)
 
  157          : _M_expected(__expected), _M_expected_adjustment(0),
 
  158            _M_completion(move(__completion)),
 
  159            _M_phase(static_cast<__barrier_phase_t>(0))
 
  161        size_t const __count = (_M_expected + 1) >> 1;
 
  163        _M_state = std::make_unique<__state_t[]>(__count);
 
  166      [[nodiscard]] arrival_token
 
  167      arrive(ptrdiff_t __update)
 
  169        std::hash<std::thread::id> __hasher;
 
  170        size_t __current = __hasher(std::this_thread::get_id());
 
  171        __atomic_phase_ref_t __phase(_M_phase);
 
  172        const auto __old_phase = __phase.load(memory_order_relaxed);
 
  173        const auto __cur = static_cast<unsigned char>(__old_phase);
 
  174        for(; __update; --__update)
 
  176            if(_M_arrive(__old_phase, __current))
 
  179                _M_expected += _M_expected_adjustment.load(memory_order_relaxed);
 
  180                _M_expected_adjustment.store(0, memory_order_relaxed);
 
  181                auto __new_phase = static_cast<__barrier_phase_t>(__cur + 2);
 
  182                __phase.store(__new_phase, memory_order_release);
 
  183                __phase.notify_all();
 
  190      wait(arrival_token&& __old_phase) const
 
  192        __atomic_phase_const_ref_t __phase(_M_phase);
 
  193        auto const __test_fn = [=]
 
  195            return __phase.load(memory_order_acquire) != __old_phase;
 
  197        std::__atomic_wait_address(&_M_phase, __test_fn);
 
  203        _M_expected_adjustment.fetch_sub(1, memory_order_relaxed);
 
  208  template<typename _CompletionF = __empty_completion>
 
  211      // Note, we may introduce a "central" barrier algorithm at some point
 
  212      // for more space constrained targets
 
  213      using __algorithm_t = __tree_barrier<_CompletionF>;
 
  217      class arrival_token final
 
  220        arrival_token(arrival_token&&) = default;
 
  221        arrival_token& operator=(arrival_token&&) = default;
 
  222        ~arrival_token() = default;
 
  225        friend class barrier;
 
  226        using __token = typename __algorithm_t::arrival_token;
 
  227        explicit arrival_token(__token __tok) noexcept : _M_tok(__tok) { }
 
  231      static constexpr ptrdiff_t
 
  233      { return __algorithm_t::max(); }
 
  236      barrier(ptrdiff_t __count, _CompletionF __completion = _CompletionF())
 
  237      : _M_b(__count, std::move(__completion))
 
  240      barrier(barrier const&) = delete;
 
  241      barrier& operator=(barrier const&) = delete;
 
  243      [[nodiscard]] arrival_token
 
  244      arrive(ptrdiff_t __update = 1)
 
  245      { return arrival_token{_M_b.arrive(__update)}; }
 
  248      wait(arrival_token&& __phase) const
 
  249      { _M_b.wait(std::move(__phase._M_tok)); }
 
  257      { _M_b.arrive_and_drop(); }
 
  260_GLIBCXX_END_NAMESPACE_VERSION
 
  262#endif // __cpp_lib_atomic_wait && __cpp_aligned_new
 
  263#endif // __cplusplus > 201703L
 
  264#endif // _GLIBCXX_BARRIER