Taskflow: A General-purpose Task-parallel Programming System: taskflow/algorithm/partitioner.hpp Source File

// reference:

// - gomp: https://github.com/gcc-mirror/gcc/blob/master/libgomp/iter.c

// - komp: https://github.com/llvm-mirror/openmp/blob/master/runtime/src/kmp_dispatch.cpp


#pragma once


namespace tf {


// Enumeration of all partitioner types.
enum class PartitionerType : int {
  // static partitioner type
  STATIC  = 0,
  // dynamic partitioner type
  DYNAMIC = 1
};


//template <typename C>

//class PartitionInvoker : public PartitionerBase {

//

//  protected

//

//  C _closure;

//

//  template <typename... ArgsT>

//  auto operator()(ArgsT&&... args) {

//    return std::invoke(closure, std::forward<ArgsT>(args)...);

//  }

//

//  template <typename... ArgsT>

//  auto operator()(ArgsT&&... args) const {

//    return std::invoke(closure, std::forward<ArgsT>(args)...);

//  }

//

//};


// Empty tag class used as the default closure-wrapper template argument of the
// partitioners. PartitionerBase compares its template argument against this
// type (see is_default_wrapper_v) to decide whether a callable needs wrapping.
class DefaultClosureWrapper {};


// ----------------------------------------------------------------------------

// Partitioner Base

// ----------------------------------------------------------------------------


// Base class of all partitioners: stores a chunk size and a closure wrapper,
// and adapts task callables through that wrapper.
//
// C - closure wrapper type; defaults to DefaultClosureWrapper, in which case
//     callables are passed through untouched.
template <typename C = DefaultClosureWrapper>
class PartitionerBase {

  public:

  // indicates whether C is the default (empty) closure wrapper
  static constexpr bool is_default_wrapper_v = std::is_same_v<C, DefaultClosureWrapper>;

  // the closure wrapper type
  using closure_wrapper_type = C;

  // default constructor: chunk size 0, default-initialized closure wrapper
  PartitionerBase() = default;

  // constructs a partitioner with the given chunk size
  explicit PartitionerBase(size_t chunk_size) : _chunk_size {chunk_size} {}

  // constructs a partitioner with the given chunk size and closure wrapper
  PartitionerBase(size_t chunk_size, C&& closure_wrapper) :
    _chunk_size      {chunk_size},
    _closure_wrapper {std::forward<C>(closure_wrapper)} {
  }

  // queries the chunk size of this partitioner
  size_t chunk_size() const {
    return _chunk_size;
  }

  // updates the chunk size of this partitioner
  void chunk_size(size_t cz) {
    _chunk_size = cz;
  }

  // acquires an immutable access to the closure wrapper object
  const C& closure_wrapper() const {
    return _closure_wrapper;
  }

  // acquires a mutable access to the closure wrapper object
  C& closure_wrapper() {
    return _closure_wrapper;
  }

  // replaces the closure wrapper object with the given one
  template <typename F>
  void closure_wrapper(F&& fn) {
    _closure_wrapper = std::forward<F>(fn);
  }

  // Adapts the given callable for execution under this partitioner.
  //
  // With the default wrapper the callable is forwarded back unchanged.
  // Otherwise the result is a nullary lambda that owns the callable and, when
  // invoked, passes it to the stored closure wrapper.
  template <typename F>
  TF_FORCE_INLINE decltype(auto) operator () (F&& callable) {
    if constexpr(!is_default_wrapper_v) {
      // the wrapper may carry state, so reach it through `this` instead of
      // copying it into the lambda
      return [this, task=std::forward<F>(callable)]() mutable {
        _closure_wrapper(task);
      };
    }
    else {
      return std::forward<F>(callable);
    }
  }

  protected:

  // chunk size; 0 is the default-constructed value
  size_t _chunk_size{0};

  // closure wrapper applied by operator() when C is not the default wrapper
  C _closure_wrapper;
};


// ----------------------------------------------------------------------------

// Static Partitioner

// ----------------------------------------------------------------------------


// Static partitioner: every worker walks the iteration space in fixed-size
// chunks separated by a stride of W * chunk_size, starting from its own
// offset. No shared counter is involved.
//
// C - closure wrapper type (see PartitionerBase).
template <typename C = DefaultClosureWrapper>
class StaticPartitioner : public PartitionerBase<C> {

  public:

  // queries the partition type (static)
  static constexpr PartitionerType type() { return PartitionerType::STATIC; }

  // default constructor
  StaticPartitioner() = default;

  // constructs a static partitioner with the given chunk size
  explicit StaticPartitioner(size_t sz) : PartitionerBase<C>(sz) {}

  // constructs a static partitioner with the given chunk size and the closure
  explicit StaticPartitioner(size_t sz, C&& closure) :
    PartitionerBase<C>(sz, std::forward<C>(closure)) {
  }

  // Queries the adjusted chunk size.
  //
  // Returns the configured chunk size when it is non-zero. Otherwise returns
  // N/W, plus one for the workers whose id w is smaller than N%W, so that the
  // N iterations are spread over the W workers.
  size_t adjusted_chunk_size(size_t N, size_t W, size_t w) const {
    return this->_chunk_size ? this->_chunk_size : N/W + (w < N%W);
  }

  // --------------------------------------------------------------------------
  // scheduling methods
  // --------------------------------------------------------------------------

  // Runs func over the chunks [curr_b, curr_b + chunk_size), advancing curr_b
  // by W * chunk_size after each chunk, until curr_b reaches N. The last
  // chunk is clamped to N.
  //
  // If func(size_t, size_t) returns bool, a true result stops the loop early.
  // The method reads no member state and is const, like the range overload.
  template <typename F>
  void loop(
    size_t N, size_t W, size_t curr_b, size_t chunk_size, F&& func
  ) const {
    size_t stride = W * chunk_size;
    while(curr_b < N) {
      size_t curr_e = (std::min)(curr_b + chunk_size, N);
      if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
        if(func(curr_b, curr_e)) {
          return;
        }
      } else {
        func(curr_b, curr_e);
      }
      curr_b += stride;
    }
  }

  // Range-based variant: same striding scheme as above, but func receives a
  // sub-range object of type R instead of a pair of flat indices.
  //
  // For rank-1 ranges each chunk is handed over in one call through
  // range.unravel(curr_b, curr_e). For higher ranks the chunk is consumed
  // piecewise through range.lower_slice, advancing by the size of each
  // returned box.
  // NOTE(review): presumably lower_slice returns a non-empty box of at most the
  // requested number of elements starting at flat index curr_b - confirm
  // against the IndexRangesLike type.
  //
  // If func(R) returns bool, a true result stops the loop early.
  template <IndexRangesLike R, typename F>
  void loop(const R& range, size_t N, size_t W, size_t curr_b, size_t chunk_size, F&& func) const {
    size_t stride = W * chunk_size;
    while(curr_b < N) {
      size_t curr_e = (std::min)(curr_b + chunk_size, N);
      if constexpr (R::rank == 1) {
        if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
          if(func(range.unravel(curr_b, curr_e))) {
            return;
          }
        } else {
          func(range.unravel(curr_b, curr_e));
        }
        curr_b = curr_e;
      } else {
        while(curr_b < curr_e) {
          auto box = range.lower_slice(curr_b, curr_e - curr_b);
          if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
            if(func(box)) {
              return;
            }
          } else {
            func(box);
          }
          curr_b += box.size();
        }
      }
      // curr_b now sits at the end of the chunk just processed; skip the
      // chunks that belong to the other W-1 workers
      curr_b += stride - chunk_size;
    }
  }

};


// ----------------------------------------------------------------------------

// Guided Partitioner

// ----------------------------------------------------------------------------


// Guided partitioner: workers claim chunks from a shared atomic cursor. The
// chunk size is proportional to the remaining work (0.5/W of it) while plenty
// of work is left, and falls back to the configured chunk size near the end.
//
// C - closure wrapper type (see PartitionerBase).
template <typename C = DefaultClosureWrapper>
class GuidedPartitioner : public PartitionerBase<C> {

  public:

  // queries the partition type (dynamic)
  static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }

  // default constructor
  GuidedPartitioner() = default;

  // constructs a guided partitioner with the given chunk size
  explicit GuidedPartitioner(size_t sz) : PartitionerBase<C> (sz) {}

  // constructs a guided partitioner with the given chunk size and the closure
  explicit GuidedPartitioner(size_t sz, C&& closure) :
    PartitionerBase<C>(sz, std::forward<C>(closure)) {
  }

  // --------------------------------------------------------------------------
  // scheduling methods
  // --------------------------------------------------------------------------

  // Claims chunks of [0, N) from the shared cursor `next` and runs func on
  // each claimed [begin, end) pair until the cursor reaches N.
  //
  // N    - total number of iterations
  // W    - number of workers; used to size the chunks
  // next - shared cursor holding the first unclaimed index
  // func - callable taking (size_t, size_t); if it returns bool, a true result
  //        stops this worker early
  //
  // A chunk size of 0 is treated as 1.
  template <typename F>
  void loop(
    size_t N, size_t W, std::atomic<size_t>& next, F&& func
  ) const {

    size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;

    // p1: remaining-work threshold below which fixed-size chunks are used
    // p2: fraction of the remaining work taken by one coarse-grained chunk
    size_t p1 = 2 * W * (chunk_size + 1);
    float  p2 = 0.5f / static_cast<float>(W);
    size_t curr_b = next.load(std::memory_order_relaxed);

    while(curr_b < N) {

      size_t r = N - curr_b;

      // fine-grained: claim fixed-size chunks with fetch_add until exhausted;
      // this inner loop only exits by returning
      if(r < p1) {
        while(1) {
          curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
          if(curr_b >= N) {
            return;
          }
          if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
            if(func(curr_b, (std::min)(curr_b + chunk_size, N))) {
              return;
            }
          } else {
            func(curr_b, (std::min)(curr_b + chunk_size, N));
          }
        }
      }
      // coarse-grained: try to claim a chunk proportional to the remaining work
      else {
        size_t q = static_cast<size_t>(p2 * r);
        if(q < chunk_size) {
          q = chunk_size;
        }
        size_t curr_e = (std::min)(curr_b + q, N);
        // on failure compare_exchange reloads curr_b with the current cursor
        // value and the outer loop retries with a recomputed chunk
        if(next.compare_exchange_strong(curr_b, curr_e, std::memory_order_relaxed,
                                                        std::memory_order_relaxed)) {
          if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
            if(func(curr_b, curr_e)) {
              return;
            }
          } else {
            func(curr_b, curr_e);
          }
          curr_b = curr_e;
        }
      }
    }
  }

  // Range-based variant: func receives a sub-range object of type R.
  //
  // The chunk size is chosen as in the index-based overload, but every claim
  // goes through compare_exchange_weak on `next`. Rank-1 ranges are handed
  // over via range.unravel(curr_b, curr_e); higher ranks via
  // range.upper_slice(curr_b, csize), advancing the cursor by box.size().
  // NOTE(review): presumably upper_slice returns a non-empty box starting at
  // flat index curr_b whose size may differ from csize - confirm against the
  // IndexRangesLike type.
  //
  // If func(R) returns bool, a true result stops this worker early.
  template <IndexRangesLike R, typename F>
  void loop(const R& range, size_t N, size_t W, std::atomic<size_t>& next, F&& func) const {

    size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
    size_t p1 = 2 * W * (chunk_size + 1);
    float  p2 = 0.5f / static_cast<float>(W);
    size_t curr_b = next.load(std::memory_order_relaxed);

    while(curr_b < N) {
      size_t r = N - curr_b;
      // fixed-size chunk near the end, proportional chunk otherwise
      size_t csize = (r < p1) ? chunk_size : (std::max)(static_cast<size_t>(p2 * r), chunk_size);
      if constexpr (R::rank == 1) {
        size_t curr_e = (std::min)(curr_b + csize, N);
        if(next.compare_exchange_weak(curr_b, curr_e,
                                      std::memory_order_relaxed,
                                      std::memory_order_relaxed)) {
          if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
            if(func(range.unravel(curr_b, curr_e))) {
              return;
            }
          } else {
            func(range.unravel(curr_b, curr_e));
          }
          curr_b = curr_e;
        }
      } else {
        auto box = range.upper_slice(curr_b, csize);
        if(next.compare_exchange_weak(curr_b, curr_b + box.size(),
                                      std::memory_order_relaxed,
                                      std::memory_order_relaxed)) {
          if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
            if(func(box)) {
              return;
            }
          } else {
            func(box);
          }
          curr_b += box.size();
        }
      }
    }
  }

};


// ----------------------------------------------------------------------------

// Dynamic Partitioner

// ----------------------------------------------------------------------------


// Dynamic partitioner: workers repeatedly claim fixed-size chunks from a
// shared atomic cursor.
//
// C - closure wrapper type (see PartitionerBase).
template <typename C = DefaultClosureWrapper>
class DynamicPartitioner : public PartitionerBase<C> {

  public:

  // queries the partition type (dynamic)
  static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }

  // default constructor
  DynamicPartitioner() = default;

  // constructs a dynamic partitioner with the given chunk size
  explicit DynamicPartitioner(size_t sz) : PartitionerBase<C>(sz) {}

  // constructs a dynamic partitioner with the given chunk size and the closure
  explicit DynamicPartitioner(size_t sz, C&& closure) :
    PartitionerBase<C>(sz, std::forward<C>(closure)) {
  }

  // --------------------------------------------------------------------------
  // scheduling methods
  // --------------------------------------------------------------------------

  // Claims chunks of [0, N) from the shared cursor `next` with fetch_add and
  // runs func(begin, end) on each one; the last chunk is clamped to N.
  //
  // The second (unnamed) parameter is the worker count, unused here.
  // A chunk size of 0 is treated as 1. If func returns bool, a true result
  // stops this worker early.
  template <typename F>
  void loop(
    size_t N, size_t, std::atomic<size_t>& next, F&& func
  ) const {

    const size_t step = (this->_chunk_size != 0) ? this->_chunk_size : size_t{1};

    // each pass claims the next `step` indices; a claim at or past N ends it
    for(size_t beg = next.fetch_add(step, std::memory_order_relaxed);
        beg < N;
        beg = next.fetch_add(step, std::memory_order_relaxed)) {

      if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
        const bool stop = func(beg, (std::min)(beg + step, N));
        if(stop) {
          return;
        }
      }
      else {
        func(beg, (std::min)(beg + step, N));
      }
    }
  }

  // Range-based variant: func receives a sub-range object of type R.
  //
  // Chunks are claimed with compare_exchange_weak on `next`. Rank-1 ranges are
  // handed over via range.unravel(begin, end); higher ranks via
  // range.upper_slice(begin, chunk), advancing the cursor by box.size().
  // NOTE(review): presumably upper_slice returns a non-empty box starting at
  // the given flat index - confirm against the IndexRangesLike type.
  //
  // If func(R) returns bool, a true result stops this worker early.
  template <IndexRangesLike R, typename F>
  void loop(const R& range, size_t N, size_t, std::atomic<size_t>& next, F&& func) const {

    size_t step = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;

    // a failed exchange refreshes `beg` with the current cursor value, so the
    // loop header needs no increment expression
    for(size_t beg = next.load(std::memory_order_relaxed); beg < N; ) {

      if constexpr (R::rank == 1) {
        size_t end = (std::min)(beg + step, N);
        if(!next.compare_exchange_weak(beg, end,
                                       std::memory_order_relaxed,
                                       std::memory_order_relaxed)) {
          continue;
        }
        if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
          if(func(range.unravel(beg, end))) {
            return;
          }
        }
        else {
          func(range.unravel(beg, end));
        }
        beg = end;
      }
      else {
        auto box = range.upper_slice(beg, step);
        if(!next.compare_exchange_weak(beg, beg + box.size(),
                                       std::memory_order_relaxed,
                                       std::memory_order_relaxed)) {
          continue;
        }
        if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
          if(func(box)) {
            return;
          }
        }
        else {
          func(box);
        }
        beg += box.size();
      }
    }
  }

};


// ----------------------------------------------------------------------------

// RandomPartitioner

// ----------------------------------------------------------------------------


// Random partitioner: workers claim chunks of random size from a shared
// atomic cursor. Chunk sizes are drawn uniformly from the range returned by
// chunk_size_range(N, W), which is derived from alpha and beta.
//
// C - closure wrapper type (see PartitionerBase).
template <typename C = DefaultClosureWrapper>
class RandomPartitioner : public PartitionerBase<C> {

  public:

  // queries the partition type (dynamic)
  static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }

  // default constructor (alpha = 0.01, beta = 0.5)
  RandomPartitioner() = default;

  // constructs a random partitioner with the given chunk size
  explicit RandomPartitioner(size_t sz) : PartitionerBase<C>(sz) {}

  // constructs a random partitioner with the given chunk size and the closure
  explicit RandomPartitioner(size_t sz, C&& closure) :
    PartitionerBase<C>(sz, std::forward<C>(closure)) {
  }

  // constructs a random partitioner with the given parameters
  RandomPartitioner(float alpha, float beta) : _alpha{alpha}, _beta{beta} {}

  // constructs a random partitioner with the given parameters and the closure
  //
  // The base class is listed first because that is the order in which
  // initialization actually happens, regardless of how the list is written.
  RandomPartitioner(float alpha, float beta, C&& closure) :
    PartitionerBase<C>(0, std::forward<C>(closure)),
    _alpha {alpha}, _beta {beta} {
  }

  // queries the alpha value
  float alpha() const { return _alpha; }

  // queries the beta value
  float beta() const { return _beta; }

  // Queries the range of chunk size.
  //
  // Returns {b1, b2} where the bounds start as alpha*N*W and beta*N*W
  // (truncated), are swapped if out of order, and are then clamped so that
  // b1 >= 1 and b2 >= b1 + 1.
  // NOTE(review): the float-to-size_t conversions assume alpha and beta are
  // non-negative and the products fit in size_t - confirm callers guarantee it.
  std::pair<size_t, size_t> chunk_size_range(size_t N, size_t W) const {

    size_t b1 = static_cast<size_t>(_alpha * N * W);
    size_t b2 = static_cast<size_t>(_beta  * N * W);

    if(b1 > b2) {
      std::swap(b1, b2);
    }

    b1 = (std::max)(b1, size_t{1});
    b2 = (std::max)(b2, b1 + 1);

    return {b1, b2};
  }

  // --------------------------------------------------------------------------
  // scheduling methods
  // --------------------------------------------------------------------------

  // Claims randomly sized chunks of [0, N) from the shared cursor `next` with
  // fetch_add and runs func(begin, end) on each one; the last chunk is clamped
  // to N. A fresh chunk size is drawn before every claim.
  //
  // Each call seeds its own engine from std::random_device. If func returns
  // bool, a true result stops this worker early.
  template <typename F>
  void loop(
    size_t N, size_t W, std::atomic<size_t>& next, F&& func
  ) const {

    auto [b1, b2] = chunk_size_range(N, W);

    std::default_random_engine engine {std::random_device{}()};
    std::uniform_int_distribution<size_t> dist(b1, b2);

    size_t chunk_size = dist(engine);
    size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);

    while(curr_b < N) {
      if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
        if(func(curr_b, (std::min)(curr_b + chunk_size, N))) {
          return;
        }
      } else {
        func(curr_b, (std::min)(curr_b + chunk_size, N));
      }
      chunk_size = dist(engine);
      curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
    }
  }

  // Range-based variant: func receives a sub-range object of type R.
  //
  // Chunks are claimed with compare_exchange_weak on `next`; a new random size
  // is drawn on every attempt, including retries. Rank-1 ranges are handed
  // over via range.unravel(curr_b, curr_e); higher ranks via
  // range.upper_slice(curr_b, size), advancing the cursor by box.size().
  // NOTE(review): presumably upper_slice returns a non-empty box starting at
  // flat index curr_b - confirm against the IndexRangesLike type.
  //
  // If func(R) returns bool, a true result stops this worker early.
  template <IndexRangesLike R, typename F>
  void loop(const R& range, size_t N, size_t W, std::atomic<size_t>& next, F&& func) const {

    auto [b1, b2] = chunk_size_range(N, W);

    std::default_random_engine engine{std::random_device{}()};
    std::uniform_int_distribution<size_t> dist(b1, b2);

    size_t curr_b = next.load(std::memory_order_relaxed);

    while(curr_b < N) {
      if constexpr (R::rank == 1) {
        size_t curr_e = (std::min)(curr_b + dist(engine), N);
        if(next.compare_exchange_weak(curr_b, curr_e,
                                      std::memory_order_relaxed,
                                      std::memory_order_relaxed)) {
          if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
            if(func(range.unravel(curr_b, curr_e))) {
              return;
            }
          } else {
            func(range.unravel(curr_b, curr_e));
          }
          curr_b = curr_e;
        }
      } else {
        auto box = range.upper_slice(curr_b, dist(engine));
        if(next.compare_exchange_weak(curr_b, curr_b + box.size(),
                                      std::memory_order_relaxed,
                                      std::memory_order_relaxed)) {
          if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
            if(func(box)) {
              return;
            }
          } else {
            func(box);
          }
          curr_b += box.size();
        }
      }
    }
  }

  private:

  // lower / upper scaling factors of the chunk-size range (see chunk_size_range)
  float _alpha {0.01f};
  float _beta  {0.50f};
};


// default partitioner, set to GuidedPartitioner with the default closure wrapper
using DefaultPartitioner = GuidedPartitioner<>;


// Determines if a type is a partitioner: P must derive from PartitionerBase
// instantiated with P's own closure_wrapper_type.
template <typename P>

concept PartitionerLike = std::derived_from<P, PartitionerBase<typename P::closure_wrapper_type>>;


// Variable-template form of PartitionerLike: true if P is a partitioner.
template <typename P>

inline constexpr bool is_partitioner_v = PartitionerLike<P>;


}  // end of namespace tf -----------------------------------------------------

tf::DefaultClosureWrapper
class to create a default closure wrapper
Definition partitioner.hpp:51

tf::DynamicPartitioner::DynamicPartitioner
DynamicPartitioner()=default
default constructor

tf::DynamicPartitioner::DynamicPartitioner
DynamicPartitioner(size_t sz, C &&closure)
construct a dynamic partitioner with the given chunk size and the closure
Definition partitioner.hpp:619

tf::DynamicPartitioner::type
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:604

tf::DynamicPartitioner::DynamicPartitioner
DynamicPartitioner(size_t sz)
construct a dynamic partitioner with the given chunk size
Definition partitioner.hpp:614

tf::GuidedPartitioner
class to create a guided partitioner for scheduling parallel algorithms
Definition partitioner.hpp:417

tf::GuidedPartitioner::GuidedPartitioner
GuidedPartitioner(size_t sz, C &&closure)
construct a guided partitioner with the given chunk size and the closure
Definition partitioner.hpp:440

tf::GuidedPartitioner::GuidedPartitioner
GuidedPartitioner(size_t sz)
construct a guided partitioner with the given chunk size
Definition partitioner.hpp:435

tf::GuidedPartitioner::GuidedPartitioner
GuidedPartitioner()=default
default constructor

tf::GuidedPartitioner::type
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:424

tf::PartitionerBase::PartitionerBase
PartitionerBase(size_t chunk_size)
construct a partitioner with the given chunk size
Definition partitioner.hpp:147

tf::PartitionerBase::is_default_wrapper_v
static constexpr bool is_default_wrapper_v
indicating if the given closure wrapper is a default wrapper (i.e., empty)
Definition partitioner.hpp:132

tf::PartitionerBase::closure_wrapper_type
C closure_wrapper_type
the closure type
Definition partitioner.hpp:137

tf::PartitionerBase::chunk_size
void chunk_size(size_t cz)
update the chunk size of this partitioner
Definition partitioner.hpp:165

tf::PartitionerBase::closure_wrapper
const C & closure_wrapper() const
acquire an immutable access to the closure wrapper object
Definition partitioner.hpp:170

tf::PartitionerBase::closure_wrapper
void closure_wrapper(F &&fn)
modify the closure wrapper object
Definition partitioner.hpp:181

tf::PartitionerBase::PartitionerBase
PartitionerBase(size_t chunk_size, C &&closure_wrapper)
construct a partitioner with the given chunk size and closure wrapper
Definition partitioner.hpp:152

tf::PartitionerBase::closure_wrapper
C & closure_wrapper()
acquire a mutable access to the closure wrapper object
Definition partitioner.hpp:175

tf::PartitionerBase::PartitionerBase
PartitionerBase()=default
default constructor

tf::PartitionerBase::chunk_size
size_t chunk_size() const
query the chunk size of this partitioner
Definition partitioner.hpp:160

tf::RandomPartitioner::RandomPartitioner
RandomPartitioner(size_t sz, C &&closure)
construct a random partitioner with the given chunk size and the closure
Definition partitioner.hpp:759

tf::RandomPartitioner::RandomPartitioner
RandomPartitioner(float alpha, float beta, C &&closure)
constructs a random partitioner with the given parameters and the closure
Definition partitioner.hpp:771

tf::RandomPartitioner::chunk_size_range
std::pair< size_t, size_t > chunk_size_range(size_t N, size_t W) const
queries the range of chunk size
Definition partitioner.hpp:792

tf::RandomPartitioner::RandomPartitioner
RandomPartitioner(float alpha, float beta)
constructs a random partitioner with the given parameters
Definition partitioner.hpp:766

tf::RandomPartitioner::RandomPartitioner
RandomPartitioner()=default
default constructor

tf::RandomPartitioner::type
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:744

tf::RandomPartitioner::alpha
float alpha() const
queries the alpha value
Definition partitioner.hpp:779

tf::RandomPartitioner::RandomPartitioner
RandomPartitioner(size_t sz)
construct a random partitioner with the given chunk size
Definition partitioner.hpp:754

tf::RandomPartitioner::beta
float beta() const
queries the beta value
Definition partitioner.hpp:784

tf::StaticPartitioner::StaticPartitioner
StaticPartitioner()=default
default constructor

tf::StaticPartitioner::adjusted_chunk_size
size_t adjusted_chunk_size(size_t N, size_t W, size_t w) const
queries the adjusted chunk size
Definition partitioner.hpp:295

tf::StaticPartitioner::StaticPartitioner
StaticPartitioner(size_t sz)
construct a static partitioner with the given chunk size
Definition partitioner.hpp:279

tf::StaticPartitioner::StaticPartitioner
StaticPartitioner(size_t sz, C &&closure)
construct a static partitioner with the given chunk size and the closure
Definition partitioner.hpp:284

tf::StaticPartitioner::type
static constexpr PartitionerType type()
queries the partition type (static)
Definition partitioner.hpp:269

tf::PartitionerLike
determines if a type is a partitioner
Definition partitioner.hpp:906

tf
taskflow namespace
Definition small_vector.hpp:20

tf::TaskType::STATIC
@ STATIC
static task type
Definition task.hpp:25

tf::PartitionerType
PartitionerType
enumeration of all partitioner types
Definition partitioner.hpp:19

tf::PartitionerType::DYNAMIC
@ DYNAMIC
dynamic partitioner type
Definition partitioner.hpp:23

tf::PartitionerType::STATIC
@ STATIC
static partitioner type
Definition partitioner.hpp:21

tf::is_partitioner_v
constexpr bool is_partitioner_v
determines if a type is a partitioner (variable template)
Definition partitioner.hpp:916

tf::DefaultPartitioner
GuidedPartitioner<> DefaultPartitioner
default partitioner set to tf::GuidedPartitioner
Definition partitioner.hpp:898