Loading...
Searching...
No Matches
partitioner.hpp
1// reference:
2// - gomp: https://github.com/gcc-mirror/gcc/blob/master/libgomp/iter.c
3// - komp: https://github.com/llvm-mirror/openmp/blob/master/runtime/src/kmp_dispatch.cpp
4
5#pragma once
6
11
12namespace tf {
13
19enum class PartitionerType : int {
24};
25
26
27//template <typename C>
28//class PartitionInvoker : public PartitionerBase {
29//
30// protected
31//
32// C _closure;
33//
34// template <typename... ArgsT>
35// auto operator()(ArgsT&&... args) {
36// return std::invoke(closure, std::forward<ArgsT>(args)...);
37// }
38//
39// template <typename... ArgsT>
40// auto operator()(ArgsT&&... args) const {
41// return std::invoke(closure, std::forward<ArgsT>(args)...);
42// }
43//
44//};
45
52
53
54
55// ----------------------------------------------------------------------------
56// Partitioner Base
57// ----------------------------------------------------------------------------
58
124template <typename C = DefaultClosureWrapper>
126
127 public:
128
132 constexpr static bool is_default_wrapper_v = std::is_same_v<C, DefaultClosureWrapper>;
133
138
142 PartitionerBase() = default;
143
147 explicit PartitionerBase(size_t chunk_size) : _chunk_size {chunk_size} {}
148
153 _chunk_size {chunk_size},
154 _closure_wrapper {std::forward<C>(closure_wrapper)} {
155 }
156
160 size_t chunk_size() const { return _chunk_size; }
161
165 void chunk_size(size_t cz) { _chunk_size = cz; }
166
170 const C& closure_wrapper() const { return _closure_wrapper; }
171
175 C& closure_wrapper() { return _closure_wrapper; }
176
180 template <typename F>
181 void closure_wrapper(F&& fn) { _closure_wrapper = std::forward<F>(fn); }
182
186 template <typename F>
187 TF_FORCE_INLINE decltype(auto) operator () (F&& callable) {
188 if constexpr(is_default_wrapper_v) {
189 return std::forward<F>(callable);
190 }
191 else {
192 // closure wrapper is stateful - capture it by reference
193 return [this, c=std::forward<F>(callable)]() mutable { _closure_wrapper(c); };
194 }
195 }
196
197 protected:
198
202 size_t _chunk_size{0};
203
207 C _closure_wrapper;
208};
209
210// ----------------------------------------------------------------------------
211// Static Partitioner
212// ----------------------------------------------------------------------------
213
261template <typename C = DefaultClosureWrapper>
263
264 public:
265
269 static constexpr PartitionerType type() { return PartitionerType::STATIC; }
270
274 StaticPartitioner() = default;
275
279 explicit StaticPartitioner(size_t sz) : PartitionerBase<C>(sz) {}
280
284 explicit StaticPartitioner(size_t sz, C&& closure) :
285 PartitionerBase<C>(sz, std::forward<C>(closure)) {
286 }
287
295 size_t adjusted_chunk_size(size_t N, size_t W, size_t w) const {
296 return this->_chunk_size ? this->_chunk_size : N/W + (w < N%W);
297 }
298
299 // --------------------------------------------------------------------------
300 // scheduling methods
301 // --------------------------------------------------------------------------
302
306 template <typename F>
307 void loop(
308 size_t N, size_t W, size_t curr_b, size_t chunk_size, F&& func
309 ) {
310 size_t stride = W * chunk_size;
311 while(curr_b < N) {
312 size_t curr_e = (std::min)(curr_b + chunk_size, N);
313 if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
314 if(func(curr_b, curr_e)) {
315 return;
316 }
317 } else {
318 func(curr_b, curr_e);
319 }
320 curr_b += stride;
321 }
322 }
323
340 template <IndexRangesLike R, typename F>
341 void loop(const R& range, size_t N, size_t W, size_t curr_b, size_t chunk_size, F&& func) const {
342 size_t stride = W * chunk_size;
343 while(curr_b < N) {
344 size_t curr_e = (std::min)(curr_b + chunk_size, N);
345 if constexpr (R::rank == 1) {
346 if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
347 if(func(range.unravel(curr_b, curr_e))) {
348 return;
349 }
350 } else {
351 func(range.unravel(curr_b, curr_e));
352 }
353 curr_b = curr_e;
354 } else {
355 while(curr_b < curr_e) {
356 auto box = range.lower_slice(curr_b, curr_e - curr_b);
357 if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
358 if(func(box)) {
359 return;
360 }
361 } else {
362 func(box);
363 }
364 curr_b += box.size();
365 }
366 }
367 curr_b += stride - chunk_size;
368 }
369 }
370
371};
372
373// ----------------------------------------------------------------------------
374// Guided Partitioner
375// ----------------------------------------------------------------------------
376
416template <typename C = DefaultClosureWrapper>
418
419 public:
420
424 static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }
425
429 GuidedPartitioner() = default;
430
435 explicit GuidedPartitioner(size_t sz) : PartitionerBase<C> (sz) {}
436
440 explicit GuidedPartitioner(size_t sz, C&& closure) :
441 PartitionerBase<C>(sz, std::forward<C>(closure)) {
442 }
443
444 // --------------------------------------------------------------------------
445 // scheduling methods
446 // --------------------------------------------------------------------------
447
451 template <typename F>
452 void loop(
453 size_t N, size_t W, std::atomic<size_t>& next, F&& func
454 ) const {
455
456 size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
457
458 size_t p1 = 2 * W * (chunk_size + 1);
459 float p2 = 0.5f / static_cast<float>(W);
460 size_t curr_b = next.load(std::memory_order_relaxed);
461
462 while(curr_b < N) {
463
464 size_t r = N - curr_b;
465
466 // fine-grained
467 if(r < p1) {
468 while(1) {
469 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
470 if(curr_b >= N) {
471 return;
472 }
473 if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
474 if(func(curr_b, (std::min)(curr_b + chunk_size, N))) {
475 return;
476 }
477 } else {
478 func(curr_b, (std::min)(curr_b + chunk_size, N));
479 }
480 }
481 break;
482 }
483 // coarse-grained
484 else {
485 size_t q = static_cast<size_t>(p2 * r);
486 if(q < chunk_size) {
487 q = chunk_size;
488 }
489 size_t curr_e = (std::min)(curr_b + q, N);
490 if(next.compare_exchange_strong(curr_b, curr_e, std::memory_order_relaxed,
491 std::memory_order_relaxed)) {
492 if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
493 if(func(curr_b, curr_e)) {
494 return;
495 }
496 } else {
497 func(curr_b, curr_e);
498 }
499 curr_b = curr_e;
500 }
501 }
502 }
503 }
504
508 template <IndexRangesLike R, typename F>
509 void loop(const R& range, size_t N, size_t W, std::atomic<size_t>& next, F&& func) const {
510
511 size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
512 size_t p1 = 2 * W * (chunk_size + 1);
513 float p2 = 0.5f / static_cast<float>(W);
514 size_t curr_b = next.load(std::memory_order_relaxed);
515
516 while(curr_b < N) {
517 size_t r = N - curr_b;
518 size_t csize = (r < p1) ? chunk_size : (std::max)(static_cast<size_t>(p2 * r), chunk_size);
519 if constexpr (R::rank == 1) {
520 size_t curr_e = (std::min)(curr_b + csize, N);
521 if(next.compare_exchange_weak(curr_b, curr_e,
522 std::memory_order_relaxed,
523 std::memory_order_relaxed)) {
524 if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
525 if(func(range.unravel(curr_b, curr_e))) {
526 return;
527 }
528 } else {
529 func(range.unravel(curr_b, curr_e));
530 }
531 curr_b = curr_e;
532 }
533 } else {
534 auto box = range.upper_slice(curr_b, csize);
535 if(next.compare_exchange_weak(curr_b, curr_b + box.size(),
536 std::memory_order_relaxed,
537 std::memory_order_relaxed)) {
538 if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
539 if(func(box)) {
540 return;
541 }
542 } else {
543 func(box);
544 }
545 curr_b += box.size();
546 }
547 }
548 }
549 }
550
551};
552
553// ----------------------------------------------------------------------------
554// Dynamic Partitioner
555// ----------------------------------------------------------------------------
556
596template <typename C = DefaultClosureWrapper>
598
599 public:
600
604 static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }
605
610
614 explicit DynamicPartitioner(size_t sz) : PartitionerBase<C>(sz) {}
615
619 explicit DynamicPartitioner(size_t sz, C&& closure) :
620 PartitionerBase<C>(sz, std::forward<C>(closure)) {
621 }
622
623 // --------------------------------------------------------------------------
624 // scheduling methods
625 // --------------------------------------------------------------------------
626
630 template <typename F>
631 void loop(
632 size_t N, size_t, std::atomic<size_t>& next, F&& func
633 ) const {
634
635 size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
636 size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
637
638 while(curr_b < N) {
639 if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
640 if(func(curr_b, (std::min)(curr_b + chunk_size, N))) {
641 return;
642 }
643 } else {
644 func(curr_b, (std::min)(curr_b + chunk_size, N));
645 }
646 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
647 }
648 }
649
653 template <IndexRangesLike R, typename F>
654 void loop(const R& range, size_t N, size_t, std::atomic<size_t>& next, F&& func) const {
655 size_t curr_b = next.load(std::memory_order_relaxed);
656 size_t chunk_size = (this->_chunk_size == 0) ? size_t{1} : this->_chunk_size;
657
658 while(curr_b < N) {
659 if constexpr (R::rank == 1) {
660 size_t curr_e = (std::min)(curr_b + chunk_size, N);
661 if(next.compare_exchange_weak(curr_b, curr_e,
662 std::memory_order_relaxed,
663 std::memory_order_relaxed)) {
664 if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
665 if(func(range.unravel(curr_b, curr_e))) {
666 return;
667 }
668 } else {
669 func(range.unravel(curr_b, curr_e));
670 }
671 curr_b = curr_e;
672 }
673 } else {
674 auto box = range.upper_slice(curr_b, chunk_size);
675 if(next.compare_exchange_weak(curr_b, curr_b + box.size(),
676 std::memory_order_relaxed,
677 std::memory_order_relaxed)) {
678 if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
679 if(func(box)) {
680 return;
681 }
682 } else {
683 func(box);
684 }
685 curr_b += box.size();
686 }
687 }
688 }
689 }
690
691};
692
693// ----------------------------------------------------------------------------
694// RandomPartitioner
695// ----------------------------------------------------------------------------
696
736template <typename C = DefaultClosureWrapper>
738
739 public:
740
744 static constexpr PartitionerType type() { return PartitionerType::DYNAMIC; }
745
749 RandomPartitioner() = default;
750
754 explicit RandomPartitioner(size_t sz) : PartitionerBase<C>(sz) {}
755
759 explicit RandomPartitioner(size_t sz, C&& closure) :
760 PartitionerBase<C>(sz, std::forward<C>(closure)) {
761 }
762
766 RandomPartitioner(float alpha, float beta) : _alpha{alpha}, _beta{beta} {}
767
771 RandomPartitioner(float alpha, float beta, C&& closure) :
772 _alpha {alpha}, _beta {beta},
773 PartitionerBase<C>(0, std::forward<C>(closure)) {
774 }
775
779 float alpha() const { return _alpha; }
780
784 float beta() const { return _beta; }
785
792 std::pair<size_t, size_t> chunk_size_range(size_t N, size_t W) const {
793
794 size_t b1 = static_cast<size_t>(_alpha * N * W);
795 size_t b2 = static_cast<size_t>(_beta * N * W);
796
797 if(b1 > b2) {
798 std::swap(b1, b2);
799 }
800
801 b1 = (std::max)(b1, size_t{1});
802 b2 = (std::max)(b2, b1 + 1);
803
804 return {b1, b2};
805 }
806
807 // --------------------------------------------------------------------------
808 // scheduling methods
809 // --------------------------------------------------------------------------
810
814 template <typename F>
815 void loop(
816 size_t N, size_t W, std::atomic<size_t>& next, F&& func
817 ) const {
818
819 auto [b1, b2] = chunk_size_range(N, W);
820
821 std::default_random_engine engine {std::random_device{}()};
822 std::uniform_int_distribution<size_t> dist(b1, b2);
823
824 size_t chunk_size = dist(engine);
825 size_t curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
826
827 while(curr_b < N) {
828 if constexpr (std::is_same_v<std::invoke_result_t<F, size_t, size_t>, bool>) {
829 if(func(curr_b, (std::min)(curr_b + chunk_size, N))) {
830 return;
831 }
832 } else {
833 func(curr_b, (std::min)(curr_b + chunk_size, N));
834 }
835 chunk_size = dist(engine);
836 curr_b = next.fetch_add(chunk_size, std::memory_order_relaxed);
837 }
838 }
839
843 template <IndexRangesLike R, typename F>
844 void loop(const R& range, size_t N, size_t W, std::atomic<size_t>& next, F&& func) const {
845
846 auto [b1, b2] = chunk_size_range(N, W);
847
848 std::default_random_engine engine{std::random_device{}()};
849 std::uniform_int_distribution<size_t> dist(b1, b2);
850
851 size_t curr_b = next.load(std::memory_order_relaxed);
852
853 while(curr_b < N) {
854 if constexpr (R::rank == 1) {
855 size_t curr_e = (std::min)(curr_b + dist(engine), N);
856 if(next.compare_exchange_weak(curr_b, curr_e,
857 std::memory_order_relaxed,
858 std::memory_order_relaxed)) {
859 if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
860 if(func(range.unravel(curr_b, curr_e))) {
861 return;
862 }
863 } else {
864 func(range.unravel(curr_b, curr_e));
865 }
866 curr_b = curr_e;
867 }
868 } else {
869 auto box = range.upper_slice(curr_b, dist(engine));
870 if(next.compare_exchange_weak(curr_b, curr_b + box.size(),
871 std::memory_order_relaxed,
872 std::memory_order_relaxed)) {
873 if constexpr (std::is_same_v<std::invoke_result_t<F, R>, bool>) {
874 if(func(box)) {
875 return;
876 }
877 } else {
878 func(box);
879 }
880 curr_b += box.size();
881 }
882 }
883 }
884 }
885
886 private:
887
888 float _alpha {0.01f};
889 float _beta {0.50f};
890};
891
899
905template <typename P>
906concept PartitionerLike = std::derived_from<P, PartitionerBase<typename P::closure_wrapper_type>>;
907
915template <typename P>
916inline constexpr bool is_partitioner_v = PartitionerLike<P>;
917
918} // end of namespace tf -----------------------------------------------------
class to create a default closure wrapper
Definition partitioner.hpp:51
DynamicPartitioner()=default
default constructor
DynamicPartitioner(size_t sz, C &&closure)
construct a dynamic partitioner with the given chunk size and the closure
Definition partitioner.hpp:619
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:604
DynamicPartitioner(size_t sz)
construct a dynamic partitioner with the given chunk size
Definition partitioner.hpp:614
class to create a guided partitioner for scheduling parallel algorithms
Definition partitioner.hpp:417
GuidedPartitioner(size_t sz, C &&closure)
construct a guided partitioner with the given chunk size and the closure
Definition partitioner.hpp:440
GuidedPartitioner(size_t sz)
construct a guided partitioner with the given chunk size
Definition partitioner.hpp:435
GuidedPartitioner()=default
default constructor
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:424
PartitionerBase(size_t chunk_size)
construct a partitioner with the given chunk size
Definition partitioner.hpp:147
static constexpr bool is_default_wrapper_v
indicating if the given closure wrapper is a default wrapper (i.e., empty)
Definition partitioner.hpp:132
C closure_wrapper_type
the closure type
Definition partitioner.hpp:137
void chunk_size(size_t cz)
update the chunk size of this partitioner
Definition partitioner.hpp:165
const C & closure_wrapper() const
acquire an immutable access to the closure wrapper object
Definition partitioner.hpp:170
void closure_wrapper(F &&fn)
modify the closure wrapper object
Definition partitioner.hpp:181
PartitionerBase(size_t chunk_size, C &&closure_wrapper)
construct a partitioner with the given chunk size and closure wrapper
Definition partitioner.hpp:152
C & closure_wrapper()
acquire a mutable access to the closure wrapper object
Definition partitioner.hpp:175
PartitionerBase()=default
default constructor
size_t chunk_size() const
query the chunk size of this partitioner
Definition partitioner.hpp:160
RandomPartitioner(size_t sz, C &&closure)
construct a random partitioner with the given chunk size and the closure
Definition partitioner.hpp:759
RandomPartitioner(float alpha, float beta, C &&closure)
constructs a random partitioner with the given parameters and the closure
Definition partitioner.hpp:771
std::pair< size_t, size_t > chunk_size_range(size_t N, size_t W) const
queries the range of chunk size
Definition partitioner.hpp:792
RandomPartitioner(float alpha, float beta)
constructs a random partitioner with the given parameters
Definition partitioner.hpp:766
RandomPartitioner()=default
default constructor
static constexpr PartitionerType type()
queries the partition type (dynamic)
Definition partitioner.hpp:744
float alpha() const
queries the alpha value
Definition partitioner.hpp:779
RandomPartitioner(size_t sz)
construct a dynamic partitioner with the given chunk size
Definition partitioner.hpp:754
float beta() const
queries the beta value
Definition partitioner.hpp:784
StaticPartitioner()=default
default constructor
size_t adjusted_chunk_size(size_t N, size_t W, size_t w) const
queries the adjusted chunk size
Definition partitioner.hpp:295
StaticPartitioner(size_t sz)
construct a static partitioner with the given chunk size
Definition partitioner.hpp:279
StaticPartitioner(size_t sz, C &&closure)
construct a static partitioner with the given chunk size and the closure
Definition partitioner.hpp:284
static constexpr PartitionerType type()
queries the partition type (static)
Definition partitioner.hpp:269
determines if a type is a partitioner
Definition partitioner.hpp:906
taskflow namespace
Definition small_vector.hpp:20
@ STATIC
static task type
Definition task.hpp:25
PartitionerType
enumeration of all partitioner types
Definition partitioner.hpp:19
@ DYNAMIC
dynamic partitioner type
Definition partitioner.hpp:23
@ STATIC
static partitioner type
Definition partitioner.hpp:21
constexpr bool is_partitioner_v
determines if a type is a partitioner (variable template)
Definition partitioner.hpp:916
GuidedPartitioner<> DefaultPartitioner
default partitioner set to tf::GuidedPartitioner
Definition partitioner.hpp:898