Skip to content

Commit 3ee88db

Browse files
committed
fix constants
1 parent 56a377f commit 3ee88db

1 file changed

Lines changed: 48 additions & 47 deletions

File tree

blksort.h

Lines changed: 48 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
#ifndef INC_BLKSORT_H_
22
#define INC_BLKSORT_H_
3+
/**
4+
@file blksort.h
5+
6+
USAGE:
7+
Put '#define BLKSORT_IMPLEMENTATION' before including this file to create the implementation.
8+
*/
9+
310
#include <cstdint>
411
#if defined(_MSC_VER)
512
# define BLKSORT_RESTRICT __restrict
@@ -35,9 +42,9 @@
3542

3643
#ifndef BLKSORT_ALIGN
3744
# if defined(_MSC_VER)
38-
#define BLKSORT_ALIGN(x) __declspec(align(x))
45+
# define BLKSORT_ALIGN(x) __declspec(align(x))
3946
# elif defined(__GNUC__) || defined(__clang__)
40-
#define BLKSORT_ALIGN(x) __attribute__((aligned(x)))
47+
# define BLKSORT_ALIGN(x) __attribute__((aligned(x)))
4148
# else
4249
# error
4350
# endif
@@ -109,13 +116,13 @@ class BlkSort
109116

110117
#include <algorithm>
111118
#ifdef __AVX__
112-
#define BLKSORT_AVX (1)
113-
#include <immintrin.h>
119+
# define BLKSORT_AVX (1)
120+
# include <immintrin.h>
114121
#endif
115122

116123
#ifdef __ARM_NEON
117-
#define BLKSORT_NEON (1)
118-
#include <arm_neon.h>
124+
# define BLKSORT_NEON (1)
125+
# include <arm_neon.h>
119126
#endif
120127

121128
#if BLOCKSORT_PERF
@@ -359,7 +366,8 @@ void sort(uint32_t size, Item* data, uint32_t depth)
359366
void counting_sort(uint32_t size, uint16_t* dst, const uint8_t* key, const uint16_t* value)
360367
{
361368
assert(0 == (size & 15));
362-
BLKSORT_ALIGN(16) uint16_t count[259];
369+
BLKSORT_ALIGN(16)
370+
uint16_t count[259];
363371
::memset(count, 0, 256 * sizeof(uint16_t));
364372
for(uint32_t i = 0; i < size; i += 4) {
365373
count[key[i + 0]] += 1;
@@ -542,14 +550,13 @@ void BlkSort::decode_internal(uint8_t* BLKSORT_RESTRICT dst, uint8_t* BLKSORT_RE
542550
start = std::chrono::high_resolution_clock::now();
543551
#endif
544552
uint16_t* id = (uint16_t*)buffer_;
553+
// clang-format off
554+
BLKSORT_ALIGN(Align) static const uint16_t ID0[8] = {0,1,2,3,4,5,6,7};
555+
BLKSORT_ALIGN(Align) static const uint16_t ID1[8] = {8,9,10,11,12,13,14,15};
556+
BLKSORT_ALIGN(Align) static const uint16_t ID2[8] = {16,17,18,19,20,21,22,23};
557+
BLKSORT_ALIGN(Align) static const uint16_t ID3[8] = {24,25,26,27,28,29,30,31};
558+
// clang-format on
545559
#if defined(BLKSORT_AVX)
546-
// clang-format off
547-
BLKSORT_ALIGN(16) static const uint16_t ID0[8] = {0,1,2,3,4,5,6,7};
548-
BLKSORT_ALIGN(16) static const uint16_t ID1[8] = {8,9,10,11,12,13,14,15};
549-
BLKSORT_ALIGN(16) static const uint16_t ID2[8] = {16,17,18,19,20,21,22,23};
550-
BLKSORT_ALIGN(16) static const uint16_t ID3[8] = {24,25,26,27,28,29,30,31};
551-
// clang-format on
552-
553560
# if 0
554561
if (size_ <= 32) {
555562
__m128i c0 = _mm_load_si128((const __m128i*)ID0);
@@ -563,30 +570,24 @@ void BlkSort::decode_internal(uint8_t* BLKSORT_RESTRICT dst, uint8_t* BLKSORT_RE
563570
}
564571

565572
} else
566-
#else
567-
__m128i c0 = _mm_load_si128((const __m128i*)ID0);
568-
__m128i c1 = _mm_load_si128((const __m128i*)ID1);
569-
__m128i c2 = _mm_load_si128((const __m128i*)ID2);
570-
__m128i c3 = _mm_load_si128((const __m128i*)ID3);
571-
__m128i add = _mm_set1_epi16(32);
572-
for(uint32_t i = 0; i < size_; i += 32) {
573-
_mm_store_si128((__m128i*)&id[i], c0);
574-
c0 = _mm_adds_epi16(c0, add);
575-
_mm_store_si128((__m128i*)&id[i + 8], c1);
576-
c1 = _mm_adds_epi16(c1, add);
577-
_mm_store_si128((__m128i*)&id[i + 16], c2);
578-
c2 = _mm_adds_epi16(c2, add);
579-
_mm_store_si128((__m128i*)&id[i + 24], c3);
580-
c3 = _mm_adds_epi16(c3, add);
581-
}
573+
# else
574+
__m128i c0 = _mm_load_si128((const __m128i*)ID0);
575+
__m128i c1 = _mm_load_si128((const __m128i*)ID1);
576+
__m128i c2 = _mm_load_si128((const __m128i*)ID2);
577+
__m128i c3 = _mm_load_si128((const __m128i*)ID3);
578+
__m128i add = _mm_set1_epi16(32);
579+
for(uint32_t i = 0; i < size_; i += 32) {
580+
_mm_store_si128((__m128i*)&id[i], c0);
581+
c0 = _mm_adds_epi16(c0, add);
582+
_mm_store_si128((__m128i*)&id[i + 8], c1);
583+
c1 = _mm_adds_epi16(c1, add);
584+
_mm_store_si128((__m128i*)&id[i + 16], c2);
585+
c2 = _mm_adds_epi16(c2, add);
586+
_mm_store_si128((__m128i*)&id[i + 24], c3);
587+
c3 = _mm_adds_epi16(c3, add);
588+
}
582589
# endif
583590
#elif defined(BLKSORT_NEON)
584-
// clang-format off
585-
BLKSORT_ALIGN(16) static const uint16_t ID0[8] = {0,1,2,3,4,5,6,7};
586-
BLKSORT_ALIGN(16) static const uint16_t ID1[8] = {8,9,10,11,12,13,14,15};
587-
BLKSORT_ALIGN(16) static const uint16_t ID2[8] = {16,17,18,19,20,21,22,23};
588-
BLKSORT_ALIGN(16) static const uint16_t ID3[8] = {24,25,26,27,28,29,30,31};
589-
// clang-format on
590591
uint16x8_t c0 = vld1q_u16(ID0);
591592
uint16x8_t c1 = vld1q_u16(ID1);
592593
uint16x8_t c2 = vld1q_u16(ID2);
@@ -604,11 +605,11 @@ void BlkSort::decode_internal(uint8_t* BLKSORT_RESTRICT dst, uint8_t* BLKSORT_RE
604605
}
605606

606607
#else
607-
for(uint32_t i = 0; i < size_; i+=4) {
608-
id[i+0] = i+0;
609-
id[i+1] = i+1;
610-
id[i+2] = i+2;
611-
id[i+3] = i+3;
608+
for(uint32_t i = 0; i < size_; i += 4) {
609+
id[i + 0] = i + 0;
610+
id[i + 1] = i + 1;
611+
id[i + 2] = i + 2;
612+
id[i + 3] = i + 3;
612613
}
613614
#endif
614615

@@ -660,10 +661,10 @@ void BlkSort::decode_internal(uint8_t* BLKSORT_RESTRICT dst, uint8_t* BLKSORT_RE
660661

661662
void BlkSort::mtf_init(uint8_t* BLKSORT_RESTRICT id)
662663
{
663-
static BLKSORT_ALIGN(16) const uint8_t ID0[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
664-
static BLKSORT_ALIGN(16) const uint8_t ID1[16] = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
665-
static BLKSORT_ALIGN(16) const uint8_t ID2[16] = {32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47};
666-
static BLKSORT_ALIGN(16) const uint8_t ID3[16] = {48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
664+
static BLKSORT_ALIGN(Align) const uint8_t ID0[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
665+
static BLKSORT_ALIGN(Align) const uint8_t ID1[16] = {16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31};
666+
static BLKSORT_ALIGN(Align) const uint8_t ID2[16] = {32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47};
667+
static BLKSORT_ALIGN(Align) const uint8_t ID3[16] = {48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63};
667668
#ifdef BLKSORT_AVX
668669
__m128i c0 = _mm_load_si128((const __m128i*)ID0);
669670
__m128i c1 = _mm_load_si128((const __m128i*)ID1);
@@ -727,7 +728,7 @@ void BlkSort::mtf_encode(uint32_t size, uint8_t* BLKSORT_RESTRICT data)
727728
std::chrono::high_resolution_clock::time_point start, end;
728729
start = std::chrono::high_resolution_clock::now();
729730
#endif
730-
BLKSORT_ALIGN(16) uint8_t table[256];
731+
BLKSORT_ALIGN(Align) uint8_t table[256];
731732
mtf_init(table);
732733
#if BLOCKSORT_PERF
733734
end = std::chrono::high_resolution_clock::now();
@@ -761,7 +762,7 @@ void BlkSort::mtf_decode(uint32_t size, uint8_t* BLKSORT_RESTRICT data)
761762
std::chrono::high_resolution_clock::time_point start, end;
762763
start = std::chrono::high_resolution_clock::now();
763764
#endif
764-
BLKSORT_ALIGN(16) uint8_t table[256];
765+
BLKSORT_ALIGN(Align) uint8_t table[256];
765766
mtf_init(table);
766767
#if BLOCKSORT_PERF
767768
end = std::chrono::high_resolution_clock::now();

0 commit comments

Comments
 (0)