forked from fast-pack/SIMDCompressionAndIntersection
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtestintegration.cpp
More file actions
171 lines (145 loc) · 5.19 KB
/
Copy pathtestintegration.cpp
File metadata and controls
171 lines (145 loc) · 5.19 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
/**
* This code is released under the
* Apache License Version 2.0 http://www.apache.org/licenses/.
*
*/
#include "common.h"
#include "util.h"
#include "timer.h"
#include "bitpackinghelpers.h"
#include "simdbitpackinghelpers.h"
#include "delta.h"
#include "synthetic.h"
using namespace std;
using namespace SIMDCompressionLib;
/**
 * Returns a copy of `in` where each value has been reduced modulo 2^bit,
 * i.e. only its `bit` least-significant bits are kept.
 *
 * A width of 32 keeps every bit, so the copy is returned untouched in that
 * case (this also avoids a 32-bit shift of a 32-bit value).
 */
vector<uint32_t> maskedcopy(const vector<uint32_t> &in, const uint32_t bit) {
  vector<uint32_t> out(in);
  if (bit != 32) {
    const uint32_t modulus = 1U << bit;
    for (uint32_t &value : out) {
      value %= modulus;
    }
  }
  return out;
}
/**
 * Checks whether two containers agree, element by element, on their `bit`
 * least-significant bits.
 *
 * @param data      reference values.
 * @param recovered values to compare against the reference.
 * @param bit       number of low-order bits that must match; 32 requests a
 *                  plain equality comparison of the two containers.
 * @return true when both containers have the same number of elements and
 *         every pair of elements is equal modulo 2^bit; false otherwise.
 *         For bit < 32 a short diagnostic describing the first difference
 *         is written to cout before returning false.
 */
template <class container32bit>
bool equalOnFirstBits(const container32bit &data,
                      const container32bit &recovered, uint32_t bit) {
  if (bit == 32) {
    return data == recovered;
  }
  // Containers of different length can never match; checking up front also
  // keeps the loop below from indexing past the end of a shorter `recovered`.
  if (data.size() != recovered.size()) {
    cout << " They differ in size: data.size()= " << data.size()
         << " recovered.size()=" << recovered.size() << endl;
    return false;
  }
  const uint32_t modulus = 1U << bit;
  for (size_t k = 0; k < data.size(); ++k) {
    if (data[k] % modulus != recovered[k] % modulus) {
      cout << " They differ at k = " << k << " data[k]= " << data[k]
           << " recovered[k]=" << recovered[k] << endl;
      return false;
    }
  }
  return true;
}
/**
 * Returns a 32-bit word whose `bit` low-order bits are set and whose
 * remaining bits are clear (so mask(0) == 0 and mask(32) == 0xFFFFFFFF).
 */
uint32_t mask(uint32_t bit) {
  // The full-width case is handled separately because shifting a 32-bit
  // value by 32 positions is not a valid operation.
  return bit == 32 ? 0xFFFFFFFFU : (1U << bit) - 1;
}
/**
 * Benchmarks the decoding routines of a bit-packing helper for every bit
 * width from 1 to 32 and prints one result line per width to cout.
 *
 * For each width a reference array of N values is generated (groups of four
 * identical values, each group adding random(bit) to the previous group),
 * packed with Helper::pack, and decoded once with Helper::unpack and once
 * with Helper::iunpack. Only the decoding calls are timed. After every
 * decode the result is compared with the reference on its `bit` low-order
 * bits; on a mismatch a " Bug ..." line is printed and the function returns
 * immediately.
 *
 * @tparam Helper type exposing static pack / unpack / iunpack functions that
 *                are called as (input pointer, length, output pointer, bit).
 * @param N number of 32-bit integers in each working array.
 * @param T number of timed passes per bit width; one additional untimed
 *          warm-up pass is run before them.
 * @throws logic_error when needPaddingTo128Bits reports true for one of the
 *         working buffers.
 */
template <class Helper>
void simplebenchmark(uint32_t N = 1U << 16, uint32_t T = 1U << 9) {
  T = T + 1; // we have a warming up pass
  uint32_t bogus = 0;
  vector<uint32_t> data(N);
  vector<uint32_t> icompressed(N);
  vector<uint32_t> recovered(N);
  WallClockTimer z;
  cout << "#million of integers per second: higher is better" << endl;
  cout << "#bit, unpack,iunpack" << endl;
  for (uint32_t bit = 1; bit <= 32; ++bit) {
    vector<uint32_t> initdata(N);
    // Fill the whole array in groups of four equal values. The bound keeps
    // i + 3 inside the array, so a trailing partial group is left at zero.
    for (size_t i = 0; i + 3 < initdata.size(); i += 4) {
      initdata[i] = random(bit) + (i >= 4 ? initdata[i - 4] : 0);
      for (size_t j = 1; j < 4; ++j) {
        initdata[i + j] = initdata[i];
      }
    }
    const vector<uint32_t> refdata = initdata;
    vector<uint32_t>().swap(initdata); // release the scratch storage
    icompressed.clear();
    // 4 * N should be enough for all schemes
    icompressed.resize(4 * N, 0);
    recovered.clear();
    recovered.resize(N, 0);
    if (needPaddingTo128Bits(recovered.data())) {
      throw logic_error("Array is not aligned on 128 bit boundary!");
    }
    if (needPaddingTo128Bits(icompressed.data())) {
      throw logic_error("Array is not aligned on 128 bit boundary!");
    }
    if (needPaddingTo128Bits(refdata.data())) {
      throw logic_error("Array is not aligned on 128 bit boundary!");
    }
    double unpacktime = 0;
    double iunpacktime = 0;
    // Pass 0 is the warm-up; passes 1 .. T-1 are the timed ones, which is
    // exactly the T - 1 used to normalise the results below.
    for (uint32_t t = 0; t < T; ++t) {
      assert(data.size() == refdata.size());
      fill(icompressed.begin(), icompressed.end(), 0);
      fill(recovered.begin(), recovered.end(), 0);
      memcpy(data.data(), refdata.data(),
             data.size() * sizeof(uint32_t)); // memcpy can be slow
      Helper::pack(data.data(), data.size(), icompressed.data(), bit);
      z.reset();
      Helper::unpack(icompressed.data(), refdata.size(), recovered.data(),
                     bit);
      if (t > 0) // we don't count the first run
        unpacktime += static_cast<double>(z.split());
      if (!equalOnFirstBits(refdata, recovered, bit)) {
        cout << " Bug 1a " << bit << endl;
        return;
      }
      memcpy(data.data(), refdata.data(),
             data.size() * sizeof(uint32_t)); // memcpy can be slow
      // NOTE(review): the same Helper::pack output is decoded by both unpack
      // and iunpack; confirm against the Helper API that this pairing is the
      // intended one.
      Helper::pack(data.data(), data.size(), icompressed.data(), bit);
      z.reset();
      Helper::iunpack(icompressed.data(), refdata.size(), recovered.data(),
                      bit);
      if (t > 0) // we don't count the first run
        iunpacktime += static_cast<double>(z.split());
      if (!equalOnFirstBits(refdata, recovered, bit)) {
        cout << " Bug 2 " << bit << endl;
        return;
      }
    }
    // Computed in double so that large N * T combinations cannot wrap around
    // in 32-bit arithmetic.
    const double decoded = static_cast<double>(N) * (T - 1);
    cout << std::setprecision(4) << bit << "\t\t";
    cout << "\t\t" << decoded / unpacktime << "\t\t";
    cout << "\t\t" << decoded / iunpacktime;
    cout << endl;
  }
  cout << "# ignore this " << bogus << endl;
}
int main() {
cout << "# SIMD bit-packing (regular) cache-to-cache 2^12" << endl;
simplebenchmark<SIMDBitPackingHelpers<RegularDeltaSIMD>>(1U << 12, 1U << 14);
cout << endl;
cout << "# SIMD bit-packing (coarse delta 2) cache-to-cache 2^12" << endl;
simplebenchmark<SIMDBitPackingHelpers<CoarseDelta2SIMD>>(1U << 12, 1U << 14);
cout << endl;
cout << "# SIMD bit-packing (coarse max 4) cache-to-cache 2^12" << endl;
simplebenchmark<SIMDBitPackingHelpers<Max4DeltaSIMD>>(1U << 12, 1U << 14);
cout << endl;
cout << "# SIMD bit-packing (coarse delta 4) cache-to-cache 2^12" << endl;
simplebenchmark<SIMDBitPackingHelpers<CoarseDelta4SIMD>>(1U << 12, 1U << 14);
cout << endl;
cout << "# Scalar cache-to-cache 2^12" << endl;
simplebenchmark<BitPackingHelpers>(1U << 12, 1U << 14);
cout << endl;
return 0;
}