-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathbitmanip.hpp
480 lines (425 loc) · 14.4 KB
/
bitmanip.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
#pragma once
#ifndef BITMANIP_HPP
#define BITMANIP_HPP
#include <cstddef>
#include <cstdint>
#include <type_traits>
// ===== USER CONFIGURATION ============================================================================================
// Uncomment the following #define if you prefer std::includes over the custom solutions provided in this section.
// There are few functions that use <array> and <algorithm> (of which only std::min is used), so you can save on
// compilation time by using the definitions in this section.
// #define BITMANIP_PREFER_STD_INCLUDES
// Uncomment the following #define if you want a bitmanip:: namespace.
// #define BITMANIP_PREFER_NAMESPACE
// ====== CONDITIONALLY CONFIGURED CODE ================================================================================
#ifdef PREFER_STD_INCLUDES
#include <algorithm>
#include <array>
#include <limits>
#ifdef BITMANIP_PREFER_NAMESPACE
namespace bitmanip {
#endif
template <typename T, size_t N>
using arr = std::array<T, N>;
using std::min;
/**
* @brief templated variable which contains the number of bits for any given integer type.
* Example: bits_v<uint32_t> = 32
*/
template <typename Int, std::enable_if_t<std::is_integral_v<Int>, int> = 0>
constexpr size_t bits_v = std::numeric_limits<Int>::digits;
#else
#ifdef BITMANIP_PREFER_NAMESPACE
namespace bitmanip {
#endif
/**
* @brief Simple implementation of min to avoid including the entirety of <algorithm>
* @param x the first value
* @param y the second value
* @param the minimum of the values
*/
template <typename T>
constexpr T min(T x, T y)
{
return x < y ? x : y;
}
template <typename T, size_t N>
struct arr {
T data[N];
};
/**
* @brief templated variable which contains the number of bits for any given integer type.
* Example: bits_v<uint32_t> = 32
*/
template <typename Int, std::enable_if_t<std::is_integral_v<Int>, int> = 0>
constexpr size_t bits_v = sizeof(Int) * 8;
#endif
// ==== UTILITY ========================================================================================================
/**
* @brief Integer division but with ceiling (rounding up to the next integer) instead of rounding down as usual.
* @param the dividend
* @param the divisor
* @param the quotient, rounded up to the nearest integer
*/
template <typename Int, std::enable_if_t<(std::is_integral_v<Int>), int> = 0>
constexpr Int div_ceil(Int x, Int y)
{
return 1 + ((x - 1) / y);
}
// ===== BIT MANIPULATION ==============================================================================================
/**
* @brief Returns whether an unsigned integer is a power of 2 or zero.
* Note that this test is faster than having to test if val is a power of 2.
* @param val the parameter to test
* @return true if val is a power of 2 or if val is zero
*/
template <typename Uint, std::enable_if_t<std::is_unsigned_v<Uint>, int> = 0>
constexpr bool is_pow2_or_zero(Uint val)
{
return (val & (val - 1)) == 0;
}
/**
* @brief Returns whether an unsigned integer is a power of 2.
* @param val the parameter to test
* @return true if val is a power of 2
* @see is_pow2_or_zero
*/
template <typename Uint, std::enable_if_t<std::is_unsigned_v<Uint>, int> = 0>
constexpr bool is_pow2(Uint val)
{
return val != 0 && is_pow2_or_zero(val);
}
/**
* @brief Naive implementation of log2 using repeated single-bit rightshifting.
*/
template <typename Uint, std::enable_if_t<std::is_unsigned_v<Uint>, int> = 0>
constexpr Uint log2_floor_naive(Uint val) noexcept
{
Uint result = 0;
while (val >>= 1) {
++result;
}
return result;
}
/**
* @brief Fast implementation of log2.
* See https://graphics.stanford.edu/~seander/bithacks.html#IntegerLog.
*
* Unrolled 32-bit version:
* unsigned shift = (v > 0xFFFF) << 4;
v >>= shift;
r |= shift;
shift = (v > 0xFF ) << 3;
v >>= shift;
r |= shift;
shift = (v > 0xF ) << 2;
v >>= shift;
r |= shift;
shift = (v > 0x3 ) << 1;
v >>= shift;
r |= shift;
shift = (v > 1) << 0;
r >>= shift;
r |= shift;
*/
template <typename Uint, std::enable_if_t<std::is_unsigned_v<Uint>, int> = 0>
constexpr Uint log2_floor_fast(Uint v) noexcept
{
constexpr size_t iterations = log2_floor_naive(bits_v<Uint>);
unsigned result = 0;
for (size_t i = iterations; i != 0; --i) {
unsigned compBits = 1 << (i - 1);
Uint compShift = Uint{1} << compBits;
unsigned shift = unsigned{v >= compShift} << (i - 1);
v >>= shift;
result |= shift;
}
return result;
}
/**
* @brief log2_floor implementation using De Bruijn multiplication.
* See https://graphics.stanford.edu/~seander/bithacks.html#IntegerLogDeBruijn.
* @param val the value
*/
constexpr uint32_t log2_floor_debruijn(uint32_t val) noexcept
{
constexpr uint32_t MultiplyDeBruijnBitPosition[32] = {0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30,
8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31};
// first round up to one less than a power of 2
// this step is not necessary if val is a power of 2
// Note: the link in @see says that this is rounding down, but it is actually rounding up
val |= val >> 1;
val |= val >> 2;
val |= val >> 4;
val |= val >> 8;
val |= val >> 16;
return MultiplyDeBruijnBitPosition[(val * uint32_t{0x07C4ACDD}) >> 27];
}
/**
* @brief Computes the floored binary logarithm of a given integer.
* Example: log2_floor(100) = 6
*
* This templated function will choose the best available method depending on the type of the integer.
* It may fail if a negative signed integer is given.
*
* @param v the value
* @return the floored binary logarithm
*/
template <typename Int, std::enable_if_t<std::is_integral_v<Int>, int> = 0>
constexpr Int log2_floor(Int v) noexcept
{
if constexpr (std::is_signed_v<Int>) {
using Uint = std::make_unsigned_t<Int>;
DEBUG_ASSERT_GE(v, 0);
return log2_floor_fast<Uint>(static_cast<Uint>(v));
}
else {
return log2_floor_fast<Int>(v);
}
}
template <>
constexpr uint32_t log2_floor<uint32_t>(uint32_t v) noexcept
{
return log2_floor_debruijn(v);
}
/**
* @brief Computes the ceiled binary logarithm of a given integer.
* Example: log2_ceil(100) = 7
*
* This templated function will choose the best available method depending on the type of the integer.
* It may fail if a negative signed integer is given.
*
* @param v the value
* @return the floored binary logarithm
*/
template <typename Int, std::enable_if_t<std::is_integral_v<Int>, int> = 0>
constexpr Int log2_ceil(Int val) noexcept
{
const Int result = log2_floor(val);
const bool inputIsntPow2 = val != (static_cast<Int>(1) << result);
return result + inputIsntPow2;
}
/**
* @brief Rounds up an unsigned integer to the next power of 2.
* Powers of two are not affected.
* Examples: 100 -> 128, 1 -> 1, 3 -> 4, 3000 -> 4096
* @param v the value to round up
*/
template <typename Uint, std::enable_if_t<std::is_unsigned_v<Uint>, int> = 0>
constexpr Uint ceil_pow2(Uint v)
{
DEBUG_ASSERT_NE(v, 0);
constexpr size_t iterations = log2_floor_naive(bits_v<Uint>);
// decrement is necessary so that powers of 2 don't get increased
v--;
for (size_t i = 0; i < iterations; ++i) {
// after all iterations, all bits right to the msb will be filled with 1
v |= v >> (1 << i);
}
// this step turns the number back to a power of two
return v + 1;
}
/**
* @brief Rounds down an unsigned integer to the next power of 2.
* Powers of 2 are not affected.
* Examples: 100 -> 64, 1 -> 1, 3 -> 2, 3000 -> 2048
* @param v the value to round down
*/
template <typename Uint, std::enable_if_t<std::is_unsigned_v<Uint>, int> = 0>
constexpr Uint floor_pow2(Uint v)
{
DEBUG_ASSERT_NE(v, 0);
constexpr size_t iterations = log2_floor_naive(bits_v<Uint>);
// leftshift is necessary to floor instead of ceil
v >>= 1;
for (size_t i = 0; i < iterations; ++i) {
// after all iterations, all bits right to the msb will be filled with 1
v |= v >> (1 << i);
}
// this step turns the number back to a power of two
return v + 1;
}
/**
* @brief Interleaves an input number with <bits> zero-bits per input bit. Example: 0b11 -> 0b0101
* @param input the input number
* @param bits the amount of zero bits per input bit to interleave
* @return the input number interleaved with input-bits
*/
constexpr uint64_t ileave_zeros_naive(uint32_t input, size_t bits)
{
const auto lim = min<size_t>(32, div_ceil<size_t>(64, bits + 1));
uint64_t result = 0;
for (size_t i = 0, b_out = 0; i < lim; ++i) {
result |= static_cast<uint64_t>(input & 1) << b_out;
input >>= 1;
b_out += bits + 1;
}
return result;
}
/**
* @brief Removes each interleaved <bits> bits. Example: 0b010101 --rem 1--> 0b111
* @param input the input number
* @param bits input bits per output bit
* @return the the output with removed bits
*/
constexpr uint64_t rem_ileaved_bits_naive(uint64_t input, size_t bits) noexcept
{
// increment once to avoid modulo divisions by 0
// this way our function is noexcept and safe for all inputs
++bits;
uint64_t result = 0;
for (size_t i = 0, b_out = 0; i < 64; ++i) {
if (i % bits == 0) {
result |= (input & 1) << b_out++;
}
input >>= 1;
}
return result;
}
/**
* @brief Duplicates each input bit <bits> times. Example: 0b101 -> 0b110011
* @param input the input number
* @param out_bits_per_in_bits the output bits per input bits
* @return the output number or 0 if the bits parameter was zero
*/
constexpr uint64_t dupl_bits_naive(uint64_t input, size_t out_bits_per_in_bits)
{
if (out_bits_per_in_bits == 0) {
return 0;
}
const auto lim = div_ceil<size_t>(64, out_bits_per_in_bits);
uint64_t result = 0;
for (size_t i = 0, b_out = 0; i < lim; ++i) {
for (size_t j = 0; j < out_bits_per_in_bits; ++j, ++b_out) {
result |= static_cast<uint64_t>((input >> i) & 1) << b_out;
}
}
return result;
}
/**
* @brief Interleaves <BITS> zero-bits inbetween each input bit.
* @param input the input number
* @tparam BITS the number of bits to be interleaved, must be > 0
* @return the input interleaved with <BITS> bits
*/
template <size_t BITS>
constexpr uint64_t ileave_zeros(uint32_t input)
{
if constexpr (BITS == 0) {
return input;
}
else {
constexpr uint64_t MASKS[] = {
dupl_bits_naive(ileave_zeros_naive(~uint32_t(0), BITS), 1),
dupl_bits_naive(ileave_zeros_naive(~uint32_t(0), BITS), 2),
dupl_bits_naive(ileave_zeros_naive(~uint32_t(0), BITS), 4),
dupl_bits_naive(ileave_zeros_naive(~uint32_t(0), BITS), 8),
dupl_bits_naive(ileave_zeros_naive(~uint32_t(0), BITS), 16),
};
// log2_floor(0) == 0 so this is always safe, even for 1 bit
constexpr int start = 4 - (log2_floor(BITS >> 1));
uint64_t n = input;
for (int i = start; i != -1; --i) {
size_t shift = BITS * (1 << i);
n |= n << shift;
n &= MASKS[i];
}
return n;
}
}
/**
* @brief Removes each interleaved <BITS> bits. Example: 0b010101 --rem 1--> 0b111
* If BITS is zero, no bits are removed and the input is returned.
* @param input the input number
* @param bits input bits per output bit
* @return the the output with removed bits
*/
template <size_t BITS>
constexpr uint64_t rem_ileaved_bits(uint64_t input) noexcept
{
if constexpr (BITS == 0) {
return input;
}
else {
constexpr uint64_t MASKS[] = {
dupl_bits_naive(ileave_zeros_naive(~uint32_t(0), BITS), 1),
dupl_bits_naive(ileave_zeros_naive(~uint32_t(0), BITS), 2),
dupl_bits_naive(ileave_zeros_naive(~uint32_t(0), BITS), 4),
dupl_bits_naive(ileave_zeros_naive(~uint32_t(0), BITS), 8),
dupl_bits_naive(ileave_zeros_naive(~uint32_t(0), BITS), 16),
dupl_bits_naive(ileave_zeros_naive(~uint32_t(0), BITS), 32),
};
// log2_floor(0) == 0 so this is always safe, even for 1 bit
constexpr size_t iterations = 5 - (log2_floor(BITS >> 1));
input &= MASKS[0];
for (size_t i = 0; i < iterations; ++i) {
size_t rshift = (1 << i) * BITS;
input |= input >> rshift;
input &= MASKS[i + 1];
}
return input;
}
}
/**
* @brief Interleaves 2 integers, where hi comprises the upper bits of each bit pair and lo the lower bits.
* Examle: ileave2(0b111, 0b000) = 0b101010
*
* This is also referred to as a Morton Code in scientific literature.
*
* @param hi the high bits
* @param lo the low bits
* @return the interleaved bits
*/
constexpr uint64_t ileave2(uint32_t hi, uint32_t lo)
{ // TODO implement like ileav3
constexpr uint64_t MASKS[] = {0x5555'5555'5555'5555,
0x3333'3333'3333'3333,
0x0F0F'0F0F'0F0F'0F0F,
0x00FF'00FF'00FF'00FF,
0x0000'FFFF'0000'FFFF};
uint64_t result = 0;
uint32_t *nums[] = {&hi, &lo};
for (size_t i = 0; i < 2; ++i) {
uint64_t n = *nums[i];
for (int i = 4; i != -1; --i) {
n |= n << (1 << i);
n &= MASKS[i];
}
result |= n << (1 - i);
}
return result;
}
/**
* @brief Interleaves 3 integers, where x comprises the uppermost bits of each bit triple and z the lowermost bits.
*
* This is also referred to as a Morton Code in scientific literature.
*
* @param x the highest bits
* @param y the middle bits
* @param z the lowest bits
* @return the interleaved bits
*/
constexpr uint64_t ileave3(uint32_t x, uint32_t y, uint32_t z)
{
return (ileave_zeros<2>(x) << 2) | (ileave_zeros<2>(y) << 1) | ileave_zeros<2>(z);
}
/**
* @brief Deinterleaves 3 integers which are interleaved in a single number.
* Visualization: abcdefghi -> (adg, beh, cfi)
*
* This is also referred to as a Morton Code in scientific literature.
*
* @param n the number
* @return the interleaved bits
*/
constexpr arr<uint32_t, 3> dileave3(uint64_t n)
{
uint32_t x = static_cast<uint32_t>(rem_ileaved_bits<2>(n >> 2));
uint32_t y = static_cast<uint32_t>(rem_ileaved_bits<2>(n >> 1));
uint32_t z = static_cast<uint32_t>(rem_ileaved_bits<2>(n >> 0));
return {x, y, z};
}
#ifdef BITMANIP_PREFER_NAMESPACE
}
#endif
#endif // BITMANIP_HPP