If I have two 32-bit values, X and Y, how can I efficiently interleave their bits into one 64-bit value Z, in the order xyxyxyxy... (Z is a position on a Z-order curve.)
I could iterate over each of the bits in X and Y, setting bits in Z as I go. This seems inefficient.
Is there a shortcut to interleaving the bits from two values into one large value, which takes less than a hundred CPU instructions?
This C++-answer also applies to C: https://stackoverflow.com/a/39490836/11993121
The answer outlines the principle, but does not write out the complete solution. A working implementation is as follows:
#include <stdint.h>
uint64_t interleave(uint32_t x0, uint32_t y0)
{
static const uint64_t B[] = {0x0000FFFF0000FFFF, 0x00FF00FF00FF00FF, 0x0F0F0F0F0F0F0F0F, 0x3333333333333333, 0x5555555555555555};
static const unsigned S[] = {16, 8, 4, 2, 1};
uint64_t x = x0;
uint64_t y = y0;
for(unsigned i = 0; i < sizeof(B)/sizeof(B[0]); i++)
{
x = (x | (x << S[i])) & B[i];
y = (y | (y << S[i])) & B[i];
}
return x | (y << 1);
}
Example of test:
#include <stdio.h>
void printBinary64(uint64_t x)
{
uint64_t bit = ((uint64_t)1 << 63);
for(unsigned i = 0; i < 64; i++)
{
printf("%c", (x&bit) ? '1' : '0');
bit = bit >> 1;
}
}
void printBinary32(uint32_t x)
{
uint32_t bit = ((uint32_t)1 << 31);
for(unsigned i = 0; i < 32; i++)
{
printf("%c ", (x&bit) ? '1' : '0');
bit = bit >> 1;
}
}
int main(void)
{
uint32_t x = 0x01234567;
uint32_t y = 0xFEDCBA98;
printf(" ");
printBinary32(x);
printf("\n");
printBinary32(y);
printf("\n");
printBinary64(interleave(x,y));
printf("\n");
}