As a follow up to Store 2 4-bit numbers in 1 8 bit number, I am wondering if there is a generalization to it where you can store n x-bit numbers into m y-bit numbers. For example, maybe you can store 5 8-bit numbers into 3 15-bit numbers. Or maybe 2 8-bit numbers into 1 16-bit number, or 3 16-bit numbers into 2 32-bit numbers. Wondering what the implementation would be for encoding and decoding for a procedure that did this, or if it's not possible.
Something like:
function encode(i, s1, n, s2) {
// i = array of input bytes
// s1 = size of input bytes
// n = number of output bytes
// s2 = size of output bytes
}
function decode(i, s1, n, s2) {
}
Based on the answer below, I tried translating it to JavaScript but don't understand what anything really means and don't think it works.
function encode(input, inputSize, outputSize, callback) {
var buffer = 0
var bbits = 0
var mask = (1 << outputSize) - 1
while (bbits < outputSize) {
buffer |= (input << bbits)
bbits += inputSize
}
while (bbits >= outputSize) {
callback(buffer & mask)
buffer >>= outputSize
bbits -= outputSize
}
}
You can't store 5 8-bit numbers into 3 15-bit numbers because 45 bits of information obviously don't fit in 40 bits of memory. You can only do that if the total number of variations is smaller than or equal to 2k with k is the number of bits used for encoding
If the width of every value is the same then here's my attempt, which stores the bits linearly in big endian. The encode function translates the bits in a byte array to another array that stores the full value in bitLength
bits and the decode function does the reverse thing
function encode(input, bitLength) {
// size of each array element must be greater than bitLength
var output = new Uint16Array(Math.ceil(input.length * 8 / bitLength));
var remainingBits = bitLength; // the remaining bits left for the current value
// example when bitLength = 11
// start of current value
// │ next value
// │2345678901│
// ...┆ ↓ ┆ ↓ ┆ ┆ ┆ ┆... ← input bytes
// ...₀₁₂₃₄₅₆₇⁰¹²³⁴⁵⁶⁷₀₁₂₃₄₅₆₇⁰¹²³⁴⁵⁶⁷₀₁₂₃₄₅₆₇ ... ← bit position
for (var inIdx = 0, outIdx = 0; inIdx < input.length; inIdx++) {
if (remainingBits > 8) {
output[outIdx] = (output[outIdx] << 8) | input[inIdx];
remainingBits -= 8; // 8 less bits to read
} else if (remainingBits == 8) { // finish current value
output[outIdx] = (output[outIdx] << 8) | input[inIdx];
remainingBits = bitLength; // next byte is the start of the next output value
outIdx++;
} else {
var nextRemainingBits = 8 - remainingBits;
output[outIdx] = (output[outIdx] << remainingBits)
| (input[inIdx] >>> nextRemainingBits);
// the leftover bits (nextRemainingBits) in the input byte
// go into the next output
output[++outIdx] = input[inIdx] & ((1 << nextRemainingBits) - 1);
// adjust the number of remaining bits, after we've read
// `8 - remainingBits` bits for the current output
remainingBits = bitLength - nextRemainingBits;
}
}
return output;
}
function decode(input, bitLength) {
const numBits = input.BYTES_PER_ELEMENT*8;
var output = new Uint8Array(Math.ceil(input.length * bitLength / 8));
var remainingInputBits = bitLength; // the remaining bits left for the current value
// shift value to the most significant position
for (var i = 0; i < input.length; i++)
input[i] <<= numBits - bitLength;
for (var inIdx = 0, outIdx = 0; outIdx < output.length; outIdx++) {
if (remainingInputBits > 8) {
output[outIdx] = input[inIdx] >>> (numBits - 8); // get the top byte from input
input[inIdx] <<= 8; // shift the read bits out, leaving next bits on top
remainingInputBits -= 8;
} else if (remainingInputBits == 8) {
output[outIdx] = input[inIdx] >>> (numBits - 8);
remainingInputBits = bitLength;
inIdx++;
} else {
remainingInputBits = 8 - remainingInputBits;
output[outIdx] = input[inIdx] >>> (numBits - 8);
inIdx++;
output[outIdx] |= input[inIdx] >>> (numBits - remainingInputBits);
input[inIdx] <<= remainingInputBits;
remainingInputBits = bitLength - remainingInputBits;
}
}
return output;
}
function pad(s, size) {
s = (s >>> 0).toString(2);
while (s.length < (size || 2)) { s = "0" + s; }
return s;
}
function printBinaryArray(arr, padLength) {
var str = "";
for (var i = 0; i < arr.length; i++)
str += pad(arr[i], padLength) + " ";
console.log(str);
}
var inputBytes = 22;
var bitLength = 11; // each value is 11-bit long
var input = new Uint8Array(inputBytes);
window.crypto.getRandomValues(input);
var encodedData = encode(input, bitLength);
console.log("Input data", input);
printBinaryArray(input, 8);
console.log("Encoded data");
// console.log(encodedData);
printBinaryArray(encodedData, bitLength);
var decodedData = decode(encodedData, bitLength);
console.log("Decoded data", decodedData);
printBinaryArray(decodedData, 8);
for (var i = 0; i < input.length; i++)
if (input[i] != decodedData[i])
console.log("Wrong decoded data");
console.log("Data decoded successfully");
In fact the encoding and decoding procedures are just inverse of each other, so you can easily modify them to encode(input, inputBitWidth, outputBitWidth)
that can be used for both encoding and decoding, just swap the input and output width
However for odd-sized values it's often better to pack the high bits together for easier access. For example 10-bit pixel formats often pack 4 pixels into a 5-byte group, with the 8 high bits of each pixel in the first 4 bytes, and the last byte contains the 2 low bits for them
See also