encoding, integer, byte, bitstream

How to encode integers into other integers?


As a follow-up to Store 2 4-bit numbers in 1 8 bit number, I am wondering whether there is a generalization where you can store n x-bit numbers in m y-bit numbers. For example, maybe you can store 5 8-bit numbers in 3 15-bit numbers, 2 8-bit numbers in 1 16-bit number, or 3 16-bit numbers in 2 32-bit numbers. What would the implementation of the encoding and decoding procedures look like, or is this not possible in general?

Something like:

function encode(i, s1, n, s2) {
  // i  = array of input values
  // s1 = bit size of each input value
  // n  = number of output values
  // s2 = bit size of each output value
}

function decode(i, s1, n, s2) {

}

Based on the answer below, I tried translating it to JavaScript, but I don't really understand what any of it means and I don't think it works.

function encode(input, inputSize, outputSize, callback) {
  var buffer = 0
  var bbits = 0
  var mask = (1 << outputSize) - 1
  while (bbits < outputSize) {
    buffer |= (input << bbits)
    bbits += inputSize
  }
  while (bbits >= outputSize) {
    callback(buffer & mask)
    buffer >>= outputSize
    bbits -= outputSize
  }
}

Solution

  • You can store 5 8-bit numbers in 3 15-bit numbers, because 40 bits of information fit in 45 bits of memory; the reverse, however, is impossible, since 45 bits of information obviously don't fit in 40 bits. In general the packing only works if the total number of variations is smaller than or equal to 2^k, where k is the number of bits used for encoding.
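
    When every value uses all of its bits, that condition reduces to comparing total bit counts. A quick feasibility check might look like this (a minimal sketch; fitsLosslessly is just an illustrative name, not a standard function):

    function fitsLosslessly(n, xBits, m, yBits) {
      // n values of xBits each fit losslessly into m values of yBits each
      // exactly when the input carries no more bits than the output can hold
      return n * xBits <= m * yBits;
    }

    console.log(fitsLosslessly(5, 8, 3, 15)); // true:  40 bits of data, 45 bits of room
    console.log(fitsLosslessly(3, 15, 5, 8)); // false: 45 bits don't fit in 40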

    If the width of every value is the same, then here's my attempt, which stores the bits linearly in big-endian order. The encode function packs the bits of a byte array into another array whose elements each hold one full bitLength-bit value, and the decode function does the reverse.

    function encode(input, bitLength) {
      // each array element must be wide enough to hold bitLength bits
      // (a Uint16Array works for 8 <= bitLength <= 16)
      var output = new Uint16Array(Math.ceil(input.length * 8 / bitLength));
      var remainingBits = bitLength; // the remaining bits left for the current value
    
      // example when bitLength = 11
      //       start of current value
      //       │          next value
      //       │2345678901│
      // ...┆  ↓    ┆     ↓ ┆       ┆       ┆       ┆...      ← input bytes
      // ...₀₁₂₃₄₅₆₇⁰¹²³⁴⁵⁶⁷₀₁₂₃₄₅₆₇⁰¹²³⁴⁵⁶⁷₀₁₂₃₄₅₆₇ ...      ← bit position
    
      for (var inIdx = 0, outIdx = 0; inIdx < input.length; inIdx++) {
        if (remainingBits > 8) {
          output[outIdx] = (output[outIdx] << 8) | input[inIdx];
          remainingBits -= 8;               // 8 less bits to read
        } else if (remainingBits == 8) {    // finish current value
          output[outIdx] = (output[outIdx] << 8) | input[inIdx];
          remainingBits = bitLength; // next byte is the start of the next output value
          outIdx++;
        } else {
          var nextRemainingBits = 8 - remainingBits;
          output[outIdx] = (output[outIdx] << remainingBits)
                         | (input[inIdx] >>> nextRemainingBits);
          // the leftover bits (nextRemainingBits) in the input byte
          // go into the next output
          output[++outIdx] = input[inIdx] & ((1 << nextRemainingBits) - 1);
          // adjust the number of remaining bits, after we've read
          // `8 - remainingBits` bits for the current output
          remainingBits = bitLength - nextRemainingBits;
        }
      }
      return output;
    }
    
    function decode(input, bitLength) {
      var numBits = input.BYTES_PER_ELEMENT * 8; // bit width of each input element (16 for Uint16Array)
      var output = new Uint8Array(Math.ceil(input.length * bitLength / 8));
      var remainingInputBits = bitLength; // the remaining bits left for the current value
      
      // shift each value up to the most significant bits (note: this modifies the input array in place)
      for (var i = 0; i < input.length; i++)
        input[i] <<= numBits - bitLength;
      
      for (var inIdx = 0, outIdx = 0; outIdx < output.length; outIdx++) {
        if (remainingInputBits > 8) {
          output[outIdx] = input[inIdx] >>> (numBits - 8);  // get the top byte from input
          input[inIdx] <<= 8;   // shift the read bits out, leaving next bits on top
          remainingInputBits -= 8;
        } else if (remainingInputBits == 8) {
          output[outIdx] = input[inIdx] >>> (numBits - 8);
          remainingInputBits = bitLength;
          inIdx++;
        } else {
          remainingInputBits = 8 - remainingInputBits;
          output[outIdx] = input[inIdx] >>> (numBits - 8);
          inIdx++;
          output[outIdx] |= input[inIdx] >>> (numBits - remainingInputBits);
          input[inIdx] <<= remainingInputBits;
          remainingInputBits = bitLength - remainingInputBits;
        }
      }
      return output;
    }
    
    function pad(s, size) {
      s = (s >>> 0).toString(2);
      while (s.length < (size || 2)) { s = "0" + s; }
      return s;
    }
    
    function printBinaryArray(arr, padLength) {
        var str = "";
        for (var i = 0; i < arr.length; i++)
            str += pad(arr[i], padLength) + " ";
        console.log(str);
    }
    
    var inputBytes = 22;
    var bitLength = 11; // each value is 11-bit long
    var input = new Uint8Array(inputBytes);
    
    window.crypto.getRandomValues(input);
    
    var encodedData = encode(input, bitLength);
    console.log("Input data", input);
    printBinaryArray(input, 8);
    console.log("Encoded data");
    // console.log(encodedData);
    printBinaryArray(encodedData, bitLength);
    
    var decodedData = decode(encodedData, bitLength);
    console.log("Decoded data", decodedData);
    printBinaryArray(decodedData, 8);
    
    var ok = true;
    for (var i = 0; i < input.length; i++)
        if (input[i] != decodedData[i]) {
            console.log("Wrong decoded data at index " + i);
            ok = false;
        }
    if (ok) console.log("Data decoded successfully");

    In fact the encoding and decoding procedures are just inverses of each other, so you can easily merge them into a single encode(input, inputBitWidth, outputBitWidth) that works for both directions; to decode, just swap the input and output widths.
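
    For illustration, such a combined routine could look roughly like this (a minimal sketch, assuming both widths are at most about 26 bits so the bit buffer stays within the range where JavaScript number arithmetic is exact; like the functions above, it packs the bits linearly in big-endian order):

    function repack(input, inWidth, outWidth) {
      var output = [];
      var buffer = 0;      // bits gathered so far, most significant first
      var bufferBits = 0;  // number of bits currently in the buffer
      for (var i = 0; i < input.length; i++) {
        buffer = buffer * Math.pow(2, inWidth) + input[i]; // append inWidth bits
        bufferBits += inWidth;
        while (bufferBits >= outWidth) {                   // emit full output values
          var rest = Math.pow(2, bufferBits - outWidth);
          output.push(Math.floor(buffer / rest));          // top outWidth bits
          buffer %= rest;                                  // keep the leftover low bits
          bufferBits -= outWidth;
        }
      }
      if (bufferBits > 0)                                  // pad the tail with zero bits
        output.push(buffer * Math.pow(2, outWidth - bufferBits));
      return output;
    }

    // encoding:  repack(bytes, 8, 11)
    // decoding:  repack(values, 11, 8)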

    However, for odd-sized values it's often better to pack the high bits together for easier access. For example, 10-bit pixel formats often pack 4 pixels into a 5-byte group, with the 8 high bits of each pixel in the first 4 bytes and the 2 low bits of all four pixels collected in the last byte.
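
    A packer for that 4-pixels-into-5-bytes layout might look roughly like this (a minimal sketch; the helper names are made up):

    function packFour10BitPixels(pixels) {        // pixels = 4 values in [0, 1023]
      var out = new Uint8Array(5);
      var lowBits = 0;
      for (var i = 0; i < 4; i++) {
        out[i] = pixels[i] >>> 2;                 // 8 high bits, one byte per pixel
        lowBits |= (pixels[i] & 0x3) << (i * 2);  // 2 low bits per pixel
      }
      out[4] = lowBits;                           // low bits of all 4 pixels together
      return out;
    }

    function unpackFour10BitPixels(bytes) {
      var pixels = new Array(4);
      for (var i = 0; i < 4; i++)
        pixels[i] = (bytes[i] << 2) | ((bytes[4] >>> (i * 2)) & 0x3);
      return pixels;
    }

    With this layout a consumer that only needs 8-bit precision can read bytes 0-3 directly and ignore the last byte, which is the kind of easier access mentioned above.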
