javascriptradix

Interpret string as number; reencode it into another base


Consider the concept of a "number alphabet"; a "number alphabet" is a String where each character of the String represents a valid digit that can be used to represent a number. The overall length of the String represents the base of that specific "number alphabet". The order of the characters in the String represents the successive values of each character. A character may not repeat throughout the "number alphabet".

For example, the String '0123456789' represents typical decimal numbers; any String whose characters are all found within '0123456789' is able to be interpreted as a decimal value. Specifically, we should be able to say interpret('0123456789', '127') === 127. This interpret function takes a "number alphabet" and a value within that "number alphabet", and returns the numeric value.

It's not too hard to implement interpret:

const interpret = (alphabet, str) => {
  
  let sum = 0;
  const base = alphabet.length;
  const maxInd = str.length - 1;
  
  for (let i = 0; i <= maxInd; i++) {
    
    // Simple to get value of char within alphabet - I know this is O(m * n) but not
    // concerned about that here
    const chrVal = alphabet.indexOf(str[i]);
    
    // Earlier characters count for more, same as how in the number 4278 the 4 represents
    // 4000, larger than the 2 which is 200, larger than the 7 which is 70, etc.
    const placeInd = maxInd - i;
    
    // The "place" value in `str`, same as how in the number 4278 the 4 is 1000s, 2 is
    // 100s, 7 is 10s, etc.
    const placeVal = base ** placeInd;
        
    // Overall the value contributed by this "place" in `str` is `placeVal` times the
    // value of the character in `str` at this place
    sum += placeVal * chrVal;
    
  }
  return sum;
  
};

const tests = [
  { alpha: '0123456789', str: '20' },
  { alpha: '0123456789', str: '119992' },
  { alpha: '0123456789', str: '0000119992' },
  { alpha: '0123456789abcdef', str: 'ff' },
  { alpha: '0123456789abcdef', str: 'beef' },
  { alpha: '0123456789abcdef', str: 'deadbeef' },
  { alpha: '0123456789abcdef', str: 'a0' },
  { alpha: '01', str: '100111100' },
  { alpha: '-+', str: '+--++++--' },
  { alpha: '~!@#', str: '@#!#' }, // Now there's no reason we can't do this
];
for (const test of tests) console.log(`interpret('${test.alpha}', '${test.str}') = ${interpret(test.alpha, test.str)}`);

My main goal, however, is to translate a string from any arbitrary alphabet to any other; I am calling this task reinterpret and here is an example usage:

reinterpret('0123456789', '01', '1337') === '10100111001'

In this case "0123456789" is given as the source alphabet and "01" as the target alphabet; the string '1337' represents 1337 in the source alphabet, and reinterpret returns '10100111001', representing the same value in the target alphabet.

You'd think implementating reinterpret would be easy as we can easily write a function which reverses interpret (call it uninterpret) and then simply implement as:

reinterpret = (srcAlpha, trgAlpha, str) => uninterpret(trgAlpha, interpret(srcAlpha, str));

There is a problem, though. Here is an uninterpret implementation; note that it reverses the results from interpret:

const logBaseHandleLikelyIntegers = (base, num) => {
  const eps = 0.000000000000005;
  const rough = Math.log(num) / Math.log(base); // May have a floating point error
  
  // Try to correct floating point error when it looks like it should be an integer
  if (Math.floor(rough + eps) > rough) return Math.floor(rough) + 1;
  if (Math.ceil( rough - eps) < rough) return Math.ceil(rough) - 1;
  
  return rough;
};

const uninterpret = (alpha, num) => {
  
  const base = alpha.length;
    
  // Use log where the base is the alphabet length to determine number of chars needed
  const digitsNeeded = Math.floor(logBaseHandleLikelyIntegers(base, num) + 1);
    
  let str = '';
  for (let p = digitsNeeded - 1; p >= 0; p--) {
    const pow = Math.pow(base, p);
    const div = Math.floor(num / pow);
    str += alpha[div];
    num -= pow * div;
  }
  
  return str;
  
};

const tests = [
  { alpha: '0123456789', num: 20 },
  { alpha: '0123456789', num: 119992 },
  { alpha: '0123456789abcdef', num: 255 },
  { alpha: '0123456789abcdef', num: 48879 },
  { alpha: '0123456789abcdef', num: 3735928559 },
  { alpha: '0123456789abcdef', num: 160 },
  { alpha: '01', num: 316 },
  { alpha: '-+', num: 316 },
  { alpha: '~!@#', num: 183 }, // Now there's no reason we can't do this
  { alpha: '0123456789', num: 1000 }, // Cases like this require an integer log result and necessitate the helper function
];
for (const test of tests) console.log(`uninterpret('${test.alpha}', '${test.num}') = ${uninterpret(test.alpha, test.num)}`);

Now here is a reinterpret implementation using interpret and uninterpret:

const logBaseHandleLikelyIntegers = (base, num) => {
  const eps = 0.000000000000005;
  const rough = Math.log(num) / Math.log(base); // May have a floating point error
  
  // Try to correct floating point error when it looks like it should be an integer
  if (Math.floor(rough + eps) > rough) return Math.floor(rough) + 1;
  if (Math.ceil( rough - eps) < rough) return Math.ceil(rough) - 1;
  
  return rough;
};

const interpret = (alphabet, str) => {
  
  let sum = 0;
  const base = alphabet.length;
  const maxInd = str.length - 1;
  
  for (let i = 0; i <= maxInd; i++) {
    
    // Simple to get value of char within alphabet - I know this is O(m * n) but not
    // concerned about that here
    const chrVal = alphabet.indexOf(str[i]);
    
    // Earlier characters count for more, same as how in the number 4278 the 4 represents
    // 4000, larger than the 2 which is 200, larger than the 7 which is 70, etc.
    const placeInd = maxInd - i;
    
    // The "place" value in `str`, same as how in the number 4278 the 4 is 1000s, 2 is
    // 100s, 7 is 10s, etc.
    const placeVal = base ** placeInd;
        
    // Overall the value contributed by this "place" in `str` is `placeVal` times the
    // value of the character in `str` at this place
    sum += placeVal * chrVal;
    
  }
  return sum;
  
};
const uninterpret = (alpha, num) => {
  
  const base = alpha.length;
    
  // Use log where the base is the alphabet length to determine number of chars needed
  const digitsNeeded = Math.floor(logBaseHandleLikelyIntegers(base, num) + 1);
    
  let str = '';
  for (let p = digitsNeeded - 1; p >= 0; p--) {
    const pow = Math.pow(base, p);
    const div = Math.floor(num / pow);
    str += alpha[div];
    num -= pow * div;
  }
  
  return str;
  
};
const reinterpret = (srcAlpha, trgAlpha, str) => uninterpret(trgAlpha, interpret(srcAlpha, str));

const reinterpretTest = (src, trg, str) => {
  console.log(`Reinterpret the value "${str}" in alphabet "${src}", under alphabet "${trg}":`);
  console.log(`Result: ${reinterpret(src, trg, str)}`);
};

console.log('Base 10 -> base 2 test:');
reinterpretTest('0123456789', '01', '32');

console.log('\nBase 10 -> base 16 tests:');
reinterpretTest('0123456789', '0123456789abcdef', '1287589');
reinterpretTest('0123456789', '0123456789abcdef', '87771287589933381');
reinterpretTest('0123456789', '0123456789abcdef', '87771287589933382');

Run the above code and... uh oh, two different strings '87771287589933381' and '87771287589933382' were interpreted to the same value, '137d3856246d140'. Why? Because these strings are large, and the numbers in uninterpret are getting so large they are losing precision.

How can I implement reinterpret to work on inputs of arbitrary sizes? I think it's tricky; I think exponentiation needs to be avoided somehow.

I'm not at all asking about the maximum size/precision of Number; I'm looking to circumvent a limitation that exists in my current code as a result of that maximum precision.


Solution

  • BigInt

    const interpret = (alphabet, str) => {
      
      let sum = 0n;
      const base = alphabet.length;
      const maxInd = str.length - 1;
      
      for (let i = 0; i <= maxInd; i++) {
        
        // Simple to get value of char within alphabet - I know this is O(m * n) but not
        // concerned about that here
        const chrVal = alphabet.indexOf(str[i]);
        
        // Earlier characters count for more, same as how in the number 4278 the 4 represents
        // 4000, larger than the 2 which is 200, larger than the 7 which is 70, etc.
        const placeInd = maxInd - i;
        
        // The "place" value in `str`, same as how in the number 4278 the 4 is 1000s, 2 is
        // 100s, 7 is 10s, etc.
        const placeVal = BigInt(base) ** BigInt(placeInd);
            
        // Overall the value contributed by this "place" in `str` is `placeVal` times the
        // value of the character in `str` at this place
        sum += placeVal * BigInt(chrVal);
        
      }
      return sum;
      
    };
    const uninterpret = (alpha, num) => {
      
      const base = BigInt(alpha.length);
      const digits = [];
    
      while (num) {
        digits.push(alpha[num % base]);
        num /= base;
      }
      
      return digits.reverse().join("");
      
    };
    const reinterpret = (srcAlpha, trgAlpha, str) => uninterpret(trgAlpha, interpret(srcAlpha, str));
    
    const reinterpretTest = (src, trg, str) => {
      console.log(`Reinterpret the value "${str}" in alphabet "${src}", under alphabet "${trg}":`);
      console.log(`Result: ${reinterpret(src, trg, str)}`);
    };
    
    console.log('Base 10 -> base 2 test:');
    reinterpretTest('0123456789', '01', '32');
    
    console.log('\nBase 10 -> base 16 tests:');
    reinterpretTest('0123456789', '0123456789abcdef', '1287589');
    reinterpretTest('0123456789', '0123456789abcdef', '87771287589933381');
    reinterpretTest('0123456789', '0123456789abcdef', '87771287589933382');

    N.b. uninterpret(alpha, 0n) === ""