javascriptstatisticsweighted-average

A JavaScript function that selects a random integer from an array, with the goal of reaching a specified decimal average over multiple runs


I'm trying to write a JavaScript function that takes an array of integers and a decimal number as arguments, and randomly chooses one of the integers in such a way that if you ran the function with the same two arguments in the long run, you get the decimal number as the average. The array of integers is guaranteed to be in consecutive order, and the decimal number will be less than the maximum integer and greater than the minimum integer.

So far I've tried this:

function weightedRandomInteger(arr, target) {
    if (!Array.isArray(arr) || arr.length === 0 || typeof target !== "number") {
        return null;
    }

    const min = arr[0];
    const max = arr[arr.length - 1];

    if (target < min || target > max) {
        return null;
    }

    let weights = arr.map((num) => {
        const distance = Math.abs(target - num);
        return distance === 0 ? 1 : 1 / distance;
    });

    const totalWeight = weights.reduce((a, b) => a + b);
    const probabilities = weights.map((weight) => weight / totalWeight);

    const random = Math.random();
    let cumulativeProbability = 0;

    for (let i = 0; i < arr.length; i++) {
        cumulativeProbability += probabilities[i];
        if (random <= cumulativeProbability) {
            return arr[i];
        }
    }
}

It does really well when the target value is right in the middle of arr.

For instance if I run:

let total = 0;
let trials = 10000
for (let i = 0; i < trials; i++) {
    total += weightedRandomInteger([0, 1, 2, 3, 4, 5, 6, 7], 3.5);
}
console.log("Average is ", total/trials)

I get "Average is 3.4819"

However, if I run:

let total = 0;
let trials = 10000
for (let i = 0; i < trials; i++) {
    total += weightedRandomInteger([0, 1, 2, 3, 4, 5, 6, 7], 0.2);
}
console.log("Average is ", total/trials)

I get "Average is 0.9428"

It is absolutely necessary that given enough tries all values in arr will be returned a some point. Even if target is very far on the edge. I wouldn't expect:

weightedRandomInteger([0, 1, 2, 3, 4, 5, 6, 7], 0.2)

to return 7 that often, but it has to return it sometimes.


Solution

  • Oh, that takes me back to math class. I remember the Poisson distribution, which models events with a highly predictable outcome. For example in a football game (soccer), 1:1 is a very likely result, whereas a 9:2 is almost unheard of. This can be modeled with Poisson.

    So most results are close to the expected value, but it does spread quite a bit (depending on the expected value). Which sound like what you are looking for, though I am not sure if it spreads wide enough and gives you enough values at the edges.

    It goes from 0 to k, where k is a positive integer, but you can shift it. Same goes if range isn't wide enough, then you'd have to spread it.

    One issue though (thank you for the comment): The distribution handles skew towards 0 very well, but not toward the maximum value (like when expected value is 8 with a maximum value of 10). This seems to be nature of the beast - if you expect around 8 events starting at 0, counting only to 10 will make you miss some events.

    Our friends, the math people, might have a more graceful way to deal with it at math stack or stats stack, but a nitwit like me just cheats and inverts the distribution.

    Also note that whatever you choose as upper limit, there always remains a small probability to get a value above it. These values will be capped at the limit, which tends to make the average slightly smaller than requested.

    Here is an example:

    function poissonDist(max, avg){ // k and lambda in formula
      const factorial = (i) => i <= 0 ? 1 : i * factorial(i-1)
      return Array.from({length: max+1}, (_,i) => avg**i * Math.exp(-avg) / factorial(i) )
    }
    // or more efficiently:
    function poissonDistRec(max, avg){ // k and lambda in formula
      return Array(max+1).fill().reduce((pd, _, i) => (pd.push(i === 0 ? Math.exp(-avg) : pd[i-1] * avg/i), pd) ,[])
    }
    
    function getLookup(max, avg){
      const dist = poissonDistRec(max, avg)
      const lookup = dist.reduce((l,p,i) => [...l, p + (i && l[i-1])], []) // sum of all predecessors + current
      lookup[max] = 1 // add the remaining propability of values above max 
      return lookup
    }
    
    function* poissonGenerator(min, max, rawAvg){
      const avg = adjustAvg(min, max, rawAvg)
      const isInverted = avg !== rawAvg
      const lookup = getLookup(max-min, avg-min)
      while(true){
        const val = Math.random()
        const ix = lookup.findIndex(p => p > val)
        yield (isInverted ? max-ix: min+ix)
      }
    }
    
    function adjustAvg(min, max, avg){
      const mid = (max-min)/2 + min
      const useInvert = mid < avg // we need to invert avg if it is toward the end of the range
      return useInvert ? 2*mid - avg : avg
    }
    
    function getAverage(min, max, avg, runs){
      const freq = {}
      const gen = poissonGenerator(min, max, avg)
      for(let i = 0; i < runs; i++){
        const val = gen.next().value
        freq[val] = (freq[val] ?? 0) +1
      }
      console.log('Frequency:', freq)
      const sum = Object.entries(freq).reduce((s, [v,f]) => s + Number(v) * f,0)
      const runAvg = sum/runs
      console.log('Average:', runAvg)
    }
    
    function run(){
      const params = ['min', 'max', 'avg', 'runs'].map(id => document.getElementById(id).value).map(Number)
      getAverage(...params)
    }
    .as-console-wrapper { max-height: 80% !important; top: 60px; }
    input {
      width: 80px;
    }
    <label>min<input type=number id="min" value="3"/></label>
    <label>max<input type=number id="max"value="10"/></label>
    <label>avg<input type=number id="avg" value="8.7"/></label>
    <label>runs<input type=number id="runs" value="2000"/></label>
    <button onclick="run()">run</button>