javascriptpatricia-trie

How can I read this Radix Tree structure to determine next-string probability?


In JavaScript, I am attempting to take a given user input and guess the 3 most likely words that might complete the user's currently (incomplete) typed word. The guess is based on the user's past inputs. I'm working on this here, in this JSFiddle.

The structure I build to record the user's past inputs is a modified Radix Tree (AKA Patricia Trie):

Input: "hey"

{
    "h": {
        "value": "h",
        "count": 1,
        "followables": {
            "e": {
                "value": "e",
                "count": 1,
                "followables": {
                    "y": {
                        "value": "y",
                        "count": 1,
                        "followables": {}
                    }
                }
            }
        }
    }
}

This data structure is built perfectly, and I think it's the best structure to achieve the goal described. My problem is the function for reading the Radix Tree data to define the 3 most likely words for a given input. In the above data for example, if the user inputs "h", the guessing function should return an object like this:

guess : {
   1 : "hey",
   2 : "",
   3 : ""
}

So here's my code/progress:

Learn - take completed input string and organize the combination into the Radix Tree (brain):

function learn(message, brain) {
    if (message.length == 0) return {}; // or do something else
    var ch = message[0]; // get the first character
    if (!brain[ch]) { // create new node when not exists
        brain[ch] = {
            value: ch,
            count: 1,
            followables: {}
        };
    } else { // increment count when exist
        brain[ch].count += 1;
    }
    var substr = message.substring(1); // remove first character
    if (substr) { // do it for the remaining substring
        brain[ch].followables = learn(substr, brain[ch].followables);
    } else {
        renderData();
    }
    return brain;
}

That's all done right. Unfortunately, the next code, meant to read the data and guess the word that the user is typing, is not good. I'm having trouble with what's to me a very complex function. I've divided it into small functions, as I've learned is the best practice, but I'm afraid I've made a mess that could probably be much simpler:

Guess - take the "learned" string data and make 3 guesses at which word the user might be typing:

function guess(progress, brain) {
    console.log("Guessing based on: " + progress);
    var guesses = {
        0: "",
        1: "",
        2: ""
    }
    var firstChar = progress[0]; 
    if (brain[firstChar]) {
        var step = brain[firstChar];
        for (var i = 0; i < progress.length; i++) {
            var char = progress[i];
            if (step.followables[char]) {
                step = step.followables[char];
                if (i == progress.length) {
                    var guesses = nextStrings(step.followables);
                    renderGuesses(guesses);
                }
            } else {
                renderGuesses(guesses);
            }
        }
    } else {
        renderGuesses(guesses);
    }
}

function renderGuesses(guesses) {
    console.log(guesses);
    $('#guess-1').text(guesses[0]);
    $('#guess-2').text(guesses[1]);
    $('#guess-3').text(guesses[2]);
}

function nextStrings(followables) {
    console.log('Searching for next string...');
    var results;
    if (followables.length > 0) {
        results = chooseRoutes(followables);
    } else {
        results = {
            0: "",
            1: "",
            2: ""
        }
    }
    console.log(result);
    return result;
}

function chooseRoutes(followables) {
    var results = {
        0: {
            value: "",
            count: 0
        },
        1: {
            value: "",
            count: 0
        },
        2: {
            value: "",
            count: 0
        }
    };
    for (var i = 0; i < followables.length; i++) {
        var count = followables[i].count;
        if (count > results[0].count) {
            results[0].value = followStr(followables[i], "");
        } else if (count > results[1].count) {
            results[1].value = followStr(followables[i], "");
        } else if (count > results[2].count) {
            results[2].value = followStr(followables[i], "");
        }
    }
    console.log(results);
    return results;
}

function followStr(followables, str) {
    var guess = {
        value: "",
        count: 0
    };
    for (var i = 0; i < followables.length; i++) {
        if (followables[i].count > guess.count) {
            guess = followables[i];
        }
    }
    followables = guess.followables;
    if (guess.value != " ") {
        str += guess;
        followStr(followables, str);
    } else {
        console.log(str);
        return str;
    }
}

Sidenote - While a fuzzy string search made on a dictionary is a more common approach to this, the learning method is a great way to tailor guesses to the user's writing/messaging style and supporting the user's non-standard vocabulary ("heyy", "sup", ":P", "lol") - the results of these guesses can be combined with (and take priority over) standard dictionary results.


Solution

  • The structure you used for dictionary is not correct, it should contain array(s) of objects. For example, after you enter these words:

    hi
    hi
    hi
    hi
    hi
    hey
    hello
    hella
    

    the structure should be:

    history: [{
        letter: "h",
        count: 8,
        followables: [{
            letter: "e",
            count: 3,
            followables: [{
                letter: "y",
                count: 1,
                followables: []
            }, {
                letter: "l",
                count: 2,
                followables: [{
                    letter: "l",
                    count: 2,
                    followables: [{
                        letter: "o",
                        count: 1,
                        followables: []
                    }, {
                        letter: "a",
                        count: 1,
                        followables: []
                    }]
                }]
            }]
        }, {
            letter: "i",
            count: 5,
            followables: []
        }]
    }]
    

    The way you create and store the history (I would use localStorage) was not interesting to me. Focus was on recursive functions that dig deep inside the tree to get suggestions. This one gets final followables for given word:

    findChildren: function (node, depth) {
        /* Traverse history for current word, there is only one path */
        for (k in node) {
            if (node[k].letter == app.progress[depth]) {
                if (depth + 1 == app.progress.length) {
                    /* Found it, hope it has followables */
                    return node[k].followables;
                } else {
                    /* Must go deeper... */
                    return app.findChildren(node[k].followables, depth + 1);
                };
            };
        };
        /* No results */
        return false;
    }
    

    and second one (getWord) creates suggestions:

    countWordsFromNode: function (node) {
        for (i in node) {
            if (node[i].followables.length) {
                app.countWordsFromNode(node[i].followables);
            } else {
                app.totalInNode++;
            };
        };
    },
    getWord: function (node, limit) {
        /* First sort by count */
        var sorted = node.sort(function (n1, n2) {
            return n2.count - n1.count;
        });
        for (k in sorted) {
            app.guesses[app.totalFound].word += sorted[k].letter;
            if (sorted[k].followables.length) {
                app.totalInNode = 0;
                app.countWordsFromNode(sorted[k].followables);
                for (m = 1; m < app.totalInNode; m++) {
                    if ((app.totalFound + m) < limit) {
                        app.guesses[app.totalFound + m].word += sorted[k].letter;
                    };
                };
                app.getWord(sorted[k].followables, limit);
            } else {
                /* End of word */
                app.totalFound++;
            };
            if (app.totalFound >= limit) {
                /* Found all suggestions */
                break;
            };
        };
    }
    

    You can see details in this Fiddle, I had to remove some code of yours. It will be easy to integrate it anywhere, for example you can set other suggestion fields, current are:

    guesses: [
        {
            element: $('#guess-1'),
            word: ''
        },
        {
            element: $('#guess-2'),
            word: ''
        },
        {
            element: $('#guess-3'),
            word: ''
        }
    ]
    

    Edit: Fixed bug with adding letters to the right.