I am using a trie made to work on alphabets/scripts from any language.
class TrieNode {
constructor(key) {
// the "key" value will be the character in sequence
this.key = key;
// we keep a reference to parent
this.parent = null;
// we have hash of children
this.children = {};
// check to see if the node is at the end
this.end = false;
}
getWord() {
let output = [];
let node = this;
while (node !== null) {
output.unshift(node.key)
node = node.parent
}
return output.join('')
}
}
class Trie {
constructor() {
this.base = new TrieNode(null)
}
insert(word) {
let node = this.base
const points = Array.from(word)
for (const i in points) {
const point = points[i]
if (!node.children[point]) {
const child = node.children[point] = new TrieNode(point)
child.parent = node
}
node = node.children[point]
if (i == word.length - 1) {
node.end = true
}
}
}
contains(word) {
let node = this.base
const points = Array.from(word)
for (const i in points) {
const point = points[i]
if (node.children[point]) {
node = node.children[point]
} else {
return false
}
}
return node.end;
}
find(prefix) {
let node = this.base
let output = []
const points = Array.from(prefix)
for (const i in points) {
const point = points[i]
// make sure prefix actually has words
if (node.children[point]) {
node = node.children[point]
} else {
// there's none. just return it.
return output
}
}
const stack = [node]
while (stack.length) {
node = stack.shift()
// base case, if node is at a word, push to output
if (node.end) {
output.unshift(node.getWord())
}
// iterate through each children, call recursive findAllWords
for (var child in node.children) {
stack.push(node.children[child])
}
}
return output
}
}
Here is how you can find all words matching prefix. These scrabble words were used (tmp/scrabble.csv
).
const fs = require('fs')
const Trie = require('./Trie')
const words = fs.readFileSync('tmp/scrabble.csv', 'utf-8')
.trim()
.split(/\n+/)
.map(x => x.trim())
const trie = new Trie()
words.forEach(word => trie.insert(word))
console.log(trie.find('zy'))
[
'zygodactylous', 'zygomorphies', 'zygapophysis',
'zygapophyses', 'zygomorphic', 'zymologies',
'zygospores', 'zygosities', 'zygomorphy',
'zygodactyl', 'zymurgies', 'zymograms',
'zymogenes', 'zygotenes', 'zygospore',
'zygomatic', 'zyzzyvas', 'zymosans',
'zymology', 'zymogram', 'zymogens',
'zymogene', 'zygotene', 'zygosity',
'zygomata', 'zyzzyva', 'zymurgy',
'zymotic', 'zymosis', 'zymoses',
'zymosan', 'zymogen', 'zymases',
'zygotic', 'zygotes', 'zygosis',
'zygoses', 'zygomas', 'zydecos',
'zymase', 'zygote', 'zygose',
'zygoma', 'zygoid', 'zydeco',
'zymes', 'zyme'
]
However, search find('a')
will find over 10,000 matches (words starting with a
). If this were a web app, you might not want to show all 10,000 at once, maybe only 100 or 1,000 at a time. How could you implement a pagination system against this trie? Is it possible?
From my initial thought-attempts, I was trying to think how you could keep track of the "path" you left off with in the trie (say we are paginating 100 matches/words at a time). So I am trying to modify this function:
class Trie {
// ...
find(prefix, max = 100) {
let node = this.base
let output = []
const points = Array.from(prefix)
for (const i in points) {
const point = points[i]
// make sure prefix actually has words
if (node.children[point]) {
node = node.children[point]
} else {
// return path [-1] since there is no match?
return [output, [-1]]
}
}
const stack = [node]
const pathStack = [[0]]
while (stack.length) {
const path = pathStack.shift()
node = stack.shift()
// base case, if node is at a word, push to output
if (node.end) {
output.unshift(node.getWord())
if (output.length === max) {
return [output, path]
}
}
// iterate through each children, call recursive findAllWords
let i = 0
for (var child in node.children) {
pathStack.push(path.concat(i++))
stack.push(node.children[child])
}
}
return [output, [-1]]
}
}
Maybe this is partially correct? But I don't see how to use the path to jump back to the right spot on the next query.
[
[
'abye', 'abut', 'abri', 'abos', 'ably', 'able', 'abet', 'abed',
'abbe', 'abba', 'abas', 'aals', 'aahs', 'azo', 'ays', 'aye',
'axe', 'awn', 'awl', 'awe', 'awa', 'avo', 'ave', 'ava',
'auk', 'att', 'ate', 'ass', 'asp', 'ask', 'ash', 'art',
'ars', 'arm', 'ark', 'arf', 'are', 'arc', 'arb', 'apt',
'ape', 'any', 'ant', 'ani', 'ane', 'and', 'ana', 'amu',
'amp', 'ami', 'ama', 'alt', 'als', 'alp', 'all', 'ale',
'alb', 'ala', 'ait', 'ais', 'air', 'ain', 'aim', 'ail',
'aid', 'aha', 'ago', 'age', 'aga', 'aft', 'aff', 'adz',
'ads', 'ado', 'add', 'act', 'ace', 'aby', 'abs', 'abo',
'aba', 'aas', 'aal', 'aah', 'ay', 'ax', 'aw', 'at',
'as', 'ar', 'an', 'am', 'al', 'ai', 'ah', 'ag',
'ae', 'ad', 'ab', 'aa'
],
[ 0, 1, 17, 0 ]
]
And plus, the words appear to be in unusual order, maybe I am not inserting them the correct order?
This way, in theory, if you had the path
, then when you call find(prefix, startAtPath)
, you would pass the startAtPath
as a second parameter, and start the search from that point forward, to get the next 100 words. I haven't seen any "trie pagination" examples on the web, so maybe it's not a thing that's possible, not sure.
In TrieNode.constructor
add a this.count
property.
In insert
, update the counts for the insertion.
In find
add startAt
and max
parameters. As long as node.count < startAt
you will skip past node
(do not add its children to pathStack
) and decrease startAt
by node.count
.
And then you collect up to max
words to return.
This allows you to skip past previous results without having to enumerate them. You do not need to pass in the path
, just where you expect to start.
The cause of your order problem appears to be that you are using unshift
instead of push
.
Here is your code edited to give the right answer. I also fixed a minor bug that your output order depends on the order in which node.children
was inserted. This will work if you load the trie in alphabetical order, but not otherwise.
class TrieNode {
constructor(key) {
// the "key" value will be the character in sequence
this.key = key;
// we keep a reference to parent
this.parent = null;
// we have hash of children
this.children = {};
// check to see if the node is at the end
this.end = false;
// count how many words are at this node or below.
this.count = 0;
// Ensure children are in the right order.
this.lastChildKey = '';
}
getWord() {
let output = [];
let node = this;
while (node !== null) {
output.unshift(node.key)
node = node.parent
}
return output.join('')
}
}
class Trie {
constructor() {
this.base = new TrieNode(null)
}
insert(word) {
let node = this.base
const points = Array.from(word)
for (const i in points) {
const point = points[i]
node.count++
if (!node.children[point]) {
const child = node.children[point] = new TrieNode(point)
child.parent = node
}
node = node.children[point]
if (i == word.length - 1) {
node.end = true
}
// If words were inserted out of order, fix order of children.
if (node.lastChildKey <= point) {
node.lastChildKey = point
}
else {
// Need to fix order of children.
let keys = Object.keys(node.children)
keys.sort()
let orderedChildren = {}
for (const key in keys) {
orderedChildren[key] = node.children[key]
}
node.children = orderedChildren
}
}
}
contains(word) {
let node = this.base
const points = Array.from(word)
for (const i in points) {
const point = points[i]
if (node.children[point]) {
node = node.children[point]
} else {
return false
}
}
return node.end;
}
find(prefix, startAt, maxCount) {
let node = this.base
let output = []
const points = Array.from(prefix)
for (const i in points) {
const point = points[i]
// make sure prefix actually has words
if (node.children[point]) {
node = node.children[point]
} else {
// there's none. just return it.
return output
}
}
const stack = [node]
while (stack.length) {
if (maxCount <= output.length) {
return output
}
node = stack.shift()
if (node.count < startAt) {
startAt -= node.count
}
else {
// base case, if node is at a word, push to output
if (node.end) {
if (0 < startAt) {
startAt--;
}
else {
output.push(node.getWord())
}
}
// iterate through each children, call recursive findAllWords
for (var child in node.children) {
stack.push(node.children[child])
}
}
}
return output
}
}
exports.Trie = Trie
Here is your demo program edited for the changes.
const fs = require('fs')
const Trie = require('./Trie')
const words = fs.readFileSync('tmp/scrabble.csv', 'utf-8')
.trim()
.split(/\n+/)
.map(x => x.trim())
const trie = new Trie.Trie()
words.forEach(word => trie.insert(word))
console.log(trie.find('a', 0, 12))
console.log(trie.find('a', 6, 6))