javascriptalgorithmcaching

How to add cache for solution to LeetCode Sum of Distances in Tree?


I am working on 834. Sum of Distances in Tree:

There is an undirected connected tree with n nodes labeled from 0 to n - 1 and n - 1 edges.

You are given the integer n and the array edges where edges[i] = [ai, bi] indicates that there is an edge between nodes ai and bi in the tree.

Return an array answer of length n where answer[i] is the sum of the distances between the ith node in the tree and all other nodes.

I tried to come up with my own solution, but got a Time Limit Exceeded (TLE) error. I realised that I need to add a cache for the solution.

Here is my code:

/**
 * @param {number} n
 * @param {number[][]} edges
 * @return {number[]}
 */
var sumOfDistancesInTree = function(n, edges) {
    // * g: graph
    const graph = {};
    let res = [];
    let cache = {};

    // * g: build graph
    for(let i=0; i<edges.length; ++i) {
        const ind1 = parseInt(edges[i][0]);
        const ind2 = parseInt(edges[i][1]);

        graph[ind1] = graph[ind1] || [];
        graph[ind1].push(ind2);

        graph[ind2] = graph[ind2] || [];
        graph[ind2].push(ind1);
    }

    const recur = (origNode, parent, node, dist) => {

        // if(cache[?]) {
        //     return cache[?]
        // }

        const arr = graph[node];

        let sum = dist;
        for(let i=0; i<arr?.length; ++i) {
            const childnode = arr[i];

            if(parent === childnode) {
                continue;
            }

            const tmp = recur(origNode, node, childnode, dist+1);
            sum = sum + tmp;
        }

        // * g: try to add cache, but not sure what should be the cache index
        //cache[?] = sum;

        return sum;
    }

    // * g: graph is done
    for(let i=0; i<n; ++i) {
        const out = recur(i, i, i, 0);
        res.push(out);
    }

    return res;
};

The normal cache pattern is we cached the result at the end of func. On top, if we see cache value match, then we return cache value. (code above)

I am not sure what should be the cache index here.

I understand that start point to a particular point can go forward and backward, hence cache index can be start_pt + '' + end_point (forward) AND end_point + '' + start_pt (backward), to cache a particular path, but I am not able to fit to the cache pattern in the code above.

One test case that fails (as it takes too much time to finish) has 30000 nodes (numbered 0 to 29999) where node 0 has an edge to every other node, making all other nodes leaves of the tree.


Solution

  • There are faster solutions, but to answer your question, you should first make sure that the recursive call returns information that does not depend on the path you have already followed. In particular, caching something that has dist in it, reflects information that comes from outside the scope of the recursive call. That is not very practical for memoizing.

    So first change the logic so that you don't pass dist, but return information that only depends on the edge you just traversed (from parent to node). Return:

    Also, you don't really use the origNode parameter, so we can drop that one as well:

    var sumOfDistancesInTree = function(n, edges) {
        // * g: graph
        const graph = {};
        let res = [];
        let cache = {};
    
        // * g: build graph
        for(let i=0; i<edges.length; ++i) {
            const ind1 = parseInt(edges[i][0]);
            const ind2 = parseInt(edges[i][1]);
    
            graph[ind1] = graph[ind1] || [];
            graph[ind1].push(ind2);
    
            graph[ind2] = graph[ind2] || [];
            graph[ind2].push(ind1);
        }
    
        const recur = (parent, node) => {
    
            // if(cache[?]) {
            //     return cache[?]
            // }
    
            const arr = graph[node];
    
            const result = { numPaths: 0, sumPathLengths: 0 };
            for(let i=0; i<arr?.length; ++i) {
                const childnode = arr[i];
    
                if(parent === childnode) {
                    continue;
                }
    
                const tmp = recur(node, childnode);
                result.numPaths += tmp.numPaths;
                result.sumPathLengths += tmp.sumPathLengths;
            }
            if (parent != node) { // If we have follewed an edge:
                result.numPaths += 1; // The path that stops at node
                result.sumPathLengths += result.numPaths; // ...add edge to all paths
            }
    
            //cache[?] = sum;
    
            return result;
        }
    
        // * g: graph is done
        for(let i=0; i<n; ++i) {
            const out = recur(i, i);
            res.push(out.sumPathLengths);
        }
    
        return res;
    };
    

    Now we can think of memoization:

    As the call depends on parent and node, the key for your cache is this pair. You could multiply one by n and then sum them up to get a unique key:

    var sumOfDistancesInTree = function(n, edges) {
        // * g: graph
        const graph = {};
        let res = [];
        let cache = {};
    
        // * g: build graph
        for(let i=0; i<edges.length; ++i) {
            const ind1 = parseInt(edges[i][0]);
            const ind2 = parseInt(edges[i][1]);
    
            graph[ind1] = graph[ind1] || [];
            graph[ind1].push(ind2);
    
            graph[ind2] = graph[ind2] || [];
            graph[ind2].push(ind1);
        }
    
        const recur = (parent, node) => {
            const key = parent * n + node;
    
            if(cache[key]) {
                return cache[key];
            }
    
            const arr = graph[node];
    
            const result = { numPaths: 0, sumPathLengths: 0 };
            for(let i=0; i<arr?.length; ++i) {
                const childnode = arr[i];
    
                if(parent === childnode) {
                    continue;
                }
    
                const tmp = recur(node, childnode);
                result.numPaths += tmp.numPaths;
                result.sumPathLengths += tmp.sumPathLengths;
            }
            if (parent != node) { // If we have follewed an edge:
                result.numPaths += 1; // The path that stops at node
                result.sumPathLengths += result.numPaths; // ...add edge to all paths
            }
    
            cache[key] = result;
            return result;
        }
    
        // * g: graph is done
        for(let i=0; i<n; ++i) {
            const out = recur(i, i);
            res.push(out.sumPathLengths);
        }
    
        return res;
    };
    

    This code will still take too long because even if you have cached the values for the edges, this still means that all neighbors of a node need to be visited, which can be a lot for a tree with a high branching factor.

    You could improve on this to also check the cached value you might already have for a node, i.e. with key node * node + node:

    var sumOfDistancesInTree = function(n, edges) {
        // * g: graph
        const graph = {};
        let res = [];
        let cache = {};
    
        // * g: build graph
        for(let i=0; i<edges.length; ++i) {
            const ind1 = parseInt(edges[i][0]);
            const ind2 = parseInt(edges[i][1]);
    
            graph[ind1] = graph[ind1] || [];
            graph[ind1].push(ind2);
    
            graph[ind2] = graph[ind2] || [];
            graph[ind2].push(ind1);
        }
    
        const recur = (parent, node) => {
            const key = parent * n + node;
    
            if(cache[key]) {
                return cache[key];
            }
            const keyNode = node * n + node; // The key for the node (not the edge)
            if (cache[keyNode]) { // We have the info for the node and the opposite edge:
                const nodeResult = cache[keyNode];
                const keyOppositeEdge = node * n + parent;
                const edgeResult = cache[keyOppositeEdge];
                // Derive the info for the forward edge
                const numPaths = nodeResult.numPaths - edgeResult.numPaths + 1;
                const result = {
                    numPaths, 
                    sumPathLengths: nodeResult.sumPathLengths - edgeResult.sumPathLengths + numPaths
                };
                cache[key] = result;
                return result;
            }
    
            const arr = graph[node];
    
            const result = { numPaths: 0, sumPathLengths: 0 };
            for(let i=0; i<arr?.length; ++i) {
                const childnode = arr[i];
    
                if(parent === childnode) {
                    continue;
                }
    
                const tmp = recur(node, childnode);
                result.numPaths += tmp.numPaths;
                result.sumPathLengths += tmp.sumPathLengths;
            }
            if (parent != node) { // If we have followed an edge:
                result.numPaths += 1; // The path that stops at node
                result.sumPathLengths += result.numPaths; // ...add edge to all paths
            }
    
            cache[key] = result;
            return result;
        }
    
        // * g: graph is done
        for(let i=0; i<n; ++i) {
            const out = recur(i, i);
            res.push(out.sumPathLengths);
        }
    
        return res;
    };
    

    This runs in an acceptable time, but there is a lot of caching involved, which has its overhead.

    Another approach

    The more common approach is to perform two DFS traversals from a chosen root (like node 0), one to accumulate the "downward" counts/sums, and one to accumulate the "upward" counts/sums, using the results from the first traversal.

    For instance like this:

    var sumOfDistancesInTree = function (n, edges) {
        const graph = Array.from({length: n}, () => []);
        const numPaths = Array(n).fill(1);
        const sumPathLengths = Array(n).fill(0);
    
        for (const [node1, node2] of edges) {
            graph[node1].push(node2);
            graph[node2].push(node1);
        }
    
        function dfs1(parent, node) {
            for (const child of graph[node]) {
                if (child === parent) {
                    continue;
                }
                dfs1(node, child);
                numPaths[node] += numPaths[child];
                sumPathLengths[node] += sumPathLengths[child] + numPaths[child];
            }
        }
    
        function dfs2(parent, node) {
            for (const child of graph[node]) {
                if (child === parent) {
                    continue;
                }
                sumPathLengths[child] = sumPathLengths[node] - 2 * numPaths[child] + n;
                dfs2(node, child);
            }
        }
    
        dfs1(0, 0);
        dfs2(0, 0);
        return sumPathLengths;
    };