I am working on 834. Sum of Distances in Tree:
There is an undirected connected tree with
n
nodes labeled from 0 ton - 1
andn - 1
edges.You are given the integer
n
and the arrayedges
whereedges[i] = [ai, bi]
indicates that there is an edge between nodesai
andbi
in the tree.Return an array answer of length
n
whereanswer[i]
is the sum of the distances between theith
node in the tree and all other nodes.
I tried to come up with my own solution, but got a Time Limit Exceeded (TLE) error. I realised that I need to add a cache for the solution.
Here is my code:
/**
* @param {number} n
* @param {number[][]} edges
* @return {number[]}
*/
var sumOfDistancesInTree = function(n, edges) {
// * g: graph
const graph = {};
let res = [];
let cache = {};
// * g: build graph
for(let i=0; i<edges.length; ++i) {
const ind1 = parseInt(edges[i][0]);
const ind2 = parseInt(edges[i][1]);
graph[ind1] = graph[ind1] || [];
graph[ind1].push(ind2);
graph[ind2] = graph[ind2] || [];
graph[ind2].push(ind1);
}
const recur = (origNode, parent, node, dist) => {
// if(cache[?]) {
// return cache[?]
// }
const arr = graph[node];
let sum = dist;
for(let i=0; i<arr?.length; ++i) {
const childnode = arr[i];
if(parent === childnode) {
continue;
}
const tmp = recur(origNode, node, childnode, dist+1);
sum = sum + tmp;
}
// * g: try to add cache, but not sure what should be the cache index
//cache[?] = sum;
return sum;
}
// * g: graph is done
for(let i=0; i<n; ++i) {
const out = recur(i, i, i, 0);
res.push(out);
}
return res;
};
The normal cache pattern is we cached the result at the end of func. On top, if we see cache value match, then we return cache value. (code above)
I am not sure what should be the cache index here.
I understand that start point to a particular point can go forward and backward, hence cache index can be start_pt + '' + end_point (forward) AND end_point + '' + start_pt (backward), to cache a particular path, but I am not able to fit to the cache pattern in the code above.
One test case that fails (as it takes too much time to finish) has 30000 nodes (numbered 0 to 29999) where node 0 has an edge to every other node, making all other nodes leaves of the tree.
There are faster solutions, but to answer your question, you should first make sure that the recursive call returns information that does not depend on the path you have already followed. In particular, caching something that has dist
in it, reflects information that comes from outside the scope of the recursive call. That is not very practical for memoizing.
So first change the logic so that you don't pass dist
, but return information that only depends on the edge you just traversed (from parent to node). Return:
Also, you don't really use the origNode
parameter, so we can drop that one as well:
var sumOfDistancesInTree = function(n, edges) {
// * g: graph
const graph = {};
let res = [];
let cache = {};
// * g: build graph
for(let i=0; i<edges.length; ++i) {
const ind1 = parseInt(edges[i][0]);
const ind2 = parseInt(edges[i][1]);
graph[ind1] = graph[ind1] || [];
graph[ind1].push(ind2);
graph[ind2] = graph[ind2] || [];
graph[ind2].push(ind1);
}
const recur = (parent, node) => {
// if(cache[?]) {
// return cache[?]
// }
const arr = graph[node];
const result = { numPaths: 0, sumPathLengths: 0 };
for(let i=0; i<arr?.length; ++i) {
const childnode = arr[i];
if(parent === childnode) {
continue;
}
const tmp = recur(node, childnode);
result.numPaths += tmp.numPaths;
result.sumPathLengths += tmp.sumPathLengths;
}
if (parent != node) { // If we have follewed an edge:
result.numPaths += 1; // The path that stops at node
result.sumPathLengths += result.numPaths; // ...add edge to all paths
}
//cache[?] = sum;
return result;
}
// * g: graph is done
for(let i=0; i<n; ++i) {
const out = recur(i, i);
res.push(out.sumPathLengths);
}
return res;
};
Now we can think of memoization:
As the call depends on parent
and node
, the key for your cache
is this pair. You could multiply one by n
and then sum them up to get a unique key:
var sumOfDistancesInTree = function(n, edges) {
// * g: graph
const graph = {};
let res = [];
let cache = {};
// * g: build graph
for(let i=0; i<edges.length; ++i) {
const ind1 = parseInt(edges[i][0]);
const ind2 = parseInt(edges[i][1]);
graph[ind1] = graph[ind1] || [];
graph[ind1].push(ind2);
graph[ind2] = graph[ind2] || [];
graph[ind2].push(ind1);
}
const recur = (parent, node) => {
const key = parent * n + node;
if(cache[key]) {
return cache[key];
}
const arr = graph[node];
const result = { numPaths: 0, sumPathLengths: 0 };
for(let i=0; i<arr?.length; ++i) {
const childnode = arr[i];
if(parent === childnode) {
continue;
}
const tmp = recur(node, childnode);
result.numPaths += tmp.numPaths;
result.sumPathLengths += tmp.sumPathLengths;
}
if (parent != node) { // If we have follewed an edge:
result.numPaths += 1; // The path that stops at node
result.sumPathLengths += result.numPaths; // ...add edge to all paths
}
cache[key] = result;
return result;
}
// * g: graph is done
for(let i=0; i<n; ++i) {
const out = recur(i, i);
res.push(out.sumPathLengths);
}
return res;
};
This code will still take too long because even if you have cached the values for the edges, this still means that all neighbors of a node need to be visited, which can be a lot for a tree with a high branching factor.
You could improve on this to also check the cached value you might already have for a node, i.e. with key node * node + node
:
var sumOfDistancesInTree = function(n, edges) {
// * g: graph
const graph = {};
let res = [];
let cache = {};
// * g: build graph
for(let i=0; i<edges.length; ++i) {
const ind1 = parseInt(edges[i][0]);
const ind2 = parseInt(edges[i][1]);
graph[ind1] = graph[ind1] || [];
graph[ind1].push(ind2);
graph[ind2] = graph[ind2] || [];
graph[ind2].push(ind1);
}
const recur = (parent, node) => {
const key = parent * n + node;
if(cache[key]) {
return cache[key];
}
const keyNode = node * n + node; // The key for the node (not the edge)
if (cache[keyNode]) { // We have the info for the node and the opposite edge:
const nodeResult = cache[keyNode];
const keyOppositeEdge = node * n + parent;
const edgeResult = cache[keyOppositeEdge];
// Derive the info for the forward edge
const numPaths = nodeResult.numPaths - edgeResult.numPaths + 1;
const result = {
numPaths,
sumPathLengths: nodeResult.sumPathLengths - edgeResult.sumPathLengths + numPaths
};
cache[key] = result;
return result;
}
const arr = graph[node];
const result = { numPaths: 0, sumPathLengths: 0 };
for(let i=0; i<arr?.length; ++i) {
const childnode = arr[i];
if(parent === childnode) {
continue;
}
const tmp = recur(node, childnode);
result.numPaths += tmp.numPaths;
result.sumPathLengths += tmp.sumPathLengths;
}
if (parent != node) { // If we have followed an edge:
result.numPaths += 1; // The path that stops at node
result.sumPathLengths += result.numPaths; // ...add edge to all paths
}
cache[key] = result;
return result;
}
// * g: graph is done
for(let i=0; i<n; ++i) {
const out = recur(i, i);
res.push(out.sumPathLengths);
}
return res;
};
This runs in an acceptable time, but there is a lot of caching involved, which has its overhead.
The more common approach is to perform two DFS traversals from a chosen root (like node 0), one to accumulate the "downward" counts/sums, and one to accumulate the "upward" counts/sums, using the results from the first traversal.
For instance like this:
var sumOfDistancesInTree = function (n, edges) {
const graph = Array.from({length: n}, () => []);
const numPaths = Array(n).fill(1);
const sumPathLengths = Array(n).fill(0);
for (const [node1, node2] of edges) {
graph[node1].push(node2);
graph[node2].push(node1);
}
function dfs1(parent, node) {
for (const child of graph[node]) {
if (child === parent) {
continue;
}
dfs1(node, child);
numPaths[node] += numPaths[child];
sumPathLengths[node] += sumPathLengths[child] + numPaths[child];
}
}
function dfs2(parent, node) {
for (const child of graph[node]) {
if (child === parent) {
continue;
}
sumPathLengths[child] = sumPathLengths[node] - 2 * numPaths[child] + n;
dfs2(node, child);
}
}
dfs1(0, 0);
dfs2(0, 0);
return sumPathLengths;
};