algorithmtreestring-formattingdepth-first-searchbreadth-first-search

String representation of a tree (nodes) data structure with paths from left to right


Given a tree-like data structure of nodes with each node having a property children that contains the children nodes from left to right, I wish to create a string representation of this data structure such that each "vertical column" of the representation from left to right is a path leading to a leaf node. The first column would be the leftmost path, and the last column would be the rightmost path.

For example, here is an example of how this would look like for a case in which each node's representation is a single character:

Example 1:

Root
├─T
├─E
├─S
├─T─────────┬───┐
├─N─┬───┐   ├─V ├─W
├─A ├─E ├─I ├─A ├─O
├─S ├─X ├─C ├─L ├─W─┐
├─T └─T └─E ├─U ├─H ├─T
└─Y         └─E ├─U ├─H
                └─H ├─I
                    ├─S
                    ├─I
                    ├─S
                    ├─L
                    ├─O
                    ├─N
                    └─G

Here is a case in which each node's string representation is a longer string:

Example 2:

Root
├─Apple─────────┐
├─Banana─┐      ├─Elderberry─┬───────┐
└─Cherry └─Date └─Fig        └─Grape └─Hazelnut

Given that each line in the above string representations each represent a level (are at a specific height) of the tree, I was under the impression that BFS was the way to go in creating the string representation.

However, as can be seen in the above examples, it does not seem as simple as that since the line depends on the nodes to the left of the current node and the lengths of their string representations. So, my thought is that you may have to combine the BFS with DFS in some way to consider the number of children to the left of the current node. However, I'm struggling in figuring out how one would do this.

This isn't a language-specific question as I am looking for more of a general algorithm that I can adopt to my language of choice. Though, I would appreciate the algorithm being made efficient where possible. Additionally, please assume that tree nodes are immutable and therefore cannot be modified to store additional information for the purposes of this algorithm.


Solution

  • As you indicate, with BFS you lack the spacing information that you need from the deeper levels. So DFS seems a more natural candidate, as then you can let that information bubble up out of recursion.

    Here is a possible implementation in JavaScript. The script below includes the creation of the two example trees and prints the result in the console.

    class Node {
        constructor(value, ...children) {
            this.value = value;
            this.children = children;
        }
    }
    
    ///// Utility functions to work with arrays of strings ///////
    
    function widthOf(lines) { // Length of the longest string in the array
        return Math.max(...lines.map(line => line.length));
    }
    
    function mergeLines(a, width, b) {
        // The strings in the first array (a) get extended with those of the second array (b)
        // If b has more strings than a, then strings are added to a so to match its length
        // When concatenating strings from a and b, the strings from a are first padded with spaces
        // so that all strings from b start at the same offset
        for (let i = 0; i < b.length; i++) {
            if (i === a.length) a.push("");
            a[i] = a[i].padEnd(width) + b[i];
        }
    }
    
    ////// Main algorithm ///////
    
    function toLines(root) {
        // Returns an array of one-line strings. It is for the caller to concatenate them with newline separators.
        let label = root.value.toString();
        if (root.children.length == 0) return ["└─" + label];
        let topLine = "├─" + label;
        let split = "─";
        let minWidth = topLine.length;
        const allLines = [];
        let offset = 0;
        for (const child of root.children) {
            const childLines = toLines(child); // Recursion: DFS traversal
            const childWidth = Math.max(minWidth, widthOf(childLines));
            mergeLines(allLines, offset, childLines);
            if (minWidth == 0) { // Not the first child?
                topLine = (topLine + split).padEnd(offset, "─");
                split = "┬";
            }
            offset += childWidth + 1;
            minWidth = 0; 
        }
        if (root.children.length > 1) topLine += "┐";
        allLines.unshift(topLine); // Prefix a new string with the value of the current node
        return allLines;
    }
    
    /////////// Utility functions to create a tree for the demo ///////////////////
    
    function chain(...values) {
        // Create a (vertical) branch of nodes
        let children = [];
        for (let i = values.length - 1; i >= 0; i--) {
            children = [new Node(values[i], ...children)]; // An array with just one child
        }
        return children[0]; // The root of the created branch
    }
    
    function down(root, levels) {
        // Return the node that is #levels below the given root, on the rightmost "branch"
        while (levels--) {
            root = root.children.at(-1); // rightmost child
        }
        return root;
    }
    
    //////// Example trees ////////////////////////////////////////
    
    function exampleTree1() {
        const root = chain(..."TESTNASTY");
        down(root, 4).children.push(chain(..."EXT"), chain(..."ICE"));
        down(root, 3).children.push(chain(..."VALUE"), chain(..."WOWHUH"));
        down(root, 6).children.push(chain(..."THISISLONG"));
        return root;
    }
    
    function exampleTree2() {
        const root = chain("Apple", "Banana", "Cherry");
        down(root, 2).children.push(chain("Date"));
        down(root, 1).children.push(chain("Elderberry", "Fig"));
        down(root, 2).children.push(chain("Grape"), chain("Hazelnut"));
        return root;
    }
    
    //////// Demo ////////
    
    for (const root of [exampleTree1(), exampleTree2()]) {
        console.log(toLines(root).join("\n"));
    }

    Note that the "Root" label that your desired output showed is not included here. It seemed to be out of the normal formatting rules. But you can obviously just print that outside of this function.