windows parsing directory subdirectory filetree

print the folder tree with sizes, windows


I'm looking for a way to print (or write to a file) the folder tree of my Windows PC, including the size of each folder, but not of individual files. The output should look like:

- My Documents/pictures/selfies/ - 100MB
- My Documents/movies/ - 1000MB
- My Music/Mozart/ - 300MB

...and so forth.

Notes: 1) Individual files can be listed if there's no way around it, but in a way that lets me remove them from the list programmatically (e.g. with parsing or a regex) later.

2) the aim is to create a hierarchical file like http://bl.ocks.org/mbostock/raw/1283663/readme.json so a .json would be best, but not a requirement: I'll parse the text and create a .json file as a second step.

3) This is what I have so far, but I have no idea how to adapt it to my needs:

@echo off
rem Print one line per immediate subfolder of a folder: its name, a hash sign,
rem and the size value parsed from a recursive dir listing of that subfolder.
rem Delayed expansion is kept off here, presumably so that exclamation marks
rem in folder names survive the for-variable expansion unchanged.
setlocal disabledelayedexpansion

rem Folder to scan: the first argument with surrounding quotes removed,
rem or the current directory when no argument was given.
set "folder=%~1"
if not defined folder set "folder=%cd%"

rem One iteration per immediate subfolder of the chosen folder.
for /d %%a in ("%folder%\*") do (
    rem Default reported when no line of the listing matches below.
    set "size=0"
    rem The listing is recursive, includes every attribute, and has no thousands
    rem separators. Only output lines starting with two spaces are kept.
    rem A kept line with no fifth token supplies its third token as the size,
    rem and the last such line wins.
    rem NOTE review: this presumably targets the file-count summary lines of
    rem English dir output, the final one being the recursive total. Confirm
    rem the token positions on other locales.
    for /f "tokens=3,5" %%b in ('dir /-c /a /w /s "%%~fa\*" 2^>nul ^| findstr /b /c:"  "') do if "%%~c"=="" set "size=%%~b"
    rem Delayed expansion is switched on only around the echo so that the size
    rem assigned inside this loop body can be read back, then switched off again.
    setlocal enabledelayedexpansion
    echo(%%~nxa # !size!
    endlocal
)

endlocal

4) I can only work in python and batch scripting on my machine :(

Thanks all AC


Solution

  • Here's the Python (2.7 compatible syntax) script:

    import sys
    from glob import glob
    from os import getcwd, listdir, stat
    from os.path import isdir, isfile, join
    from pprint import pprint
    
    NAME_KEY = "name"
    SIZE_KEY = "size"
    CHILDREN_KEY = "children"
    
    def _iter_path_w_files(path):
        """Build a nested dict describing ``path`` and everything below it.

        A regular file yields ``{NAME_KEY: path, SIZE_KEY: <size in bytes>}``.
        A directory yields ``{NAME_KEY: path, CHILDREN_KEY: [...]}`` with one
        entry per child, built recursively (an empty directory gets an empty
        list).  Anything that is neither a file nor a directory yields
        ``None``; such entries are left out of a parent's children list so the
        result never contains ``None`` placeholders.
        """
        if isfile(path):
            return {NAME_KEY: path, SIZE_KEY: stat(path).st_size}
        if not isdir(path):
            return None

        # listdir() rather than glob(join(path, "*")): glob treats "[", "]",
        # "*" and "?" inside a directory name as pattern characters and skips
        # names starting with ".", so those entries were silently dropped.
        try:
            names = listdir(path)
        except OSError:
            # Unreadable directory: report it as empty, which is what the
            # glob-based version did as well.
            names = []

        children = []
        for name in names:
            entry = _iter_path_w_files(join(path, name))
            if entry is not None:
                children.append(entry)
        return {NAME_KEY: path, CHILDREN_KEY: children}
    
    def _iter_path_wo_files(path):
        """Return the directories-only size tree rooted at ``path``.

        The result is ``{NAME_KEY: path, SIZE_KEY: total}`` where ``total`` is
        the summed size in bytes of every file found anywhere below ``path``.
        ``CHILDREN_KEY`` is present only when ``path`` has at least one
        subdirectory and then holds one such dict per subdirectory.  Files are
        counted but never listed.  Entries that are neither a regular file nor
        a directory are ignored.
        """
        node = {NAME_KEY: path, SIZE_KEY: 0}

        # listdir() rather than glob(join(path, "*")): glob treats "[", "]",
        # "*" and "?" inside a directory name as pattern characters and skips
        # names starting with ".", which made the reported sizes too small.
        try:
            names = listdir(path)
        except OSError:
            # Unreadable (or non-directory) path: treat as empty, which is
            # what the glob-based version did as well.
            names = []

        for name in names:
            child = join(path, name)
            if isfile(child):
                node[SIZE_KEY] += stat(child).st_size
            elif isdir(child):
                subtree = _iter_path_wo_files(child)
                node.setdefault(CHILDREN_KEY, []).append(subtree)
                node[SIZE_KEY] += subtree[SIZE_KEY]
        return node
    
    def iter_path(path, show_files=True):
        """Describe ``path`` as a nested structure of names and sizes.

        With ``show_files`` true the work is delegated to
        ``_iter_path_w_files``.  Otherwise a regular file gives its size in
        bytes, a directory gives the result of ``_iter_path_wo_files``, and
        any other path gives ``None``.
        """
        if show_files:
            return _iter_path_w_files(path)
        if isfile(path):
            return stat(path).st_size
        if isdir(path):
            return _iter_path_wo_files(path)
        # Neither a file nor a directory.
        return None
    
    
    if __name__ == "__main__":
        # Root of the scan: first command-line argument if given, otherwise
        # the current working directory.
        path = sys.argv[1] if len(sys.argv) > 1 else getcwd()

        files = False  # Toggle this var if you want the files reported or not

        # Build the tree and pretty-print the resulting structure.
        d = iter_path(path, files)
        pprint(d)
    

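    For a directory tree like this (the numbers next to the files are their sizes in bytes):

        dir0
        |-- dir00
        |   |-- dir000
        |   |   `-- dir0000
        |   |       `-- file00000  (9)
        |   `-- file000  (7)
        |-- dir01
        |   |-- dir010  (empty)
        |   `-- file010  (7)
        `-- file00  (6)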

    the outputs would be:

    files = False:

    {'children': [
                  {'children': [
                                {'children': [
                                              {'name': 'dir0\\dir00\\dir000\\dir0000',
                                               'size': 9L
                                              }
                                             ],
                                 'name': 'dir0\\dir00\\dir000',
                                 'size': 9L
                                }
                               ],
                   'name': 'dir0\\dir00',
                   'size': 16L
                  },
                  {'children': [
                                {'name': 'dir0\\dir01\\dir010',
                                 'size': 0
                                }
                               ],
                   'name': 'dir0\\dir01',
                   'size': 7L
                  }
                 ],
     'name': 'dir0',
     'size': 29L
    }
    

    files = True:

    {'name': 'dir0',
     'children': [
                  {'name': 'dir0\\dir00',
                   'children': [
                                {'name': 'dir0\\dir00\\dir000',
                                 'children': [
                                              {'name': 'dir0\\dir00\\dir000\\dir0000',
                                               'children': [
                                                            {'name': 'dir0\\dir00\\dir000\\dir0000\\file00000',
                                                             'size': 9L
                                                            }
                                                           ]
                                              }
                                             ]
                                },
                                {'name': 'dir0\\dir00\\file000',
                                 'size': 7L
                                }
                               ]
                  },
                  {'name': 'dir0\\dir01',
                   'children': [
                                {'name': 'dir0\\dir01\\dir010',
                                 'children': []
                                },
                                {'name': 'dir0\\dir01\\file010',
                                 'size': 7L
                                }
                                ]
                  },
                  {'name': 'dir0\\file00',
                   'size': 6L
                  }
                 ]
    }
    

    Those are Python dictionaries (I formatted them for readability), and they are fully compatible with JSON: you can try json.dumps(d), where d is the dictionary.