python arrays numpy compression monkeypatching

How to monkey-patch np.savez_compressed to add compression level, without editing numpy's source files?


I need to modify the ZIP compression level used internally by np.savez_compressed. There is a feature proposal on NumPy's GitHub, but it is not implemented yet.

I see two options:

How to do this?

Here I tried the second option, but it fails with ValueError: seek of closed file, but I don't see why:

import numpy as np

def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
    """Write arrays into a ``.npz`` zip archive using deflate level 2.

    Meant to replace ``np.lib.npyio._savez``; per the inline marker below,
    the ``zipfile_factory`` call is the only line changed relative to the
    function it replaces.

    NOTE(review): this is the failing reproduction from the question and is
    kept as posted. The snippet only imports ``numpy``, so neither
    ``os_fspath`` nor a ``format`` module is brought into scope here.

    Parameters
    ----------
    file : file-like object or path
        Objects with a ``write`` attribute are used as-is. Anything else is
        passed through ``os_fspath`` and gets ``.npz`` appended when the
        resulting name does not already end with it.
    args : sequence
        Unnamed arrays; stored under the keys ``arr_0``, ``arr_1``, ...
    kwds : dict
        Named arrays. This dict is updated in place with the ``arr_N``
        entries generated from ``args``.
    compress : bool
        Truthy selects ``ZIP_DEFLATED``, otherwise ``ZIP_STORED``.
    allow_pickle, pickle_kwargs
        Forwarded unchanged to ``write_array``.

    Raises
    ------
    ValueError
        If a generated ``arr_N`` key is already present in ``kwds``.
    """
    import zipfile
    if not hasattr(file, 'write'):
        # Only reached for path-like targets; `os_fspath` is not defined in
        # this snippet.
        file = os_fspath(file)
        if not file.endswith('.npz'):
            file = file + '.npz'
    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict.keys():
            raise ValueError("Cannot use un-named variables and keyword %s" % key)
        namedict[key] = val
    if compress:
        compression = zipfile.ZIP_DEFLATED
    else:
        compression = zipfile.ZIP_STORED
    zipf = np.lib.npyio.zipfile_factory(file, mode="w", compression=compression, compresslevel=2)  # !! the only modified line !!
    for key, val in namedict.items():
        fname = key + '.npy'
        val = np.asanyarray(val)
        # always force zip64, gh-10776
        with zipf.open(fname, 'w', force_zip64=True) as fid:
            # NOTE(review): with no `format` imported in this snippet the name
            # resolves to the builtin `format` function, which has no
            # `write_array` attribute. An exception raised here also skips
            # `zipf.close()` below, leaving the archive unfinalised.
            format.write_array(fid, val, allow_pickle=allow_pickle, pickle_kwargs=pickle_kwargs)
    zipf.close()

# Monkey-patch: rebind the `_savez` attribute of `np.lib.npyio` to the
# function defined above.
np.lib.npyio._savez = _savez    

# Minimal reproduction: save one named array through an already-open
# binary file handle.
x = np.array([1, 2, 3, 4])
with open("test.npz", "wb") as f:
    np.savez_compressed(f, x=x)

Solution

  • I found an even simpler solution:

    import numpy as np
    def zipfile_factory(file, *args, **kwargs):
        """Create a ``zipfile.ZipFile`` with Zip64 enabled and deflate level 4.

        Intended to be assigned over ``np.lib.npyio.zipfile_factory`` so that
        archives opened through it use a fixed ``compresslevel``.

        Parameters
        ----------
        file : file-like object, str or os.PathLike
            Objects exposing a ``read`` attribute are handed to ``ZipFile``
            unchanged; anything else is converted with ``os.fspath``.
        *args, **kwargs
            Forwarded to ``zipfile.ZipFile``. ``allowZip64`` is always forced
            to ``True`` and ``compresslevel`` is always forced to ``4``, even
            when the caller supplied other values for them.

        Returns
        -------
        zipfile.ZipFile
            The opened archive; the caller is responsible for closing it.
        """
        # Local imports keep the snippet self-contained when pasted as-is.
        import os
        import zipfile

        if not hasattr(file, 'read'):
            # `os.fspath` is the stdlib spelling of the `os_fspath` helper,
            # which is never defined in this snippet (NameError for any path).
            file = os.fspath(file)
        kwargs['allowZip64'] = True
        kwargs['compresslevel'] = 4
        return zipfile.ZipFile(file, *args, **kwargs)
    # Monkey-patch: rebind `np.lib.npyio.zipfile_factory` to the wrapper above.
    np.lib.npyio.zipfile_factory = zipfile_factory
    # Demo: save a 10-million-element array of ones through an open binary file.
    with open("test.npz", "wb") as f:
        np.savez_compressed(f, x=np.ones(10_000_000))
    

    Edit: old solution:

    I found the solution in the meantime: format should be replaced by np.lib.npyio.format. Now this works:

    import numpy as np
    
    def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
        import zipfile
        if not hasattr(file, 'write'):
            file = os_fspath(file)
            if not file.endswith('.npz'):
                file = file + '.npz'
        namedict = kwds
        for i, val in enumerate(args):
            key = 'arr_%d' % i
            if key in namedict.keys():
                raise ValueError("Cannot use un-named variables and keyword %s" % key)
            namedict[key] = val
        if compress:
            compression = zipfile.ZIP_DEFLATED
        else:
            compression = zipfile.ZIP_STORED
        zipf = np.lib.npyio.zipfile_factory(file, mode="w", compression=compression, compresslevel=1)
        for key, val in namedict.items():
            fname = key + '.npy'
            val = np.asanyarray(val)
            # always force zip64, gh-10776
            with zipf.open(fname, 'w', force_zip64=True) as fid:
                np.lib.npyio.format.write_array(fid, val, allow_pickle=allow_pickle, pickle_kwargs=pickle_kwargs)
        zipf.close()
    
    # Monkey-patch: rebind the `_savez` attribute of `np.lib.npyio` to the
    # function defined above.
    np.lib.npyio._savez = _savez    
    
    # Demo: save one named array through an already-open binary file handle.
    with open("test.npz", "wb") as f:
        np.savez_compressed(f, x=np.array([1, 2, 3]))