I need to change the ZIP compression level (`compresslevel`) that `np.savez_compressed` uses internally.
There is a feature proposal for this on NumPy's GitHub, but it has not been implemented yet.
I see two options:
The first is to modify the source file `numpy/lib/npyio.py`
and replace `zipf = zipfile_factory(file, mode="w", compression=compression)`
by `zipf = zipfile_factory(file, mode="w", compression=compression, compresslevel=compresslevel)`.
However, this creates a burden: after every re-install or upgrade (`pip install numpy`)
I have to redo the modification, so this is a suboptimal solution.
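How can I do this without editing the installed package?
Here I tried the second option (overriding `np.lib.npyio._savez` from my own code, i.e. monkey-patching), but it fails with `ValueError: seek of closed file`,
and I don't see why: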
import numpy as np
def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
import zipfile
if not hasattr(file, 'write'):
file = os_fspath(file)
if not file.endswith('.npz'):
file = file + '.npz'
namedict = kwds
for i, val in enumerate(args):
key = 'arr_%d' % i
if key in namedict.keys():
raise ValueError("Cannot use un-named variables and keyword %s" % key)
namedict[key] = val
if compress:
compression = zipfile.ZIP_DEFLATED
else:
compression = zipfile.ZIP_STORED
zipf = np.lib.npyio.zipfile_factory(file, mode="w", compression=compression, compresslevel=2) # !! the only modified line !!
for key, val in namedict.items():
fname = key + '.npy'
val = np.asanyarray(val)
# always force zip64, gh-10776
with zipf.open(fname, 'w', force_zip64=True) as fid:
format.write_array(fid, val, allow_pickle=allow_pickle, pickle_kwargs=pickle_kwargs)
zipf.close()
# Monkey-patch: make the npyio module use the _savez defined in this snippet.
np.lib.npyio._savez = _savez

# Minimal reproduction: save one small array to an already-open binary file.
x = np.array([1, 2, 3, 4])
with open("test.npz", "wb") as out:
    np.savez_compressed(out, x=x)
I found an even simpler solution:
import numpy as np
def zipfile_factory(file, *args, **kwargs):
    """Create a ``zipfile.ZipFile`` with Zip64 enabled and a default compresslevel.

    Intended as a replacement for ``np.lib.npyio.zipfile_factory`` so that
    archives written through it use ``compresslevel=4`` unless the caller
    asks for a different level.

    Parameters
    ----------
    file : str, path-like or file object
        Target of the archive. Objects without a ``read`` attribute are
        treated as paths and converted with ``os.fspath``.
    *args, **kwargs
        Forwarded to ``zipfile.ZipFile``. ``allowZip64`` is always forced to
        ``True``; ``compresslevel`` defaults to 4 when not supplied.

    Returns
    -------
    zipfile.ZipFile
        The opened archive; the caller is responsible for closing it.
    """
    import os
    import zipfile

    if not hasattr(file, 'read'):
        # The snippet originally called an undefined `os_fspath`, which raised
        # NameError for every path argument; os.fspath is the stdlib function.
        file = os.fspath(file)
    kwargs['allowZip64'] = True
    # Only a default: an explicit compresslevel from the caller is respected.
    kwargs.setdefault('compresslevel', 4)
    return zipfile.ZipFile(file, *args, **kwargs)
# Monkey-patch: route npyio's archive creation through the factory above.
np.lib.npyio.zipfile_factory = zipfile_factory

# Demo: write a large array of ones to an already-open binary file.
with open("test.npz", "wb") as out:
    np.savez_compressed(out, x=np.ones(10_000_000))
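Edit: older solution:
In the meantime I found the problem: inside my copy of `_savez`, the bare name `format` refers to Python's built-in `format` function and not to NumPy's `format` module, so `format`
should be replaced by `np.lib.npyio.format`.
Now this works: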
import numpy as np
def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
import zipfile
if not hasattr(file, 'write'):
file = os_fspath(file)
if not file.endswith('.npz'):
file = file + '.npz'
namedict = kwds
for i, val in enumerate(args):
key = 'arr_%d' % i
if key in namedict.keys():
raise ValueError("Cannot use un-named variables and keyword %s" % key)
namedict[key] = val
if compress:
compression = zipfile.ZIP_DEFLATED
else:
compression = zipfile.ZIP_STORED
zipf = np.lib.npyio.zipfile_factory(file, mode="w", compression=compression, compresslevel=1)
for key, val in namedict.items():
fname = key + '.npy'
val = np.asanyarray(val)
# always force zip64, gh-10776
with zipf.open(fname, 'w', force_zip64=True) as fid:
np.lib.npyio.format.write_array(fid, val, allow_pickle=allow_pickle, pickle_kwargs=pickle_kwargs)
zipf.close()
# Monkey-patch: make the npyio module use the _savez defined in this snippet.
np.lib.npyio._savez = _savez

# Demo: save one small array to an already-open binary file.
with open("test.npz", "wb") as out:
    np.savez_compressed(out, x=np.array([1, 2, 3]))