Tags: python, performance, gzip, lzma, bz2

Python decompression relative performance?


TL;DR: Of the various compression modules available in Python (gzip, bz2, lzma, etc.), which has the best decompression performance?

Full discussion:

Python 3 has various modules for compressing/decompressing data including gzip, bz2 and lzma. gzip and bz2 additionally have different compression levels you can set.

If my goal is to balance file size (i.e. compression ratio) and decompression speed (compression speed is not a concern), which is going to be the best choice? Decompression speed is more important than file size, but since the uncompressed files in question would be around 600-800 MB each (32-bit RGB .png image files) and I have a dozen of them, I do want some compression.

┌────────────┬────────────────────────┬───────────────┬─────────────┐
│ Python Ver │     Library/Method     │ Read/unpack + │ Compression │
│            │                        │ Decompress (s)│    Ratio    │
├────────────┼────────────────────────┼───────────────┼─────────────┤
│ 3.7.2      │ pillow (PIL.Image)     │ 4.0           │ ~0.006      │
│ 3.7.2      │ Qt (QImage)            │ 3.8           │ ~0.006      │
│ 3.7.2      │ numpy (uncompressed)   │ 0.8           │ 1.0         │
│ 3.7.2      │ gzip (compresslevel=9) │ ?             │ ?           │
│ 3.7.2      │ gzip (compresslevel=?) │ ?             │ ?           │
│ 3.7.2      │ bz2 (compresslevel=9)  │ ?             │ ?           │
│ 3.7.2      │ bz2 (compresslevel=?)  │ ?             │ ?           │
│ 3.7.2      │ lzma                   │ ?             │ ?           │
├────────────┼────────────────────────┼───────────────┼─────────────┤
│ 3.7.3      │ ?                      │ ?             │ ?           │  
├────────────┼────────────────────────┼───────────────┼─────────────┤
│ 3.8beta1   │ ?                      │ ?             │ ?           │
├────────────┼────────────────────────┼───────────────┼─────────────┤
│ 3.8.0final │ ?                      │ ?             │ ?           │
├────────────┼────────────────────────┼───────────────┼─────────────┤
│ 3.5.7      │ ?                      │ ?             │ ?           │
├────────────┼────────────────────────┼───────────────┼─────────────┤
│ 3.6.10     │ ?                      │ ?             │ ?           │
└────────────┴────────────────────────┴───────────────┴─────────────┘
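For reference, a minimal sketch of how the gzip/bz2/lzma rows of this table could be filled in: compress the pickled array bytes once with each stdlib module, then time only the `decompress()` call. This assumes `arr` is the numpy array loaded from the PNG as in the code below; the labels and level choices are illustrative.

import bz2
import gzip
import lzma
import pickle
import time

# `arr` is the numpy array decoded from the PNG (see the PIL snippet below)
raw = pickle.dumps(arr, protocol=pickle.HIGHEST_PROTOCOL)

for name, module, kwargs in [
    ("gzip (compresslevel=9)", gzip, {"compresslevel": 9}),
    ("gzip (compresslevel=1)", gzip, {"compresslevel": 1}),
    ("bz2  (compresslevel=9)", bz2, {"compresslevel": 9}),
    ("lzma (default preset)",  lzma, {}),
]:
    blob = module.compress(raw, **kwargs)          # compression time is not measured
    start = time.time()
    module.decompress(blob)
    print(name, "ratio:", round(len(blob) / len(raw), 3),
          "decompress:", round(time.time() - start, 2), "s")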

Sample .png image: As an example, take this 5.0 MB PNG image, a fairly high-resolution image of the coastline of Alaska.

Code for the png/PIL case (load into a numpy array):

from PIL import Image
import time
import numpy

start = time.time()
FILE = '/path/to/file/AlaskaCoast.png'
# Disable PIL's decompression-bomb safety check for very large images
Image.MAX_IMAGE_PIXELS = None
img = Image.open(FILE)
arr = numpy.array(img)  # decode the PNG into a numpy array
print("Loaded in", time.time()-start)

This load takes around 4.2s on my machine with Python 3.7.2.

Alternatively, I can instead load the uncompressed pickle file generated by pickling the array created above.

Code for the uncompressed pickle load case:

import pickle
import time

start = time.time()    
with open('/tmp/test_file.pickle','rb') as picklefile:
  arr = pickle.load(picklefile)    
print("Loaded in", time.time()-start)

Loading from this uncompressed pickle file takes ~0.8s on my machine.
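For completeness, the uncompressed pickle file above can be produced from the PIL-loaded array with something like this (a sketch; the path and the pickle protocol choice are illustrative):

import pickle

# `arr` is the numpy array produced by the PIL snippet above
with open('/tmp/test_file.pickle', 'wb') as picklefile:
    pickle.dump(arr, picklefile, protocol=pickle.HIGHEST_PROTOCOL)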


Solution

  • You can use Python-blosc

    It is very fast and, for small arrays (<2 GB), also quite easy to use. On easily compressible data like your example, it is often faster to compress the data for I/O operations (SATA SSD: about 500 MB/s, PCIe SSD: up to 3500 MB/s). In the decompression step, the array allocation is the most costly part. If your images have similar shapes you can avoid repeated memory allocation.

    Example

    A contiguous array is assumed for the following example.

    import blosc
    import pickle
    import numpy as np

    def compress(arr, Path):
        #alternative: cname='lz4' (faster compression, larger file; see benchmarks below)
        #c = blosc.compress_ptr(arr.__array_interface__['data'][0], arr.size, arr.dtype.itemsize, clevel=3, cname='lz4', shuffle=blosc.SHUFFLE)
        c = blosc.compress_ptr(arr.__array_interface__['data'][0], arr.size, arr.dtype.itemsize, clevel=3, cname='zstd', shuffle=blosc.SHUFFLE)
        f = open(Path, "wb")
        pickle.dump((arr.shape, arr.dtype), f)
        f.write(c)
        f.close()
        return c, arr.shape, arr.dtype

    def decompress(Path):
        f = open(Path, "rb")
        shape, dtype = pickle.load(f)
        c = f.read()
        #array allocation takes most of the time
        arr = np.empty(shape, dtype)
        blosc.decompress_ptr(c, arr.__array_interface__['data'][0])
        return arr

    #Pass a preallocated array if you have many similar images
    def decompress_pre(Path, arr):
        f = open(Path, "rb")
        shape, dtype = pickle.load(f)
        c = f.read()
        #decompress directly into the preallocated array (no allocation cost)
        blosc.decompress_ptr(c, arr.__array_interface__['data'][0])
        return arr
    

    Benchmarks

    #blosc.SHUFFLE, cname='zstd' -> 4728KB,  
    %timeit compress(arr,"Test.dat")
    1.03 s ± 12.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
    #611 MB/s
    %timeit decompress("Test.dat")
    146 ms ± 481 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
    #4310 MB/s
    %timeit decompress_pre("Test.dat",arr)
    50.9 ms ± 438 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
    #12362 MB/s
    
    #blosc.SHUFFLE, cname='lz4' -> 9118KB, 
    %timeit compress(arr,"Test.dat")
    32.1 ms ± 437 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
    #19602 MB/s
    %timeit decompress("Test.dat")
    146 ms ± 332 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
    #4310 MB/s
    %timeit decompress_pre("Test.dat",arr)
    53.6 ms ± 82.9 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
    #11740 MB/s
    

    Edit

    This version is more general: it handles F-contiguous, C-contiguous and non-contiguous arrays, as well as arrays larger than 2 GB. Also have a look at bloscpack.

    import blosc
    import pickle
    import numpy as np

    def compress(file, arr, clevel=3, cname='lz4', shuffle=1):
        """
        file           path to file
        arr            numpy nd-array
        clevel         0..9
        cname          blosclz,lz4,lz4hc,snappy,zlib,zstd
        shuffle        0 -> no shuffle, 1 -> shuffle, 2 -> bitshuffle
        """
        max_blk_size = 100_000_000  #100 MB

        shape = arr.shape
        #object dtype is not implemented
        if arr.dtype == object:
            raise TypeError("dtype object is not implemented")

        #Handling of Fortran-ordered arrays (avoid copy)
        is_f_contiguous = False
        if arr.flags['F_CONTIGUOUS']:
            is_f_contiguous = True
            arr = arr.T.reshape(-1)
        else:
            arr = np.ascontiguousarray(arr.reshape(-1))

        #Writing
        max_num = max_blk_size // arr.dtype.itemsize
        num_chunks = arr.size // max_num

        if arr.size % max_num != 0:
            num_chunks += 1

        f = open(file, "wb")
        pickle.dump((shape, arr.size, arr.dtype, is_f_contiguous, num_chunks, max_num), f)
        size = np.empty(1, np.uint32)
        num_write = max_num
        for i in range(num_chunks):
            if max_num * (i + 1) > arr.size:
                num_write = arr.size - max_num * i
            c = blosc.compress_ptr(arr[max_num * i:].__array_interface__['data'][0], num_write,
                                   arr.dtype.itemsize, clevel=clevel, cname=cname, shuffle=shuffle)
            size[0] = len(c)
            size.tofile(f)
            f.write(c)
        f.close()

    def decompress(file, prealloc_arr=None):
        f = open(file, "rb")
        shape, arr_size, dtype, is_f_contiguous, num_chunks, max_num = pickle.load(f)

        if prealloc_arr is None:
            #array allocation takes most of the time
            arr = np.empty(arr_size, dtype)
        else:
            if prealloc_arr.flags['F_CONTIGUOUS']:
                prealloc_arr = prealloc_arr.T
            if not prealloc_arr.flags['C_CONTIGUOUS']:
                raise TypeError("Contiguous array is needed")
            arr = np.frombuffer(prealloc_arr.data, dtype=dtype, count=arr_size)

        for i in range(num_chunks):
            size = np.fromfile(f, np.uint32, count=1)
            c = f.read(size[0])
            blosc.decompress_ptr(c, arr[max_num * i:].__array_interface__['data'][0])
        f.close()

        #reshape
        if is_f_contiguous:
            arr = arr.reshape(shape[::-1]).T
        else:
            arr = arr.reshape(shape)
        return arr
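
    Usage could look like this (a sketch; the file name and the preallocated-array reuse are illustrative, not part of the functions above):

    import numpy as np
    from PIL import Image

    arr = np.array(Image.open('/path/to/file/AlaskaCoast.png'))  # array from the question
    compress("Test.dat", arr, clevel=3, cname='zstd', shuffle=1)

    restored = decompress("Test.dat")                    # allocates a fresh array
    buf = np.empty_like(restored)
    restored = decompress("Test.dat", prealloc_arr=buf)  # reuses the preallocated buffer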