python zlib deflate cab

zlib: Error -3 while decompressing data: invalid distance too far back


I'm trying to write code that reads and writes MS-ZIP compressed CAB files. MS-ZIP uses the same deflate algorithm that zlib implements. I've reduced the problem to the following script, which involves only zlib.

I cannot get the decompression code to correctly reverse what the compression code produces.

import sys, struct, zlib

# Maximum number of uncompressed bytes handed to each compressor in compress().
MAX_CHUNK_SIZE = 100

def main():
    """Round-trip a fixed sample through compress() and decompress().

    Prints the interpreter version and the compile-time and runtime zlib
    versions, then asserts that decompressing the compressed chunks yields
    the original bytes.
    """
    sample = (b'GQOLELNFJH@?AQE@LBA=?@N@<GJHE=EGE<FQOAM@?<IABT>EK'
              b'<=QFRPBSRGFRENSJLDFGD=LSRHIAO?FB@NP?DGDS>NGT@CCFPS'
              b'A@B=IGG<?JQBGBPLOPONU?IBBSNBK<QAFLGK@>H=CQ?BS><@UE'
              b'QGAKHML@>?JOSEQRCTP>S<?N>DNM@??ARJ>QUJSHLQN<P<>D==')

    # Report the environment first so a failure can be tied to a version.
    print("python", sys.version)
    print("zlib version", zlib.ZLIB_VERSION)
    print("zlib runtime version", zlib.ZLIB_RUNTIME_VERSION)

    restored = decompress(compress(sample))
    assert restored == sample


def compress(data):
    """Deflate *data* in pieces of at most MAX_CHUNK_SIZE bytes.

    Every piece is compressed by its own raw-deflate compressor
    (``wbits=-15``) and finished with ``Z_FINISH``, so each list entry is a
    complete stream. The compressor for a piece is primed with the previous
    uncompressed piece as its preset dictionary; the first piece gets an
    empty dictionary.

    Returns the compressed pieces as a list of bytes objects, in input
    order. Empty input yields an empty list.
    """
    pieces = []
    history = b''  # previous uncompressed piece, used as the preset dictionary

    for offset in range(0, len(data), MAX_CHUNK_SIZE):
        print("compress chunk %d"%len(pieces))

        # Slicing clamps at the end of data, so the last piece may be short.
        block = data[offset:offset + MAX_CHUNK_SIZE]

        deflater = zlib.compressobj(wbits=-15, zdict=history)
        pieces.append(deflater.compress(block) + deflater.flush(zlib.Z_FINISH))

        history = block

    return pieces

def decompress(chunks):
    """Inflate the raw-deflate streams in *chunks* and join the results.

    Each stream is handled by a fresh raw-deflate decompressor
    (``wbits=-15``) whose preset dictionary is the decompressed output of
    the previous stream; the first stream gets an empty dictionary.

    Returns the concatenated decompressed bytes (``b''`` when *chunks* is
    empty). Errors raised by zlib for a bad stream propagate unchanged.
    """
    parts = []
    history = b''  # output of the previous stream, used as the dictionary

    for index, stream in enumerate(chunks):
        print("decompress chunk %d"%index)

        inflater = zlib.decompressobj(wbits=-15, zdict=history)
        plain = inflater.decompress(stream) + inflater.flush()

        parts.append(plain)
        history = plain

    return b''.join(parts)


# Run the round-trip check only when this file is executed as a script.
if __name__ == '__main__':
    main()

When I run this script I get:

python 3.4.5 (default, Jul 03 2016, 13:55:08) [GCC]
zlib version 1.2.8
zlib runtime version 1.2.8
compress chunk 0
compress chunk 1
decompress chunk 0
decompress chunk 1
Traceback (most recent call last):
  File "bug.py", line 65, in <module>
    main()
  File "bug.py", line 16, in main
    decomp = decompress(chunks)
  File "bug.py", line 55, in decompress
    out += z.decompress(c)
zlib.error: Error -3 while decompressing data: invalid distance too far back

It also fails on ideone.com (http://ideone.com/baD3gg), which is running Python 3.4.3+ with zlib 1.2.8.


Solution

  • It seems I was running into CPython issue #27164: zlib.decompressobj() did not apply the zdict argument to raw deflate streams (negative wbits). As of this writing (2016-11-21), a fix is available only in the master and 3.5 branches.

    https://bugs.python.org/issue27164