python, java, encoding, compression, zlib

Converting zlib-related Python code to Java


I am trying to convert the Python code below into Java:

 def _convert_base10encoded_to_decompressed_array(base10encodedstring) -> None:
        """Decode an integer into bytes, gunzip them, and store the result.

        ``base10encodedstring`` must support ``int.to_bytes`` (i.e. it is an
        integer despite its name). The decompressed bytes are written to
        ``self.decompressed_array``; nothing is returned.
        """
        # Render the integer as a fixed 5000-byte big-endian field; values that
        # do not fit in 5000 bytes make to_bytes raise OverflowError.
        padded = base10encodedstring.to_bytes(5000, 'big')
        # Drop the zero padding in front of the actual payload.
        payload = padded.lstrip(b'\x00')
        # 16 + MAX_WBITS asks zlib to expect a gzip header and trailer.
        gzip_wbits = 16 + zlib.MAX_WBITS
        self.decompressed_array = zlib.decompress(payload, gzip_wbits)

The Java code:

/**
 * Parses a base-10 string into a {@link BigInteger}, converts it to bytes, strips leading
 * zero bytes, inflates the remainder, and stores the output in {@code decompressedArray}.
 *
 * <p>NOTE(review): as written, this fails on gzip-framed input with
 * "invalid block type". {@code new Inflater(true)} selects raw DEFLATE mode (no
 * zlib/gzip header is parsed), so the gzip header bytes are misread as a DEFLATE block.
 *
 * @param base10encodedstring decimal representation of the compressed bytes.
 *     NOTE(review): the body reads the field {@code this.base10encodedstring}, not this
 *     parameter -- confirm which one is intended.
 * @throws Exception wrapping the {@link DataFormatException} when inflation fails
 */
public void convertBase10EncodedToDecompressedArray(String base10encodedstring) throws Exception {
        // Step 1: Parse the decimal string into a BigInteger.
        // NOTE(review): this uses the instance field, not the method parameter.
        BigInteger bigIntValue = new BigInteger(this.base10encodedstring);

        // Step 2: Convert BigInteger to a byte array in big-endian order
        byte[] bigIntBytes = bigIntValue.toByteArray();
        byte[] bytesArray = new byte[5000]; // 5000-byte array (zero-initialized), mirroring Python's to_bytes(5000, 'big')

        // Right-align the BigInteger bytes inside the 5000-byte array so the front is zero padding.
        // NOTE(review): copyStart becomes negative when bigIntBytes is longer than 5000 bytes,
        // in which case the arraycopy below throws.
        int copyStart = 5000 - bigIntBytes.length;
        System.arraycopy(bigIntBytes, 0, bytesArray, copyStart, bigIntBytes.length);

        // Step 3: Remove leading zeros to mimic Python's lstrip(b'\x00')
        int nonZeroIndex = 0;
        while (nonZeroIndex < bytesArray.length && bytesArray[nonZeroIndex] == 0) {
            nonZeroIndex++;
        }
        byte[] trimmedBytesArray = new byte[bytesArray.length - nonZeroIndex];
        System.arraycopy(bytesArray, nonZeroIndex, trimmedBytesArray, 0, trimmedBytesArray.length);

        // Step 4: Decompress the byte array using Inflater
        // NOTE(review): `true` is the `nowrap` flag. It makes Inflater expect raw DEFLATE data and
        // skip header/checksum handling; it does NOT make Inflater parse a gzip header.
        Inflater inflater = new Inflater(true);
        inflater.setInput(trimmedBytesArray);

        // Prepare output buffer for decompressed data
        byte[] output = new byte[10000]; // A single inflate() call fills at most this many bytes
        int decompressedDataLength;

        try {
            // Only one inflate() call is made, so output beyond 10000 bytes is never read.
            decompressedDataLength = inflater.inflate(output);
            // end() is only reached on the success path; it is skipped when inflate() throws.
            inflater.end();
        } catch (DataFormatException e) {
            throw new Exception("Decompression failed: " + e.getMessage(), e);
        }

        // Store exactly the decompressed bytes (without the unused tail of the buffer) in decompressedArray
        decompressedArray = new byte[decompressedDataLength];
        System.arraycopy(output, 0, decompressedArray, 0, decompressedDataLength);
    }

The Java code does not seem to perform the same conversion as the Python code, and it fails with:

Decompression failed: invalid block type
java.lang.Exception: Decompression failed: invalid block type

Here is the sample data that can be used for testing 119042439702058329181580366339137026727847632133400914982925157100269769134885477532825542376007928853630543405886877144858046550370019093958674847428579819304849334337536

This works in Python but fails in Java. I generated it using the following logic:

sample_data = b"This is a sample data for testing zlib decompression."

# Write the payload through a gzip stream backed by an in-memory buffer.
buf = io.BytesIO()
f = gzip.GzipFile(fileobj=buf, mode='wb')
try:
    f.write(sample_data)
finally:
    # Closing the gzip stream flushes the remaining compressed data into buf.
    f.close()

# Snapshot the complete gzip byte string.
compressed_data = buf.getvalue()

# Interpret those bytes as a single big-endian integer.
compressed_int = int.from_bytes(compressed_data, byteorder='big')

Solution

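  • The sample data is gzip-framed (it starts with the gzip magic bytes 1f 8b), but new Inflater(true) expects raw DEFLATE data with no header, so it misreads the gzip header as a DEFLATE block and reports "invalid block type". Python's 16 + zlib.MAX_WBITS selects gzip decoding; the Java counterpart is GZIPInputStream, which removes the need for Inflater. This works fine: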

    /** Result of the most recent successful call to convertBase10EncodedToDecompressedArray. */
    private byte[] decompressedArray;

        /**
         * Decodes a base-10 string into gzip bytes, decompresses them, and stores the result
         * in {@code decompressedArray}.
         *
         * <p>Java counterpart of Python's
         * {@code zlib.decompress(n.to_bytes(5000, 'big').lstrip(b'\x00'), 16 + zlib.MAX_WBITS)}.
         * Unlike the Python original, the input is not limited to 5000 bytes: leading zero
         * bytes are stripped directly from the BigInteger's byte array instead of padding into
         * a fixed 5000-byte buffer first (which failed for longer inputs).
         *
         * @param base10encodedstring decimal representation of the gzip byte string, read as
         *     one big-endian integer
         * @throws NumberFormatException if the argument is not a valid decimal integer
         * @throws Exception if the decoded bytes are not valid gzip data (an
         *     {@link java.io.IOException} raised by {@link GZIPInputStream})
         */
        public void convertBase10EncodedToDecompressedArray(String base10encodedstring) throws Exception {
            // Step 1: Parse the decimal string and take its big-endian byte representation.
            byte[] bigIntBytes = new BigInteger(base10encodedstring).toByteArray();

            // Step 2: Drop leading zero bytes (this also removes the extra sign byte that
            // toByteArray() prepends when the top bit of the magnitude is set).
            int firstNonZero = 0;
            while (firstNonZero < bigIntBytes.length && bigIntBytes[firstNonZero] == 0) {
                firstNonZero++;
            }
            byte[] gzipBytes = Arrays.copyOfRange(bigIntBytes, firstNonZero, bigIntBytes.length);

            // Step 3: Use GZIPInputStream for decompression; it parses the gzip header itself.
            try (ByteArrayInputStream bis = new ByteArrayInputStream(gzipBytes);
                 GZIPInputStream gzis = new GZIPInputStream(bis);
                 ByteArrayOutputStream bos = new ByteArrayOutputStream()) {

                byte[] buffer = new byte[1024];
                int len;
                // read() returns -1 at end of stream.
                while ((len = gzis.read(buffer)) != -1) {
                    bos.write(buffer, 0, len);
                }

                // Store the decompressed data
                decompressedArray = bos.toByteArray();

                // Output for testing; the bytes are decoded with the platform default charset.
                System.out.println("Decompressed content: " + new String(decompressedArray));
            }
        }