Tags: java, file, offset, datainputstream, packing

java.io.UTFDataFormatException while reading file entry name


I'm trying to "pack" several files (previously inside a jar archive) into another single non-jar file by using DataInputStream / DataOutputStream.

The idea was:

    First int = number of entries
    
    First UTF is the first entry name
    
    Second Int is entry byte array length (entry size)

    Then repeat for every entry.

The code:

 /**
  * Packs the jar via {@code test()} and then reads the packed file back,
  * printing each entry's name and the number of bytes read for it.
  */
 public static void main(String[] args) throws Throwable {
        // Produce the packed file first; everything below only reads it back.
        test();

        System.out.println("========================================================================================");

        final File packedFile = new File("C:\\Users\\Admin\\Desktop\\randomJarOut");
        final DataInputStream in = new DataInputStream(new FileInputStream(packedFile));

        // Header: number of entries that follow.
        final int entryCount = in.readInt();
        int entryIndex = 0;
        while (entryIndex < entryCount) {
            final String entryName = in.readUTF();
            System.out.println("Entry name: " + entryName);

            // The declared length decides how many bytes are consumed for this entry.
            final int declaredLength = in.readInt();
            final byte[] payload = new byte[declaredLength];
            int position = 0;
            while (position < payload.length) {
                payload[position] = in.readByte();
                position++;
            }
            System.out.println("Entry bytes length: " + payload.length);

            entryIndex++;
        }

    }

Unpacking original & packing to new one:

/**
 * Reads randomJar.jar and writes its entries to the file randomJarOut through a DataOutputStream.
 * The output starts with an int entry count; each written entry is a UTF name, an int length
 * and then bytes copied from the jar entry.
 *
 * @throws Throwable anything raised while reading the jar or writing the output file
 */
private static void test() throws Throwable {
    // Two separate streams over the same jar: `stream` is used up by the counting pass,
    // `stream1` by the copying pass. Neither stream is closed in this method.
    JarInputStream stream = new JarInputStream(new FileInputStream(new File("C:\\Users\\Admin\\Desktop\\randomJar.jar")));
    JarInputStream stream1 = new JarInputStream(new FileInputStream(new File("C:\\Users\\Admin\\Desktop\\randomJar.jar")));

    // Shared scratch buffer; a single read(buffer) call can return at most 2048 bytes.
    final byte[] buffer = new byte[2048];
    final DataOutputStream outputStream = new DataOutputStream(new FileOutputStream(new File("C:\\Users\\Admin\\Desktop\\randomJarOut")));

    // First pass: count every entry the stream returns.
    int entryCount = 0;
    for (ZipEntry entry; (entry = stream.getNextJarEntry()) != null; ) {
        entryCount++;
    }

    // Header of the output file: the number of entries counted above.
    outputStream.writeInt(entryCount);

    // Second pass: write name, length and data for each entry.
    for (JarEntry entry; (entry = stream1.getNextJarEntry()) != null; ) {
        // entryRealSize is the byte count returned by this ONE read call (so at most
        // buffer.length), and it is the value written as the entry length below.
        int entryRealSize = stream1.read(buffer);
        // Entries whose first read returns -1 are skipped here, although the counting
        // pass above still included them in entryCount.
        if (!(entryRealSize == -1)) {
            System.out.println("Writing: " + entry.getName() + " Length: " + entryRealSize);

            outputStream.writeUTF(entry.getName());
            outputStream.writeInt(entryRealSize);

            // NOTE(review): this loop begins with a fresh read, so the chunk already read
            // into `buffer` above is overwritten without being written to outputStream;
            // only the bytes returned by the reads in this loop reach the output file.
            for (int len = stream1.read(buffer); len != -1; len = stream1.read(buffer)) {
                outputStream.write(buffer, 0, len);
            }
        }
    }
    outputStream.flush();
    outputStream.close();
}

Apparently I'm able to unpack the first entry without any problems, but reading the second one (and the others) fails:

Entry name: META-INF/services/org.jd.gui.spi.ContainerFactory
Entry bytes length: 434
Exception in thread "main" java.io.UTFDataFormatException: malformed input around byte 279
    at java.io.DataInputStream.readUTF(DataInputStream.java:656)
    at java.io.DataInputStream.readUTF(DataInputStream.java:564)
    at it.princekin.esercizio.Bootstrap.main(Bootstrap.java:29)
Disconnected from the target VM, address: '127.0.0.1:54384', transport: 'socket'

Process finished with exit code 1

Does anyone know how to fix this? Why is this working for the first entry but not the others?


Solution

  • My take on this is that the jar file (which in fact is a zip file) has a Central Directory which is only read with the ZipFile (or JarFile) class. The Central Directory contains some data about the entries such as the size.

    I think the ZipInputStream will not read the Central Directory and thus the ZipEntry will not contain the size (returning -1 as it is unknown) whereas reading ZipEntry from ZipFile class will.

    So if you first read the size of each entry using a ZipFile and store that in a map, you can easily get it when reading the data with the ZipInputStream.

    This page includes some good examples as well.

    So my version of your code would be:

    import java.io.*;
    import java.util.HashMap;
    import java.util.Map;
    import java.util.zip.ZipEntry;
    import java.util.zip.ZipFile;
    import java.util.zip.ZipInputStream;
    
    /**
     * Repacks the entries of a jar/zip archive into a simple length-prefixed binary file
     * and reads that file back.
     *
     * <p>File layout: an {@code int} entry count, followed for each entry by its name
     * ({@link DataOutputStream#writeUTF}), an {@code int} byte length and exactly that many bytes.
     *
     * <p>Entry sizes are taken from the archive's central directory through {@link ZipFile},
     * because a {@link ZipInputStream} may not know the size of an entry before its data
     * has been read.
     */
    public class JarRepacker {

        /**
         * Repacks {@code commons-cli-1.3.1.jar} into {@code randomJarOut.bin} (both relative to
         * the working directory) and then lists the entries of the produced file.
         *
         * @param args unused
         * @throws IOException if either file cannot be read or written
         */
        public static void main(String[] args) throws IOException {
            JarRepacker repacker = new JarRepacker();
            repacker.repackJarToMyFileFormat("commons-cli-1.3.1.jar", "randomJarOut.bin");
            repacker.readMyFileFormat("randomJarOut.bin");
        }

        /**
         * Writes every entry of {@code inputJar} to {@code outputFile} in the packed layout.
         *
         * @param inputJar   path of the jar/zip archive to read
         * @param outputFile path of the packed file to create
         * @throws IOException if reading or writing fails, or if the entries streamed from the
         *                     archive do not match what its central directory declares
         */
        private void repackJarToMyFileFormat(String inputJar, String outputFile) throws IOException {
            final int entryCount;
            final Map<String, Integer> sizeMap = new HashMap<>();
            // Pass 1: collect the declared size of each entry from the central directory.
            try (ZipFile zipFile = new ZipFile(inputJar)) {
                entryCount = zipFile.size();
                zipFile.stream().forEach(e -> sizeMap.put(e.getName(), checkedSize(e)));
            }

            // Pass 2: stream the entries and copy their data behind a name/size header.
            try (DataOutputStream outputStream
                         = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(outputFile)));
                 ZipInputStream stream
                         = new ZipInputStream(new BufferedInputStream(new FileInputStream(inputJar)))) {

                outputStream.writeInt(entryCount);

                final byte[] buffer = new byte[2048];
                int entriesWritten = 0;
                ZipEntry entry;
                while ((entry = stream.getNextEntry()) != null) {
                    final String name = entry.getName();
                    final Integer size = sizeMap.get(name);
                    if (size == null) {
                        // Unboxing null would otherwise fail with an unhelpful NullPointerException.
                        throw new IOException("Entry " + name + " is not listed in the central directory of " + inputJar);
                    }
                    outputStream.writeUTF(name);
                    outputStream.writeInt(size);

                    long copied = 0;
                    int len;
                    while ((len = stream.read(buffer)) != -1) {
                        outputStream.write(buffer, 0, len);
                        copied += len;
                    }
                    // The reader trusts the size header, so a mismatch would corrupt every later entry.
                    if (copied != size) {
                        throw new IOException("Entry " + name + " declares " + size + " bytes but " + copied + " were read");
                    }
                    entriesWritten++;
                }
                if (entriesWritten != entryCount) {
                    throw new IOException("Expected " + entryCount + " entries in " + inputJar + " but streamed " + entriesWritten);
                }
            }
        }

        /**
         * Reads a packed file and prints the name and size of every entry it contains.
         *
         * @param fileToRead path of the packed file
         * @throws IOException if the file cannot be read, is truncated, or declares a negative size
         */
        private void readMyFileFormat(String fileToRead) throws IOException {
            try (DataInputStream dataInputStream
                         = new DataInputStream(new BufferedInputStream(new FileInputStream(fileToRead)))) {

                int entries = dataInputStream.readInt();
                System.out.println("Entries in file: " + entries);
                for (int i = 1; i <= entries; i++) {
                    final String name = dataInputStream.readUTF();
                    final int size = dataInputStream.readInt();
                    if (size < 0) {
                        throw new IOException("Negative size " + size + " for entry " + name);
                    }
                    System.out.printf("[%3d] Reading: %s of size: %d%n", i, name, size);
                    final byte[] array = new byte[size];
                    // readFully either fills the whole array or throws EOFException.
                    dataInputStream.readFully(array);
                    // Still need to do something with this array...
                }
            }
        }

        /**
         * Returns the uncompressed size of {@code entry} as an {@code int}.
         *
         * @throws IllegalArgumentException if the size is unknown (negative) or does not fit the
         *                                  {@code int} length field of the packed layout
         */
        private static int checkedSize(ZipEntry entry) {
            final long size = entry.getSize();
            if (size < 0 || size > Integer.MAX_VALUE) {
                throw new IllegalArgumentException("Unsupported size " + size + " for entry " + entry.getName());
            }
            return (int) size;
        }

    }