java tar apache-commons-compress

Add files and directories to a tar archive with the Apache Commons Compress library in a machine-independent way


I'm working on an application that needs to create a tar archive in order to calculate its hash, but I have run into some problems:

I have read several posts on SO, the dedicated tutorial on the Apache site, and also the test source code of the Apache Commons Compress jar, but I have not found the right solution.

Can anybody find where my code is incorrect?

    /**
     * Writes the given files (directories are added recursively) into a tar
     * archive and returns the archive file.
     *
     * <p>The archive path is built from {@code TEMP_DIR}, the name of
     * {@code repository} and {@code Constants.TAR_EXTENSION}. A file already
     * present at that path is deleted first.
     *
     * <p>Failures are reported with {@code printStackTrace()} and are not
     * rethrown, so the returned file may be incomplete when an error occurred.
     *
     * @param files      the files and directories to add, in the given order
     * @param repository the file whose name is used as the archive's base name
     * @return the tar file that was written
     */
    public static File createTarFile(File[] files, File repository) {
        File tarFile = new File(TEMP_DIR + File.separator + repository.getName() + Constants.TAR_EXTENSION);
        if (tarFile.exists()) {
            tarFile.delete();
        }

        // try-with-resources closes both streams even when adding an entry fails;
        // previously the close() calls were skipped on any exception.
        try (OutputStream out = new FileOutputStream(tarFile);
             TarArchiveOutputStream aos = new TarArchiveOutputStream(out)) {

            // The default long-file mode rejects entry names longer than 100 bytes;
            // GNU mode stores them instead of throwing.
            aos.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);

            for (File file : files) {
                Utilities.addFileToTar(aos, file, "");
            }

            aos.finish();
        } catch (Exception e) {
            // Kept broad on purpose: callers have never seen an exception from this method.
            e.printStackTrace();
        }

        return tarFile;
    }

/**
 * Adds a file, or a directory and everything below it, to the tar stream
 * using fixed metadata (modification time, owner ids and owner names) so that
 * the entry headers do not depend on the source file's timestamp or owner.
 *
 * @param tOut the tar stream to write to
 * @param file the file or directory to add
 * @param base the entry-name prefix for {@code file}: empty for a top-level
 *             entry, otherwise the parent's entry name followed by {@code "/"}
 * @throws IOException if reading the file or writing the entry fails
 */
private static void addFileToTar(TarArchiveOutputStream tOut, File file, String base) throws IOException {
    String entryName = base + file.getName();

    TarArchiveEntry entry = new TarArchiveEntry(file, entryName);
    entry.setModTime(0);
    entry.setUserId(0);
    entry.setGroupId(0);
    entry.setUserName("avalon");
    entry.setGroupName("excalibur");

    if (file.isFile()) {
        entry.setMode(0100000);
        entry.setSize(file.length());
        tOut.putArchiveEntry(entry);
        // Close the input stream once copied; it used to be leaked.
        try (FileInputStream in = new FileInputStream(file)) {
            IOUtils.copy(in, tOut);
        }
        tOut.closeArchiveEntry();
        return;
    }

    // A directory entry carries no data: its size must be 0, otherwise
    // closeArchiveEntry() fails because fewer bytes were written than declared.
    entry.setMode(TarArchiveEntry.DEFAULT_DIR_MODE);
    entry.setSize(0);
    tOut.putArchiveEntry(entry);
    tOut.closeArchiveEntry();

    String[] childNames = file.list();
    if (childNames != null) {
        // list() order is unspecified; sorting the plain names gives the same
        // entry order on every platform.
        java.util.Arrays.sort(childNames);
        for (String childName : childNames) {
            // Pass the full entry path plus separator so nested entries keep their parents.
            addFileToTar(tOut, new File(file, childName), entryName + "/");
        }
    }
}

Thank you.


Solution

  • I finally found the solution after reading caarlos0's post: Encoding problem when compressing files with Apache Commons Compression on Linux

    Using the apache-commons-1.8.jar library, I have written a utility class that does the job:

    You can find this code here: GitHub repository of the MakeTar library

    import java.io.BufferedOutputStream;
    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileOutputStream;
    import java.io.IOException;
    import java.util.Arrays;
    
    import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
    import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream;
    import org.apache.commons.compress.utils.IOUtils;
    
    /**
     * Static helpers that write files and directory trees into a tar archive
     * with a deterministic entry order and a zeroed modification time on every
     * entry.
     *
     * <p>Only regular files become archive entries; directories are traversed
     * but no entry is written for the directory itself.
     */
    public class TarArchive {

        /**
         * Creates a tar archive containing the given files and directories.
         *
         * <p>The paths are processed in sorted order. The caller's array is
         * not modified.
         *
         * @param files the paths of the files or directories to add
         * @param tarPath the path of the tar file to write
         * @throws IOException Signals that an I/O exception has occurred.
         */
        public static void createTarOfFiles(String[] files, String tarPath) throws IOException
        {
            // Sort a copy so the entry order is stable without reordering the caller's array.
            String[] sortedFiles = files.clone();
            Arrays.sort(sortedFiles);

            writeTar(sortedFiles, tarPath);
        }

        /**
         * Creates a tar archive containing the given directory and everything below it.
         *
         * @param directoryPath the path of the directory to archive
         * @param tarPath the path of the tar file to write
         * @throws IOException Signals that an I/O exception has occurred.
         */
        public static void createTarOfDirectory(String directoryPath, String tarPath) throws IOException
        {
            writeTar(new String[] { directoryPath }, tarPath);
        }

        /**
         * Opens the tar file, adds every path in the given order and closes all streams.
         *
         * @param paths the paths to add, already in the desired order
         * @param tarPath the path of the tar file to write
         * @throws IOException Signals that an I/O exception has occurred.
         */
        private static void writeTar(String[] paths, String tarPath) throws IOException
        {
            // Each stream is its own resource so that all of them are closed even if
            // opening or finishing the archive fails. The former finally block
            // dereferenced streams that were still null when opening had failed.
            try (FileOutputStream fOut = new FileOutputStream(new File(tarPath));
                 BufferedOutputStream bOut = new BufferedOutputStream(fOut);
                 TarArchiveOutputStream tOut = new TarArchiveOutputStream(bOut))
            {
                // Set once per archive instead of on every recursive call.
                tOut.setLongFileMode(TarArchiveOutputStream.LONGFILE_GNU);

                for (String path : paths)
                {
                    addFileToTar(tOut, path, "");
                }

                tOut.finish();
            }
        }

        /**
         * Adds a file to the tar stream, or recurses into a directory and adds its content.
         *
         * @param tOut the tar stream to write to
         * @param path the path of the file or directory to add
         * @param base the entry-name prefix: empty at the top level, otherwise
         *             the parent's entry name followed by {@code "/"}
         * @throws IOException Signals that an I/O exception has occurred.
         */
        private static void addFileToTar(TarArchiveOutputStream tOut, String path, String base) throws IOException
        {
            File f = new File(path);
            String entryName = base + f.getName();

            if(f.isFile())
            {
                TarArchiveEntry tarEntry = new TarArchiveEntry(f, entryName);
                // A fixed timestamp keeps the header independent of when the file was written.
                tarEntry.setModTime(0);
                tOut.putArchiveEntry(tarEntry);

                // Close the input stream once copied; it used to be leaked.
                try (FileInputStream in = new FileInputStream(f))
                {
                    IOUtils.copy(in, tOut);
                }

                tOut.closeArchiveEntry();
                return;
            }

            // list() returns null when the path is not a readable directory;
            // check before sorting to avoid a NullPointerException.
            String[] childNames = f.list();
            if(childNames == null)
            {
                return;
            }

            // Sort the plain names rather than File objects: File.compareTo ignores
            // case on Windows, which would make the entry order platform-dependent.
            Arrays.sort(childNames);

            for(String childName : childNames)
            {
                addFileToTar(tOut, new File(f, childName).getAbsolutePath(), entryName + "/");
            }
        }
    }
    

    Thanks for reading.

    EDIT: Small correction: I have added sorting of the arrays.

    EDIT 2: I have corrected the code so that it produces the same archive on every machine. The hash calculated on the archive is the same everywhere.