[SOLVED] How do I use Python 3.12 zipfile to extract all the data in a complex zip file

How do I use the Python 3.12 zipfile module to extract all the data in a complex zip file, i.e. one that contains other zip files?

I'm using this code:

# Open the archive read-only; the with-statement closes it automatically on exit.
with ZipFile(sourceZip, mode="r") as extraction:
    # Extracts this archive's own members only: any nested .zip members are
    # written out as ordinary files and are not unpacked themselves.
    extraction.extractall(extractionPath)
    # Redundant: the enclosing with-statement already closes the archive.
    extraction.close()

What happens is that only the first layer is extracted: the zip files contained in sourceZip are written to extractionPath, but they are not unpacked themselves.

I have read "https://docs.python.org/3/library/zipfile.html#", and I need the code to also extract the second layer of zip files (the zip files nested inside sourceZip).

Solution

Recursively extract the archive file contents until all nested levels have been extracted. Iterate through the contents of each zip file, detecting if a file is a zip archive, and then extracting the contents.

import os
import zipfile

def extract_zipfile_recursive(zip_file_path, extract_to):
    """
    Recursively extract the contents of a zip file and any nested zip files.

    Every member of the archive is extracted below extract_to. A member whose
    name ends in ".zip" (case-insensitive) is then itself extracted into a
    directory next to it, named after the nested archive without its
    extension (e.g. "sub/inner.zip" is unpacked into "sub/inner/"). The
    nested archive file itself is left in place.

    zip_file_path: Path to the zip file.
    extract_to: Directory where extracted files should be saved.

    Raises zipfile.BadZipFile if the archive, or a nested ".zip" member, is
    not a valid zip file.
    """
    with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
        for item in zip_ref.infolist():
            # extract() returns the path it actually wrote, which may differ
            # from item.filename after zipfile sanitises the member name
            # (leading slashes, "..", drive letters). Use it instead of
            # re-joining the raw name.
            extracted_path = zip_ref.extract(item, extract_to)

            # Directory entries are not archives: opening one with ZipFile
            # would fail. Nested zips inside them are separate members of
            # infolist() and are handled on their own iteration.
            if item.is_dir():
                continue

            if item.filename.lower().endswith(".zip"):
                # Unpack the nested archive beside itself, dropping ".zip".
                nested_extract_to = os.path.splitext(extracted_path)[0]
                extract_zipfile_recursive(extracted_path, nested_extract_to)

# Example usage: pick the destination directory and the archive to unpack.
extract_to_directory = "path/to/extracted/files"
zip_file_path = "path/to/your/nested.zip"

# Make sure the destination exists up front (no error if it already does).
os.makedirs(extract_to_directory, exist_ok=True)

# Unpack the archive together with every zip file nested inside it.
extract_zipfile_recursive(zip_file_path, extract_to_directory)