python file shutil

How to automatically create consecutively numbered folders in Python?


I have a script that takes the files of a single folder and puts them in batches of 50 into new folders. Now I would like the script to name those folders numerically, starting with 1. Another option would be that the script puts those files in already existing folders, because I have a template directory with empty folders. The split is needed so the printing software will be able to spit out these batches separately. Furthermore, I need to know which file is in which folder. There are around 3800 PDF files to be sorted. Once I have figured out how to get these folders, I can finish the project by creating a list of the files in each folder.

This is the code at the moment that creates new folders and names them by the first file in the batch.

import os
import shutil

# Folder whose files are split into batches, and the folder that receives
# the batch sub-folders.  NOTE: the destination lives *inside* the source
# folder, so directory entries must be skipped when collecting the files.
source_directory = "//All-the-data"
Destination_base_folder = '//All-the-data/sorted'

# Number of files placed into each batch folder.
batch_size = 50

# check if source directory exists
if os.path.isdir(source_directory):
    # Get all regular files, sorted by filename.  os.listdir() also returns
    # sub-directories (e.g. the destination folder once it exists), and
    # shutil.copy2() cannot copy a directory, so those are filtered out.
    files = sorted(
        name for name in os.listdir(source_directory)
        if os.path.isfile(os.path.join(source_directory, name))
    )

    counter = 1

    for i in range(0, len(files), batch_size):
        # The slice is automatically shorter for the final, partial batch.
        batch = files[i:i + batch_size]

        # create a folder for each batch, named after its first file
        batch_directory_name = os.path.splitext(batch[0])[0]
        batch_directory_path = os.path.join(Destination_base_folder, batch_directory_name)
        os.makedirs(batch_directory_path, exist_ok=True)

        # copy files into these folders
        for file_name in batch:
            source_file_path = os.path.join(source_directory, file_name)
            destination_file_path = os.path.join(batch_directory_path, file_name)

            shutil.copy2(source_file_path, destination_file_path)

        print(f"Batch {counter} erfolgreich nach {batch_directory_path} kopiert")
        counter += 1
else:
    print("Quellordner existiert nicht: " + source_directory)

I tried the following for renaming the folders afterwards. It doesn't matter which file is in what folder.

def number_folders_chronologically(destination_folder):
    """Prefix every sub-folder of *destination_folder* with a running number.

    The sub-folders are ordered by ``os.path.getctime`` (creation time on
    Windows, last metadata change on Unix) and renamed to
    ``001_<name>``, ``002_<name>``, ... in that order.  Plain files inside
    *destination_folder* are left untouched.  Nothing happens when the
    folder contains no sub-folders.

    Args:
        destination_folder: Path of the directory whose immediate
            sub-folders should be numbered.

    Raises:
        OSError: If *destination_folder* cannot be listed or a rename fails.
    """
    batch_folders = [
        folder for folder in os.listdir(destination_folder)
        if os.path.isdir(os.path.join(destination_folder, folder))
    ]
    batch_folders.sort(key=lambda x: os.path.getctime(os.path.join(destination_folder, x)))

    for index, folder in enumerate(batch_folders, start=1):
        old_path = os.path.join(destination_folder, folder)
        new_folder_name = f"{index:03d}_{folder}"
        new_path = os.path.join(destination_folder, new_folder_name)
        # The rename has to happen inside the loop; placed after it, only
        # the last folder would be renamed (and an empty folder list would
        # raise because old_path/new_path were never assigned).
        os.rename(old_path, new_path)

I am completely new to programming with Python and haven't programmed in 15 years. Even back then my knowledge was basic, if that. I am very sure the solution is totally easy and I just don't see it.

Thank you all for helping out.


Solution

  • The script now checks for the existence of a template directory (template_directory). If present, it copies the first folder (assuming it's empty) and renames it with a numerical prefix (batch_X). Otherwise, it creates new folders with numerical names like batch_001. A dictionary (file_to_folder_map) tracks which file goes into which folder. After processing, you can optionally print this mapping for reference.

    import os
    import shutil
    
    # Define directories.  NOTE: the destination and template folders live
    # inside the source folder, so directory entries must be skipped when
    # collecting the files to sort.
    source_directory = '//All-the-data'
    destination_base_folder = '//All-the-data/sorted'
    template_directory = '//All-the-data/sorted_template'

    # Set batch size
    batch_size = 50

    # Check if source directory exists
    if os.path.isdir(source_directory):

        # Get all regular files and sort them by filename.  os.listdir()
        # also returns sub-directories, which shutil.copy2() cannot copy.
        files = sorted(
            name for name in os.listdir(source_directory)
            if os.path.isfile(os.path.join(source_directory, name))
        )

        # Pick the template folder once: the first (alphabetically) sub-folder
        # of template_directory, if that directory exists and has one.
        template_folder = None
        if os.path.isdir(template_directory):
            template_subfolders = sorted(
                name for name in os.listdir(template_directory)
                if os.path.isdir(os.path.join(template_directory, name))
            )
            if template_subfolders:
                template_folder = os.path.join(template_directory, template_subfolders[0])

        # Folder numbering and tracking
        folder_number = 1
        file_to_folder_map = {}

        for i in range(0, len(files), batch_size):
            # Zero-padded numbers keep the folders in order when sorted by name.
            batch_directory_path = os.path.join(destination_base_folder, f"batch_{folder_number:03d}")

            # Copy the template folder or create a new, empty folder.
            # copytree() refuses to overwrite an existing target, so a folder
            # left over from an earlier run is simply reused.
            if template_folder is not None and not os.path.exists(batch_directory_path):
                shutil.copytree(template_folder, batch_directory_path)
            else:
                os.makedirs(batch_directory_path, exist_ok=True)

            # Copy files into the folder (the slice is shorter for the last batch)
            for file_name in files[i:i + batch_size]:
                source_file_path = os.path.join(source_directory, file_name)
                destination_file_path = os.path.join(batch_directory_path, file_name)
                shutil.copy2(source_file_path, destination_file_path)

                # Track which file goes in which folder
                file_to_folder_map[file_name] = batch_directory_path

            print(f"Batch {folder_number} successfully copied to {batch_directory_path}")
            folder_number += 1

        if file_to_folder_map:
            print("\nFile to Folder Mapping:")
            for filename, folder_path in file_to_folder_map.items():
                print(f"{filename} -> {folder_path}")

    else:
        print("Source folder doesn't exist: " + source_directory)