I have a script that takes the files in a single folder and copies them, in batches of 50, into new folders. Now I would like the script to name those folders numerically, starting with 1. Another option would be for the script to put the files into already existing folders, because I have a template directory with empty folders. The split is needed so that the printing software can output these batches separately. Furthermore, I need to know which file is in which folder. There are around 3800 PDF files to be sorted. Once I have figured out how to get these folders, I can finish the project by creating a list of the files in each folder.
This is the code at the moment that creates new folders and names them by the first file in the batch.
import os
import shutil
source_directory = "//All-the-data"
Destination_base_folder = '//All-the-data/sorted'
batch_size = 50


def regular_files_in(directory):
    """Return the names of the regular files directly inside *directory*, sorted.

    Sub-folders are left out on purpose: the destination folder lives inside
    the source folder, and ``shutil.copy2`` raises when it is handed a
    directory instead of a file.
    """
    return sorted(
        name
        for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )


# check if source directory exists
if os.path.isdir(source_directory):
    # get all the files (not the sub-folders), sorted by filename
    files = regular_files_in(source_directory)
    counter = 1
    for i in range(0, len(files), batch_size):
        # slicing stops at the end of the list, so the last batch may be shorter
        batch = files[i:i + batch_size]
        # create a folder for each batch, named after the first file in it
        batch_directory_name = os.path.splitext(batch[0])[0]
        batch_directory_path = os.path.join(Destination_base_folder, batch_directory_name)
        os.makedirs(batch_directory_path, exist_ok=True)
        # copy files into these folders
        for filename in batch:
            source_file_path = os.path.join(source_directory, filename)
            destination_file_path = os.path.join(batch_directory_path, filename)
            shutil.copy2(source_file_path, destination_file_path)
        print(f"Batch {counter} erfolgreich nach {batch_directory_path} kopiert")
        counter += 1
else:
    print("Quellordner existiert nicht: " + source_directory)
I tried the following for renaming the folders afterwards. It doesn't matter which file is in what folder.
def number_folders_chronologically(destination_folder):
    """Prefix every sub-folder of *destination_folder* with a running number.

    The sub-folders are ordered by the timestamp ``os.path.getctime`` reports
    for them and renamed to ``NNN_<old name>``, where ``NNN`` is a
    zero-padded three-digit counter starting at 001. Plain files in
    *destination_folder* are not touched. Returns ``None``.
    """

    def _ctime_of(name):
        # Sort key: the ctime of the sub-folder with this name.
        return os.path.getctime(os.path.join(destination_folder, name))

    # Collect only the entries that are directories.
    subfolders = []
    for entry in os.listdir(destination_folder):
        if os.path.isdir(os.path.join(destination_folder, entry)):
            subfolders.append(entry)

    # All sort keys are read before the first rename happens.
    position = 0
    for name in sorted(subfolders, key=_ctime_of):
        position += 1
        os.rename(
            os.path.join(destination_folder, name),
            os.path.join(destination_folder, f"{position:03d}_{name}"),
        )
I am completely new to programming with Python and haven't programmed in 15 years. Even back then my knowledge was basic, if that. I am very sure the solution is totally easy and I just don't see it.
Thank you all for helping out.
The script now checks for the existence of a template directory (template_directory). If present, it copies the first folder in it (assuming it's empty) to a new folder named with a numerical suffix (batch_X). Otherwise, it creates new folders with numerical names like batch_001. A dictionary (file_to_folder_map) tracks which file goes into which folder. After processing, this mapping is printed for reference.
import os
import shutil
# Define directories
source_directory = '//All-the-data'
destination_base_folder = '//All-the-data/sorted'
template_directory = '//All-the-data/sorted_template'

# Set batch size
batch_size = 50


def list_regular_files(directory):
    """Return the names of the regular files directly inside *directory*, sorted.

    Sub-folders are skipped: the destination and template folders live inside
    the source folder, and ``shutil.copy2`` cannot copy a directory.
    """
    return sorted(
        name
        for name in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, name))
    )


def find_template_folder(template_root):
    """Return the path of the first sub-folder (by name) of *template_root*.

    Returns ``None`` when *template_root* is empty/``None``, is not an
    existing directory, or contains no sub-folder, so the caller can fall
    back to creating plain folders.
    """
    if not template_root or not os.path.isdir(template_root):
        return None
    # os.listdir returns entries in arbitrary order; sort for a stable choice.
    for name in sorted(os.listdir(template_root)):
        candidate = os.path.join(template_root, name)
        if os.path.isdir(candidate):
            return candidate
    return None


def copy_files_in_batches(source, destination, template_root=None, size=50):
    """Copy the files of *source* into numbered batch folders under *destination*.

    Args:
        source: Folder whose regular files are copied (sorted by filename).
        destination: Folder in which the batch folders are created.
        template_root: Optional folder holding template sub-folders. When a
            template sub-folder is found, each batch folder is a copy of it
            named ``batch_<n>``; otherwise empty folders named
            ``batch_<nnn>`` (zero-padded to three digits) are created.
        size: Maximum number of files per batch folder.

    Returns:
        A dict mapping each copied filename to the path of its batch folder.

    Raises:
        ValueError: If *size* is smaller than 1.
    """
    if size < 1:
        raise ValueError("batch size must be at least 1")

    template_folder = find_template_folder(template_root)
    files = list_regular_files(source)
    mapping = {}

    for folder_number, start in enumerate(range(0, len(files), size), start=1):
        # Use the template folder or create a new folder with a number
        if template_folder is not None:
            batch_path = os.path.join(destination, f"batch_{folder_number}")
            # copytree refuses to overwrite an existing folder, so skip the
            # copy when a previous run has already created this batch folder.
            if not os.path.exists(batch_path):
                shutil.copytree(template_folder, batch_path)
        else:
            batch_path = os.path.join(destination, f"batch_{folder_number:03d}")
            os.makedirs(batch_path, exist_ok=True)

        # Copy files into the folder; the slice makes the last batch shorter
        for name in files[start:start + size]:
            shutil.copy2(os.path.join(source, name), os.path.join(batch_path, name))
            # Track which file goes in which folder
            mapping[name] = batch_path

        print(f"Batch {folder_number} successfully copied to {batch_path}")

    return mapping


# Check if source directory exists
if os.path.isdir(source_directory):
    file_to_folder_map = copy_files_in_batches(
        source_directory,
        destination_base_folder,
        template_directory,
        batch_size,
    )
    if file_to_folder_map:
        print("\nFile to Folder Mapping:")
        for filename, folder_path in file_to_folder_map.items():
            print(f"{filename} -> {folder_path}")
else:
    print("Source folder doesn't exist: " + source_directory)