I am trying to allocate Kubernetes pods to nodes using a genetic algorithm, where each pod is assigned to one node. Below is my implementation:
from string import ascii_lowercase
import numpy as np
import random
from itertools import compress
import math
import pandas as pd
import random
def create_pods_and_nodes(n_pods=40, n_nodes=15):
# Create pod and node names
pod = ['pod_' + str(i+1) for i in range(n_pods)]
node = ['node_' + str(i+1) for i in range(n_nodes)]
# Define CPU and RAM options
cpu = [2**i for i in range(1, 8)] # 2, 4, 8, 16, 32, 64, 128
ram = [2**i for i in range(2, 10)] # 4, 8, 16, ..., 8192
# Create the pods DataFrame
pods = pd.DataFrame({
'pod': pod,
'cpu': random.choices(cpu[0:3], k=n_pods), # Small CPU for pods
'ram': random.choices(ram[0:4], k=n_pods), # Small RAM for pods
})
# Create the nodes DataFrame
nodes = pd.DataFrame({
'node': node,
'cpu': random.choices(cpu[4:len(cpu)-1], k=n_nodes), # Larger CPU for nodes
'ram': random.choices(ram[4:len(ram)-1], k=n_nodes), # Larger RAM for nodes
})
return pods, nodes
# Example usage
pods, nodes = create_pods_and_nodes(n_pods=46, n_nodes=6)
# Display the results
print("Pods DataFrame:\n", pods.head())
print("\nNodes DataFrame:\n", nodes.head())
print(f"total CPU pods: {np.sum(pods['cpu'])}")
print(f"total RAM pods: {np.sum(pods['ram'])}")
print('\n')
print(f"total CPU nodes: {np.sum(nodes['cpu'])}")
print(f"total RAM nodes: {np.sum(nodes['ram'])}")
# Genetic Algorithm Parameters
POPULATION_SIZE = 100
GENERATIONS = 50
MUTATION_RATE = 0.1
TOURNAMENT_SIZE = 5
def create_individual():
return [random.randint(0, len(nodes) - 1) for _ in range(len(pods))]
def create_population(size):
return [create_individual() for _ in range(size)]
def fitness(individual):
total_cpu_used = np.zeros(len(nodes))
total_ram_used = np.zeros(len(nodes))
unallocated_pods = 0
for pod_idx, node_idx in enumerate(individual):
pod_cpu = pods.iloc[pod_idx]['cpu']
pod_ram = pods.iloc[pod_idx]['ram']
if total_cpu_used[node_idx] + pod_cpu <= nodes.iloc[node_idx]['cpu'] and total_ram_used[node_idx] + pod_ram <= nodes.iloc[node_idx]['ram']:
total_cpu_used[node_idx] += pod_cpu
total_ram_used[node_idx] += pod_ram
else:
unallocated_pods += 1 # Count unallocated pods
# Reward for utilizing resources and penalize for unallocated pods
return (total_cpu_used.sum() + total_ram_used.sum()) - (unallocated_pods * 10)
def select(population):
tournament = random.sample(population, TOURNAMENT_SIZE)
return max(tournament, key=fitness)
def crossover(parent1, parent2):
crossover_point = random.randint(1, len(pods) - 1)
child1 = parent1[:crossover_point] + parent2[crossover_point:]
child2 = parent2[:crossover_point] + parent1[crossover_point:]
return child1, child2
def mutate(individual):
for idx in range(len(individual)):
if random.random() < MUTATION_RATE:
individual[idx] = random.randint(0, len(nodes) - 1)
def genetic_algorithm():
population = create_population(POPULATION_SIZE)
for generation in range(GENERATIONS):
new_population = []
for _ in range(POPULATION_SIZE // 2):
parent1 = select(population)
parent2 = select(population)
child1, child2 = crossover(parent1, parent2)
mutate(child1)
mutate(child2)
new_population.extend([child1, child2])
population = new_population
# Print the best fitness of this generation
best_fitness = max(fitness(individual) for individual in population)
print(f"Generation {generation + 1}: Best Fitness = {best_fitness}")
# Return the best individual found
best_individual = max(population, key=fitness)
return best_individual
# Run the genetic algorithm
print("Starting Genetic Algorithm...")
best_allocation = genetic_algorithm()
print("Genetic Algorithm completed.\n")
# Create the allocation DataFrame
allocation_df = pd.DataFrame({
'Pod': pods['pod'],
'Node': [nodes.iloc[best_allocation[i]]['node'] for i in range(len(best_allocation))],
'Pod_Resources': [list(pods.iloc[i][['cpu', 'ram']]) for i in range(len(best_allocation))],
'Node_Resources': [list(nodes.iloc[best_allocation[i]][['cpu', 'ram']]) for i in range(len(best_allocation))]
})
# Print the allocation DataFrame
print("\nAllocation DataFrame:")
print(allocation_df)
# Summarize total CPU and RAM utilization for each node
node_utilization_df = allocation_df.groupby('Node').agg(
Total_CPU_Used=pd.NamedAgg(column='Pod_Resources', aggfunc=lambda x: sum([res[0] for res in x if res])),
Total_RAM_Used=pd.NamedAgg(column='Pod_Resources', aggfunc=lambda x: sum([res[1] for res in x if res])),
Node_CPU=pd.NamedAgg(column='Node_Resources', aggfunc=lambda x: x.iloc[0][0] if x.iloc[0] is not None else 0),
Node_RAM=pd.NamedAgg(column='Node_Resources', aggfunc=lambda x: x.iloc[0][1] if x.iloc[0] is not None else 0)
)
# Calculate CPU and RAM utilization percentages for each node
node_utilization_df['CPU_Utilization'] = (node_utilization_df['Total_CPU_Used'] / node_utilization_df['Node_CPU']) * 100
node_utilization_df['RAM_Utilization'] = (node_utilization_df['Total_RAM_Used'] / node_utilization_df['Node_RAM']) * 100
# Print the total CPU and RAM utilization for each node
print("\nTotal CPU and RAM utilization for each node:")
print(node_utilization_df)
My implementation works if the total number of CPU and/or RAM of the pods is smaller than the total CPU and/or RAM of the nodes. However, I want to make it work even if the total CPU and/or RAM of the pods exceeds the total CPU and/or RAM of the nodes, allowing for unallocated pods if they cannot be assigned. How can I achieve this?
Any suggestions or improvements would be greatly appreciated!
This is a straightforward bin packing problem. https://en.wikipedia.org/wiki/Bin_packing_problem Why tackle it with a genetic algorithm!?! That is going to be horribly slow, especially if you use python.
Implementing a standard bin packing algorithm in a native language with a decent optimizing compiler will give performance many orders of magnitude faster - a millisecond or two for your sample problem
Here is the C++ code for the implementation of the first-fit decreasing bin packing algorithm with dual resources
void pack()
{
// sort items in order of decreasing largest resource requirement sum
std::sort(
theItems.begin(), theItems.end(),
[](const cThing &a, const cThing &b) -> bool
{
int sa = a.myRes1 + a.myRes2;
int sb = b.myRes1 + b.myRes2;
return sa > sb;
});
// sort bins in order of increasing capacity sum
std::sort(
theBins.begin(), theBins.end(),
[](const cThing &a, const cThing &b) -> bool
{
int sa = a.myRes1 + a.myRes2;
int sb = b.myRes1 + b.myRes2;
return sa < sb;
});
// fit each item into the smallest bin that fits
for (cThing &item : theItems)
{
for (cThing &bin : theBins)
{
if (item.myRes1 > bin.myRes1 ||
item.myRes2 > bin.myRes2)
continue;
bin.pack( item );
break;
}
}
}
The output for a run:
All iems packed
node_3 contains: pod_1 pod_21
node_13 contains: pod_19 pod_15
node_11 contains: pod_31 pod_7
node_10 contains: pod_8 pod_28 pod_24 pod_17 pod_32
node_14 contains: pod_16 pod_36 pod_30 pod_29 pod_6 pod_34 pod_22 pod_9 pod_20 pod_38
node_15 contains: pod_37 pod_12 pod_13 pod_25 pod_26 pod_4 pod_3 pod_33 pod_27 pod_35 pod_2 pod_39 pod_23
node_4 contains: pod_5 pod_18 pod_14 pod_11 pod_10 pod_40
node_9 is empty
node_1 is empty
node_8 is empty
node_7 is empty
node_5 is empty
node_12 is empty
node_2 is empty
node_6 is empty
The above run uses the setup posted in the question.
You added:
I want to make it work even if the total CPU and/or RAM of the pods exceeds the total CPU and/or RAM of the nodes, allowing for unallocated pods if they cannot be assigned. How can I achieve this?
So I reduce the number of nodes created ( from 15 to 4 ) so that not all pods can be fitted. Below is the result of this run showing that the code handles this naturally
pod_12 pod_25 pod_26 pod_4 pod_3 pod_33 pod_27 pod_35 pod_2 pod_39 pod_5 pod_23 pod_18 pod_14 pod_11 pod_10 pod_40
17 items did not fit
node_3 contains: pod_1 pod_21
node_4 contains: pod_19 pod_15 pod_31 pod_7
node_1 contains: pod_8 pod_28 pod_24 pod_17 pod_32 pod_16 pod_36 pod_30 pod_29 pod_6 pod_34
node_2 contains: pod_22 pod_9 pod_20 pod_38 pod_37 pod_13
Complete application code at https://github.com/JamesBremner/so79049807