I am working on a genetic algorithm in Python, where I want to use the roulette wheel selection for selecting parents. However, I came to the conclusion that with my current code, it is possible that certain parents are selected multiple times, however I want to avoid this.
Here is the first part of my code: The part where I am struggling is in ' roulette wheel selection'''.
import numpy as np
import random
import time
import copy
'''Initialisation settings'''
num_jobs = 20 # number of jobs
proc_time = [10, 10, 13, 4, 9, 4, 8, 15, 7, 1, 9, 3, 15, 9, 11, 6, 5, 14, 18, 3]
due_dates = [12, 40, 50, 16, 20, 105, 73, 45, 6, 64, 15, 6, 92, 43, 78, 21, 15, 50, 150, 99]
# inputs
population_size = int(10) # size of the population
crossover_rate = float(0.8)
mutation_rate = float(0.2)
mutation_selection_rate = float(0.5)
num_mutation_jobs = round(num_jobs * mutation_selection_rate)
num_iteration = int(2000) # amount of iterations for the GA
start_time = time.time()
'''----- Generate the initial population -----'''
Tbest = 999999999999999
best_list, best_obj = [], []
population_list = []
for i in range(population_size):
random_num = list(np.random.permutation(num_jobs)) # generate a random permutation of 0 to num_jobs
population_list.append(random_num) # add to the population_list
#print(population_list)
''' Fitness value of the initial population'''
total_chromosome = copy.deepcopy(population_list) #initial population
chrom_fitness, chrom_fit = [], []
total_fitness = 0
num_tardy=0
for i in range(population_size): # solutions (chromosomes)
ptime = 0
tardiness = 0
for j in range(num_jobs): # genes in the chromosome
ptime = ptime + proc_time[total_chromosome[i][j]] # proc time is sum of the processing times of the genes, in the order that the genes appear in the chromosome
tardiness = tardiness + max(ptime - due_dates[total_chromosome[i][j]], 0) # calc tardiness of each gene (job) in a chromosome (sequence/solution)
if ptime >= due_dates[total_chromosome[i][j]]: # if due date is exceeded, the job is tardy
num_tardy = num_tardy + 1
chrom_fitness.append(num_tardy)
chrom_fit.append(num_tardy)
total_fitness = total_fitness + chrom_fitness[i] # total sum of the fitness values of the chromosomes
num_tardy=0
#print('chrom_fitness')
#print(chrom_fitness)
'''Rank the solutions best to worst'''
chrom_fitness_rank = copy.deepcopy(chrom_fitness)
chrom_fitness_rank = np.array(chrom_fitness_rank)
#print(chrom_fitness_rank)
combined = zip(chrom_fitness_rank, population_list)
zip_sort = sorted(combined, key=lambda x: x[0])
chrom_fitness_rank, population_list = map(list,zip(*zip_sort))
#print(chrom_fitness_rank)
#print(population_list)
'''Do the required amount of iterations'''
for n in range(num_iteration):
Tbest_now = 99999999999
'''----------Roulette wheel selection----------'''
parent_list = copy.deepcopy(population_list)
pk, qk = [], []
for i in range(population_size):
pk.append(chrom_fitness[i] / total_fitness) #chrom_fitness/total_fitness for each solution/sequence, relative fitness
cum_prob = [sum(pk[:i + 1]) for i in range(len(pk))] # get cumulative probabilities
parent_number = population_size
chosen = []
for n in range(parent_number):
r=random.random()
for (i, individual) in enumerate(population_list):
if cum_prob[i]>=r:
chosen.append(list(individual))
break
#print(r)
print('choose')
print(chosen)
I thought about setting the fitness value of the chosen individual to a very high value (999999) (in my case a lower fitness value is 'better') so that there is a very low chance that this individual is selected again. However, I am not sure how to do this.
Just keep track which individuals were already selected by putting their indices into a set (already_selected
). Then select only when cum_prob[i]>=r and i not in already_selected
.
parent_number = population_size
chosen = []
already_selected = set()
for n in range(parent_number):
r=random.random()
for (i, individual) in enumerate(population_list):
if cum_prob[i]>=r and i not in already_selected:
chosen.append(list(individual))
already_selected.add(i)
break