I was looking to learn about AI and found the traveling salesman problem very interesting. I also wanted to learn about genetic algorithms, so it was a fantastic combo. The task is to find the shortest distance traveling from id 1
to each location from the list once and returning to the starting location id 1
Restriction for the problem :
The location
id 1
must be the starting and the ending pointThe maximum distance allowed is
distance <= 9000
Only max of
250000
fitness calculation is allowed
Code :
import numpy as np
import random
import operator
import pandas as pd
val10 = 0
val9 = 0
class Locations:
def __init__(self, x, y):
self.x = x
self.y = y
def dist(self, location):
x_dist = abs(float(self.x) - float(location.x))
y_dist = abs(float(self.y) - float(location.y))
# √( (x2 − x1)^2 + (𝑦2 − 𝑦1)^2 )
dist = np.sqrt((x_dist ** 2) + (y_dist ** 2))
return dist
def __repr__(self):
return "(" + str(self.x) + "," + str(self.y) + ")"
class Fitness:
def __init__(self, route):
self.r = route
self.dist = 0
self.fit = 0.0
def route_dist(self):
if self.dist == 0:
path_dist = 0
for i in range(0, len(self.r)):
from_location = self.r[i]
to_location = None
if i + 1 < len(self.r):
to_location = self.r[i+1]
else:
to_location = self.r[0]
path_dist += from_location.dist(to_location)
self.dist = path_dist
return self.dist
def route_fittness(self):
if self.fit == 0:
self.fit = 1 / float(self.route_dist())
global val9
val9 = val9 + 1
return self.fit
def generate_route(location_list):
route = random.sample(location_list, len(location_list))
return route
def gen_zero_population(size, location_list):
population = []
for i in range(0, size):
population.append(generate_route(location_list))
return population
def determine_fit(population):
result = {}
for i in range(0, len(population)):
result[i] = Fitness(population[i]).route_fittness()
global val10
val10 = val10 + 1
return sorted(result.items(), key=operator.itemgetter(1), reverse=True)
def fit_proportionate_selection(top_pop, elite_size):
result = []
df = pd.DataFrame(np.array(top_pop), columns=["index", "Fitness"])
df['cumulative_sum'] = df.Fitness.cumsum()
df['Sum'] = 100*df.cumulative_sum/df.Fitness.sum()
for i in range(0, elite_size):
result.append(top_pop[i][0])
for i in range(0, len(top_pop) - elite_size):
select = 100*random.random()
for i in range(0, len(top_pop)):
if select <= df.iat[i, 3]:
result.append(top_pop[i][0])
break
return result
def select_mating_pool(populatoin, f_p_s_result):
mating_pool = []
for i in range(0, len(f_p_s_result)):
index = f_p_s_result[i]
mating_pool.append(populatoin[index])
return mating_pool
def ordered_crossover(p1, p2):
child, child_p1, child_p2 = ([] for i in range(3))
first_gene = int(random.random() * len(p1))
sec_gene = int(random.random() * len(p2))
start_gene = min(first_gene, sec_gene)
end_gene = max(first_gene, sec_gene)
for i in range(start_gene, end_gene):
child_p1.append(p1[i])
child_p2 = [item for item in p2 if item not in child_p1]
child = child_p1 + child_p2
return child
def ordered_crossover_pop(mating_pool, elite_size):
children = []
leng = (len(mating_pool) - (elite_size))
pool = random.sample(mating_pool, len(mating_pool))
for i in range(0, elite_size):
children.append(mating_pool[i])
for i in range(0, leng):
var = len(mating_pool)-i - 1
child = ordered_crossover(pool[i], pool[var])
children.append(child)
return children
def swap_mutation(one_location, mutation_rate):
for i in range(len(one_location)):
if (random.random() < mutation_rate):
swap = int(random.random() * len(one_location))
location1 = one_location[i]
location2 = one_location[swap]
one_location[i] = location2
one_location[swap] = location1
return one_location
def pop_mutation(population, mutation_rate):
result = []
for i in range(0, len(population)):
mutaded_res = swap_mutation(population[i], mutation_rate)
result.append(mutaded_res)
return result
def next_gen(latest_gen, elite_size, mutation_rate):
route_rank = determine_fit(latest_gen)
selection = fit_proportionate_selection(route_rank, elite_size)
mating_selection = select_mating_pool(latest_gen, selection)
children = ordered_crossover_pop(mating_selection, elite_size)
next_generation = pop_mutation(children, mutation_rate)
return next_generation
def generic_algor(population, pop_size, elite_size, mutation_rate, gen):
pop = gen_zero_population(pop_size, population)
print("Initial distance: " + str(1 / determine_fit(pop)[0][1]))
for i in range(0, gen):
pop = next_gen(pop, elite_size, mutation_rate)
print("Final distance: " + str(1 / determine_fit(pop)[0][1]))
best_route_index = determine_fit(pop)[0][0]
best_route = pop[best_route_index]
print(best_route)
return best_route
def read_file(fn):
a = []
with open(fn) as f:
[next(f) for _ in range(6)]
for line in f:
line = line.rstrip()
if line == 'EOF':
break
ID, x, y = line.split()
a.append(Locations(x=x, y=y))
return a
location_list = read_file(r'path_of_the_file')
population = location_list
pop_size = 100
elite_size = 40
mutation_rate = 0.001
gen = 500
generic_algor(population, pop_size, elite_size, mutation_rate, gen)
print(val10)
print(val9)
Location file with x
and y
coordinates :
|Locations
|
|52 Locations
|
|Coordinates
|
1 565.0 575.0
2 25.0 185.0
3 345.0 750.0
4 945.0 685.0
5 845.0 655.0
6 880.0 660.0
7 25.0 230.0
8 525.0 1000.0
9 580.0 1175.0
10 650.0 1130.0
11 1605.0 620.0
12 1220.0 580.0
13 1465.0 200.0
14 1530.0 5.0
15 845.0 680.0
16 725.0 370.0
17 145.0 665.0
18 415.0 635.0
19 510.0 875.0
20 560.0 365.0
21 300.0 465.0
22 520.0 585.0
23 480.0 415.0
24 835.0 625.0
25 975.0 580.0
26 1215.0 245.0
27 1320.0 315.0
28 1250.0 400.0
29 660.0 180.0
30 410.0 250.0
31 420.0 555.0
32 575.0 665.0
33 1150.0 1160.0
34 700.0 580.0
35 685.0 595.0
36 685.0 610.0
37 770.0 610.0
38 795.0 645.0
39 720.0 635.0
40 760.0 650.0
41 475.0 960.0
42 95.0 260.0
43 875.0 920.0
44 700.0 500.0
45 555.0 815.0
46 830.0 485.0
47 1170.0 65.0
48 830.0 610.0
49 605.0 625.0
50 595.0 360.0
51 1340.0 725.0
52 1740.0 245.0
EOF
I have tried to tweak the value of the parameters but it has never gone below or be 9000
it is always around the upper 9500
What can I improve to get it to work for my location file?
Turns out everything is fine tweaking the distance
function and the fit_proportionate_selection
ensures a faster run time which makes testing the parameters faster. The other change is to have a fixed seed for the random()
this way the results can be compared without much variant.
def fit_proportionate_selection(top_pop, elite_size):
indices, fitness = zip(*top_pop)
cumulative_sum = list(it.accumulate(fitness))
total = cumulative_sum[-1]
weights = [100*cs/total for cs in cumulative_sum]
result = list(indices[:elite_size])
for i in range(len(top_pop) - elite_size):
select = random.randrange(100)
for i, weight in enumerate(weights):
if select <= weight:
result.append(top_pop[i][0])
break
return result
Taken from my review question Code review of the same code
The parameters picked where from the comment of the review question:
pop_size = 150, elite_size=50, mutation_rate=0.0005, gen=400