python python-3.x performance genetic-algorithm traveling-salesman

The Travelling Salesman Problem Using Genetic Algorithm

I was looking to learn about AI and found the traveling salesman problem very interesting. I also wanted to learn about genetic algorithms, so it was a fantastic combo. The task is to find the shortest distance traveling from id 1 to each location from the list once and returning to the starting location id 1

Restriction for the problem :

The location id 1 must be the starting and the ending point

The maximum distance allowed is distance <= 9000

Only max of 250000 fitness calculation is allowed

Code :

import numpy as np
import random
import operator
import pandas as pd

val10 = 0
val9 = 0
class Locations:
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def dist(self, location):
        x_dist = abs(float(self.x) - float(location.x))
        y_dist = abs(float(self.y) - float(location.y))
        # √( (x2 − x1)^2 + (𝑦2 − 𝑦1)^2 )
        dist = np.sqrt((x_dist ** 2) + (y_dist ** 2))
        return dist

    def __repr__(self):
        return "(" + str(self.x) + "," + str(self.y) + ")"


class Fitness:
    def __init__(self, route):
        self.r = route
        self.dist = 0
        self.fit = 0.0

    def route_dist(self):
        if self.dist == 0:
            path_dist = 0
            for i in range(0, len(self.r)):
                from_location = self.r[i]
                to_location = None
                if i + 1 < len(self.r):
                    to_location = self.r[i+1]
                else:
                    to_location = self.r[0]

                path_dist += from_location.dist(to_location)
            self.dist = path_dist
        return self.dist

    def route_fittness(self):
        if self.fit == 0:
            self.fit = 1 / float(self.route_dist())
        global val9
        val9 = val9 + 1    
        return self.fit


def generate_route(location_list):
    route = random.sample(location_list, len(location_list))
    return route


def gen_zero_population(size, location_list):
    population = []

    for i in range(0, size):
        population.append(generate_route(location_list))
    return population


def determine_fit(population):
    result = {}
    for i in range(0, len(population)):
        result[i] = Fitness(population[i]).route_fittness()
    global val10
    val10 = val10 + 1
    return sorted(result.items(), key=operator.itemgetter(1), reverse=True)


def fit_proportionate_selection(top_pop, elite_size):
    result = []
    df = pd.DataFrame(np.array(top_pop), columns=["index", "Fitness"])
    df['cumulative_sum'] = df.Fitness.cumsum()
    df['Sum'] = 100*df.cumulative_sum/df.Fitness.sum()

    for i in range(0, elite_size):
        result.append(top_pop[i][0])
    for i in range(0, len(top_pop) - elite_size):
        select = 100*random.random()
        for i in range(0, len(top_pop)):
            if select <= df.iat[i, 3]:
                result.append(top_pop[i][0])
                break
    return result


def select_mating_pool(populatoin, f_p_s_result):
    mating_pool = []
    for i in range(0, len(f_p_s_result)):
        index = f_p_s_result[i]
        mating_pool.append(populatoin[index])
    return mating_pool


def ordered_crossover(p1, p2):
    child, child_p1, child_p2 = ([] for i in range(3))

    first_gene = int(random.random() * len(p1))
    sec_gene = int(random.random() * len(p2))

    start_gene = min(first_gene, sec_gene)
    end_gene = max(first_gene, sec_gene)

    for i in range(start_gene, end_gene):
        child_p1.append(p1[i])

    child_p2 = [item for item in p2 if item not in child_p1]

    child = child_p1 + child_p2
    return child


def ordered_crossover_pop(mating_pool, elite_size):
    children = []

    leng = (len(mating_pool) - (elite_size))
    pool = random.sample(mating_pool, len(mating_pool))

    for i in range(0, elite_size):
        children.append(mating_pool[i])

    for i in range(0, leng):
        var = len(mating_pool)-i - 1
        child = ordered_crossover(pool[i], pool[var])
        children.append(child)
    return children


def swap_mutation(one_location, mutation_rate):
    for i in range(len(one_location)):
        if (random.random() < mutation_rate):
            swap = int(random.random() * len(one_location))

            location1 = one_location[i]
            location2 = one_location[swap]

            one_location[i] = location2
            one_location[swap] = location1
    return one_location


def pop_mutation(population, mutation_rate):
    result = []

    for i in range(0, len(population)):
        mutaded_res = swap_mutation(population[i], mutation_rate)
        result.append(mutaded_res)
    return result


def next_gen(latest_gen, elite_size, mutation_rate):
    route_rank = determine_fit(latest_gen)
    selection = fit_proportionate_selection(route_rank, elite_size)
    mating_selection = select_mating_pool(latest_gen, selection)
    children = ordered_crossover_pop(mating_selection, elite_size)
    next_generation = pop_mutation(children, mutation_rate)
    return next_generation


def generic_algor(population, pop_size, elite_size, mutation_rate, gen):
    pop = gen_zero_population(pop_size, population)
    print("Initial distance: " + str(1 / determine_fit(pop)[0][1]))

    for i in range(0, gen):
        pop = next_gen(pop, elite_size, mutation_rate)

    print("Final distance: " + str(1 / determine_fit(pop)[0][1]))
    best_route_index = determine_fit(pop)[0][0]
    best_route = pop[best_route_index]
    print(best_route)
    return best_route


def read_file(fn):
    a = []
    with open(fn) as f:
        [next(f) for _ in range(6)]
        for line in f:
            line = line.rstrip()
            if line == 'EOF':
                break

            ID, x, y = line.split()
            a.append(Locations(x=x, y=y))
    return a


location_list = read_file(r'path_of_the_file')

population = location_list
pop_size = 100
elite_size = 40
mutation_rate = 0.001
gen = 500
generic_algor(population, pop_size, elite_size, mutation_rate, gen)

print(val10)
print(val9)

Location file with x and y coordinates :

|Locations
|
|52 Locations
|
|Coordinates
|
1 565.0 575.0
2 25.0 185.0
3 345.0 750.0
4 945.0 685.0
5 845.0 655.0
6 880.0 660.0
7 25.0 230.0
8 525.0 1000.0
9 580.0 1175.0
10 650.0 1130.0
11 1605.0 620.0 
12 1220.0 580.0
13 1465.0 200.0
14 1530.0 5.0
15 845.0 680.0
16 725.0 370.0
17 145.0 665.0
18 415.0 635.0
19 510.0 875.0  
20 560.0 365.0
21 300.0 465.0
22 520.0 585.0
23 480.0 415.0
24 835.0 625.0
25 975.0 580.0
26 1215.0 245.0
27 1320.0 315.0
28 1250.0 400.0
29 660.0 180.0
30 410.0 250.0
31 420.0 555.0
32 575.0 665.0
33 1150.0 1160.0
34 700.0 580.0
35 685.0 595.0
36 685.0 610.0
37 770.0 610.0
38 795.0 645.0
39 720.0 635.0
40 760.0 650.0
41 475.0 960.0
42 95.0 260.0
43 875.0 920.0
44 700.0 500.0
45 555.0 815.0
46 830.0 485.0
47 1170.0 65.0
48 830.0 610.0
49 605.0 625.0
50 595.0 360.0
51 1340.0 725.0
52 1740.0 245.0
EOF

I have tried to tweak the value of the parameters but it has never gone below or be 9000 it is always around the upper 9500 What can I improve to get it to work for my location file?

Solution

Turns out everything is fine tweaking the distance function and the fit_proportionate_selection ensures a faster run time which makes testing the parameters faster. The other change is to have a fixed seed for the random() this way the results can be compared without much variant.

def fit_proportionate_selection(top_pop, elite_size):
    indices, fitness = zip(*top_pop)
    cumulative_sum = list(it.accumulate(fitness))
    total = cumulative_sum[-1]
    weights = [100*cs/total for cs in cumulative_sum]

    result = list(indices[:elite_size])
    
    for i in range(len(top_pop) - elite_size):
        select = random.randrange(100)
        for i, weight in enumerate(weights):
            if select <= weight:
                result.append(top_pop[i][0])
                break
    return result

Taken from my review question Code review of the same code

The parameters picked where from the comment of the review question:

pop_size = 150, elite_size=50, mutation_rate=0.0005, gen=400