pythonalgorithmdata-structuresdijkstramin-heap

Heap Dijkstra Implementation is slower than Naive Dijsktra Implementation


I have tried to implement the Naive and Heap Dijkstra as shown below but somehow my naive Dijkstra implementation is surprisingly faster. I debugged my code but couldn't understand where my problem in my implementations are.

Why is my heap-based implementation is slower than the naive implementation?

Raw data is stored here: https://www.algorithmsilluminated.org/datasets/problem9.8.txt

Data Import and Manipulation:

import time
with open("DijkstraTest2.txt", 'r') as input:
    lines = input.readlines()

lengths = {}
vertices = []
for line in lines:
    contents = line.split("\t")
    vertices.append(contents[0])
    for content in contents:
        content = content.replace('\n', '')
        if ',' in content:
            edge = contents[0] + '-' + content.split(',')[0]
            lengths[edge] = int(content.split(',')[1])

Naive Dijkstra:

def NaiveDijkstra(vertices, start_point, lengths):
    X = [start_point]
    shortest_paths = {}
    for vertex in vertices:
        if vertex == start_point:
            shortest_paths[vertex] = 0
        else:
            shortest_paths[vertex] = 999999999999
    subset = [key for key in lengths.keys() if start_point == key.split('-')[0]
              and key.split('-')[0] in X and key.split('-')[1] not in X]
    while len(subset) > 0:
        temp_min_dict = {}
        for edge in subset:
            temp_min = shortest_paths[edge.split('-')[0]] + lengths[edge]
            temp_min_dict[edge] = temp_min
        new_edge = min(temp_min_dict, key=temp_min_dict.get)
        X.append(new_edge.split('-')[1])
        shortest_paths[new_edge.split('-')[1]] = shortest_paths[new_edge.split('-')[0]] + lengths[new_edge]
        subset = []
        for key in lengths.keys():
            if key.split('-')[0] in X and key.split('-')[1] not in X:
                subset.append(key)
    return shortest_paths

start_time = time.time()
print(NaiveDijkstra(vertices = vertices, start_point = '1', lengths = lengths)['197'])
print(time.time() - start_time, "seconds")

My Heap based Dijkstra code:

class Heap:
    def __init__(self):
        self.size = 0
        self.lst = []

    def swap(self, a):
        if self.size == 1:
            return self.lst
        else:
            if a == 1:
                i = 1
            else:
                i = a // 2
            while i > 0:
                if i * 2 - 1 >= self.size:
                    break
                elif self.lst[i - 1][1] > self.lst[i * 2 - 1][1]:
                    temp = self.lst[i - 1]
                    self.lst[i - 1] = self.lst[i * 2 - 1]
                    self.lst[i * 2 - 1] = temp
                elif i * 2 >= self.size:
                    break
                elif self.lst[i - 1][1] > self.lst[i * 2][1]:
                    temp = self.lst[i - 1]
                    self.lst[i - 1] = self.lst[i * 2]
                    self.lst[i * 2] = temp
                i -= 1
        # print(f"output: {self.lst}")

    def insert(self, element):
        # print(f"input: {self.lst}")
        self.lst.append(element)
        self.size += 1
        self.swap(self.size)

    def extractmin(self):
        val = self.lst.pop(0)[0]
        self.size -= 1
        self.swap(self.size - 1)
        return val

    def delete(self, deleted):
        ix = self.lst.index(deleted)
        temp = self.lst[-1]
        self.lst[ix] = temp
        self.lst[-1] = deleted
        self.lst.pop(-1)
        self.size -= 1
        #self.swap(self.size)


def FastDijkstra(vertices, start_point, lengths):
    X = []
    h = Heap()
    width = {}
    shortest_paths = {}
    for vertex in vertices:
        if vertex == start_point:
            width[vertex] = 0
            h.insert((vertex, width[vertex]))
        else:
            width[vertex] = 999999999999
            h.insert((vertex, width[vertex]))
    while h.size > 0:
        w = h.extractmin()
        X.append(w)
        shortest_paths[w] = width[w]
        Y = set(vertices).difference(X)
        for x in X:
            for y in Y:
                key = f"{x}-{y}"
                if lengths.get(key) is not None:
                    h.delete((y, width[y]))
                    if width[y] > shortest_paths[x] + lengths[key]:
                        width[y] = shortest_paths[x] + lengths[key]
                    h.insert((y, width[y]))
    return shortest_paths


start_time = time.time()
print(FastDijkstra(vertices=vertices, start_point='1', lengths=lengths)['197'])
print(time.time() - start_time, "seconds")

Solution

  • The way you implemented the heap version is not efficient. Notably the following make it inefficient:

    If this is done right it should run faster. Certainly when you would make use of the native heapq module you'll get good running times.

    Here is a (much) faster implementation. It doesn't bother about unreachable vertices, and doesn't bother about possibly having multiple entries on the heap for the same node (with different distances). But it does start with only the starting node on the heap, and uses heapq:

    from heapq import heappush, heappop
    from collections import defaultdict
    
    def FastDijkstra(vertices, start_point, lengths):
        # Create a dictionary for the edges, keyed by source node
        edges = defaultdict(list)
        for key, length in lengths.items():
            x, y = key.split("-")
            edges[x].append((length, y))
    
        heap = [(0, start_point)]
        shortest_paths = {}
        while heap:
            cost, x = heappop(heap)
            if x in shortest_paths:
                continue  # this vertex had already been on the heap before
            shortest_paths[x] = cost
            for length, y in edges[x]:
                if y not in shortest_paths:
                    heappush(heap, (cost + length, y))
        return shortest_paths
    

    In my tests this ran hundreds times faster.