I started using Dijkstra's algorithm in python using Networkx, but now I want to speed up my code using c++ and I chose Boost to work with graphs. My surprise was that I didn't see any speed up and that actually Python was faster in calculating the shortest paths. I am very novice to c++ so I don't know if I am doing anything wrong. Here's a sample code that reproduces the problem:
Python code:
import networkx as nx
import csv
import time
import numpy as np
def write_graph_to_csv(G):
# Open a CSV file in write mode
with open('/Users/robertbenassai/Documents/UOC/k_shortest_path_betweenness/KSP_cpp/graph.csv',
'w', newline='') as csvfile:
# Create a CSV writer object
csvwriter = csv.writer(csvfile)
# Write the header row (optional)
csvwriter.writerow(['vertex1', 'vertex2', 'edge_weight'])
# Write edges and their weights to the CSV file
for u, v, weight in G.edges(data='length'):
csvwriter.writerow([u, v, weight])
print('Graph has been written to graph.csv')
graph = nx.random_geometric_graph(100, 0.2)
pos_RG = nx.get_node_attributes(graph, 'pos')
edge_lens = {edge: np.linalg.norm(np.array(pos_RG[edge[1]]) - np.array(pos_RG[edge[0]])) for edge
in graph.edges}
nx.set_edge_attributes(graph, edge_lens, name = 'length')
nx.draw_networkx(graph, pos_RG)
times_del = []
for i in range(100):
t1 = time.time()
nx.single_source_dijkstra(graph, 1, weight = "length")
times_del.append(time.time()-t1)
# print(time.time()-t1)
print(np.mean(times_del))
write_graph_to_csv(graph)
C++ code:
#include <iostream>
#include <vector>
#include <chrono>
#include <numeric>
#include <boost/property_map/dynamic_property_map.hpp>
#include <boost/property_map/property_map.hpp>
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/dijkstra_shortest_paths.hpp>
#include <boost/tokenizer.hpp>
using namespace boost;
using namespace std::chrono;
struct NodeProperties {
int id;
};
typedef boost::adjacency_list<
boost::listS,
boost::vecS,
boost::undirectedS,
NodeProperties,
boost::property<boost::edge_weight_t, double>
> Graph;
// typedef adjacency_list<listS, vecS, undirectedS,
// no_property, property<edge_weight_t, double> > Graph;
typedef graph_traits<Graph>::vertex_descriptor Vertex;
typedef graph_traits<Graph>::edge_descriptor Edge;
typedef std::pair<int, int> EdgePair;
int main() {
/*
READING CSV FILE: EDGE[0], EDGE[1], WEIGHT
*/
Graph delaunay(0);
std::ifstream file("graph.csv");
std::string line;
std::getline(file, line);
while (std::getline(file, line)) {
boost::tokenizer<boost::escaped_list_separator<char>> tokens(line);
auto tokenIterator = tokens.begin();
int vertex1 = std::stoi(*tokenIterator++);
int vertex2 = std::stoi(*tokenIterator++);
double weight = std::stod(*tokenIterator);
// Add edge to the graph with the given weight
Edge e = add_edge(vertex1, vertex2, delaunay).first;
put(edge_weight, delaunay, e, weight);
// boost::add_edge(vertex1, vertex2, EdgeProperties{weight}, delaunay);
}
std::vector<int> path(num_vertices(delaunay), -1); // Initialize path with -1 for all vertices
std::vector<double> distances(num_vertices(delaunay)); // Store distances for Dijkstra's algorithm
std::vector<double> times(100);
for (int i = 0; 100>i; i++){
auto t1 = high_resolution_clock::now();
boost::dijkstra_shortest_paths(delaunay, 1,
predecessor_map(make_iterator_property_map(path.begin(), get(vertex_index, delaunay)))
.distance_map(make_iterator_property_map(distances.begin(), get(vertex_index, delaunay))));
auto t2 = high_resolution_clock::now();
std::chrono::duration<float> deltat = (t2 - t1);
std::cout << "Dijkstra time: "
<< deltat.count() << " seconds" << std::endl;
times[i] = deltat.count();
}
double mean_t = std::accumulate(times.begin(), times.end(), 0.0) / times.size();
printf("mean time: %lf", mean_t);
return 0;
}
I have tried running different graphs but the "problem" persists. Can someone try it and see if it is just me?
EDIT:
I am using a makefile to compile it:
INC_DIRS = /opt/homebrew/include
LIB_DIRS = /opt/homebrew/lib
CPPFLAGS += -std=c++17
# Link igraph and other libraries
LDFLAGS = -ligraph -lboost_filesystem -lboost_graph
Yen: k_shortest_paths.cpp
$(CXX) $(CFLAGS) $(CPPFLAGS) -I$(INC_DIRS) -L$(LIB_DIRS) $(LDFLAGS) $< -o $@
clean:
rm Yen
I am using C++17 as seen in the makefile. My computer is a MacBoock Pro, M1 chip,sonoma 14.0
So, I reproduced it, slightly modifying output for readability:
Live On Coliru (with graph.csv
from here)
#include <chrono>
#include <fstream>
#include <iostream>
#include <numeric>
#include <boost/graph/adjacency_list.hpp>
#include <boost/graph/dijkstra_shortest_paths.hpp>
using namespace std::chrono_literals;
auto now = std::chrono::high_resolution_clock::now;
struct NodeProperties {
int id;
};
typedef boost::adjacency_list< //
boost::listS, //
boost::vecS, //
boost::undirectedS, //
NodeProperties, //
boost::property<boost::edge_weight_t, double> //
>
Graph;
// typedef adjacency_list<listS, vecS, undirectedS,
// no_property, property<edge_weight_t, double> > Graph;
static Graph read_csv(std::string const& fname) {
/* READING CSV FILE: EDGE[0], EDGE[1], WEIGHT */
Graph g;
std::ifstream file(fname);
file.ignore(1024, '\n');
std::istringstream iss;
for (std::string line; std::getline(file, line);) {
iss.clear();
iss.str(line);
char comma;
size_t vertex1, vertex2;
double weight;
if (iss >> vertex1 >> comma && comma == ',' && //
iss >> vertex2 >> comma && comma == ',' && //
iss >> weight) {
/*auto [ok, e] =*/add_edge(vertex1, vertex2, {weight}, g);
}
}
return g;
}
int main() {
Graph g = read_csv("graph.csv");
std::vector<int> path(num_vertices(g));
std::vector<double> distances(num_vertices(g));
std::vector<long double> times(100);
for (auto& timing : times) {
auto start = now();
boost::dijkstra_shortest_paths(
g, 1,
predecessor_map(make_iterator_property_map(path.begin(), get(boost::vertex_index, g)))
.distance_map(make_iterator_property_map(distances.begin(), get(boost::vertex_index, g))));
timing = (now() - start) / 1.us;
// std::cout << "Dijkstra time: " << timing << "μs" << std::endl;
}
auto mean_t = std::accumulate(times.begin(), times.end(), 0.0l) / times.size();
std::cout << "Mean time: " << mean_t << "μs" << std::endl;
}
Local comparison for several random graphs shows a consistent performance gain of >85x
python ms | c++ μs | python μs | speed up factor |
---|---|---|---|
0.501399040222168 | 5.91381 | 501.399040222168 | 84.8 |
0.570063591003418 | 6.66114 | 570.063591003418 | 85.6 |
0.5150175094604492 | 5.76435 | 515.017509460449 | 89.3 |
0.6315255165100098 | 6.66467 | 631.52551651001 | 94.8 |
0.581965446472168 | 7.20989 | 581.965446472168 | 80.7 |
0.5532622337341309 | 6.14693 | 553.262233734131 | 90.0 |
0.6145501136779785 | 5.95821 | 614.550113677979 | 103.1 |
0.5210447311401367 | 6.02458 | 521.044731140137 | 86.5 |
0.5892086029052734 | 6.33592 | 589.208602905273 | 93.0 |
0.7422637939453125 | 6.22005 | 742.263793945313 | 119.3 |
You can improve it further by getting rid of unused inefficiencies:
using Graph = boost::adjacency_list< //
boost::vecS, // was: boost::listS,
boost::vecS, //
boost::undirectedS, //
boost::no_property, // was: NodeProperties
boost::property<boost::edge_weight_t, double> //
>;
Also simplifying the property maps (likely no difference due compiler inlining it all to the same code anyways):
dijkstra_shortest_paths( //
g, 1, //
boost::predecessor_map(path.data()) //
.distance_map(distances.data()));
Full listing on Coliru
Which leads to an average speedup factor of 109x:
python ms | c++ μs | speed up factor |
---|---|---|
0.552058219909668 | 5.02216 | 109.924458780618 |
0.5580687522888184 | 4.90002 | 113.891117238056 |
0.5484175682067871 | 5.17212 | 106.03341921819 |
0.569157600402832 | 5.07291 | 112.195485510847 |
0.5981898307800293 | 5.51395 | 108.486625881633 |
0.5295181274414062 | 4.87289 | 108.666135997613 |
0.5545711517333984 | 5.02684 | 110.322021734012 |
0.554049015045166 | 5.11177 | 108.38692176001 |
0.5611848831176758 | 5.23707 | 107.15626927226 |
0.5605340003967285 | 5.25329 | 106.701514745375 |