While profiling my code I discovered that the part that sets the new weight in a graph
takes the most time to run.
Since I have to do it repeatedly on much bigger graph than in this example, the running time adds up.
Is it possible to optimize it?
Here is a mini example:
library(sfnetworks)
library(sf)
First I create an example sf_network (iGraph):
net = as_sfnetwork(roxel, directed = FALSE) %>%
st_transform(3035) %>%
activate("edges") %>%
mutate(weight = edge_length())
Then I select two nodes and search for a path between them:
paths = st_network_paths(net, from = 495, to = c(458, 121), weights = "weight")
I select on path (the edges of it) and:
edges<-unlist(paths$edge_paths[1])
This part takes the most time to run and if possible I would like to optimize:
Read the current weight of the edges, multiply it with 100 and set the new weight for the edges.
my_sfn<-set.edge.attribute(graph = net,name = "weight",index = edges,value = (get.edge.attribute(graph = net,name = "weight",index = edges))*100)
Is it possible to optimize this part ?
#update:
One approach Im thinking about is to maybe make it in parallel:
Basically divide the edges vector on different cpus and work with it
but the problem is; as result I would get a list of graphs.
For example:
edges contains indices 456,432,124,567,854,235,789,111... (hypothetically) I would divide the list by 2 and do a mlapply on the sfn net.
CPU1 -> apply -> 456,432,124,567 on sfn net -> results in a copy of a net with the weight of 456,432,124,567 edges changed
CPU2 -> apply -> 854,235,789,111 on sfn net -> results in a copy of a net with the weight of 854,235,789,111 edges changed
Or one other approach I was thinking about : sfnet is a tbl_graph right? cant I use like future_apply on that tbl_graph?
##update
Parallel does not seem to bring an improvement in this case:
library(microbenchmark)
library(parallel)
edges_chunks<-split(edges, ceiling(seq_along(edges)/(length(edges)/4)))
microbenchmark::microbenchmark(
new_net<-set.edge.attribute(graph = net,name = "weight",index = edges,value = 10000),
new_net_list<-lapply(edges_chunks, function(x)set.edge.attribute(graph = net,name = "weight",index = x,value = 10000)),
new_net_list<-mclapply(edges_chunks, function(x)set.edge.attribute(graph = net,name = "weight",index = x,value = 10000), mc.preschedule=TRUE,mc.cores = n.cores)
)
min lq mean median uq max neval
750.120 887.842 1061.393 1036.850 1176.605 1769.066 100
3111.609 3609.785 4086.067 4002.368 4435.135 7891.939 100
204809.249 208559.875 213134.374 209879.946 211954.072 320525.229 100
Thank you.
UPDATE
Hi Andrea!
thank you for taking the time to answer my question.
I looked into it and the benefit of igraph approach disappears as soon as I have to work with gps coordinates instead of the indices themselves.
I checkt SF_Network code to see how you guys are doing it (find an indices to a gps coordinate) and made this benchmark.
The slowdown seem to be happening in the part where I have to match a gps coordinate to indices which are required to find a shortest path.
So to gain speed i have to find a way to make the matching of coordinates to indices faster.
library(sfnetworks)
library(sf)
library(tidygraph)
library(tidyverse)
library(igraph)
my_sfn = as_sfnetwork(roxel, directed = FALSE) %>%
st_transform(4326) %>%
activate("edges") %>%
mutate(weight = edge_length())
start_lon<-7.572567248901512
start_lat<-51.92503522221193
ziel_lon<-7.68342049920836
ziel_lat<-52.00488468255698
start_point = st_sfc(st_point(c(start_lon, start_lat)))
start_point<-start_point |> st_set_crs(4326)
dest_point = st_sfc(st_point(c(ziel_lon, y = ziel_lat)))
dest_point<-dest_point |> st_set_crs(4326)
node_geom_colname = function(x) {
col = attr(vertex_attr(x), "sf_column")
if (is.null(col)) {
# Take the name of the first sfc column.
sfc_idx = which(vapply(vertex_attr(x), is.sfc, FUN.VALUE = logical(1)))[1]
col = vertex_attr_names(x)[sfc_idx]
}
col
}
valid_agr = function(agr, names, levels = sf:::agr_levels) {
if (is.null(agr)) {
new_agr = empty_agr(names)
} else {
new_agr = structure(agr[names], names = names, levels = levels)
}
new_agr
}
node_feature_attribute_names = function(x) {
g_attrs = node_attribute_names(x)
g_attrs[g_attrs != node_geom_colname(x)]
}
node_attribute_names = function(x) {
vertex_attr_names(x)
}
node_agr = function(x) {
agr = attr(vertex_attr(x), "agr")
valid_agr(agr, node_feature_attribute_names(x))
}
nodes_as_sf = function(x, ...) {
st_as_sf(
as_tibble(as_tbl_graph(x), "nodes"),
agr = node_agr(x),
sf_column_name = node_geom_colname(x)
)
}
weight <- edge_attr(my_sfn, "weight")
bench::mark(
sfnetworks = {
path1_sfn <- st_network_paths(my_sfn, from = st_nearest_feature(st_geometry(start_point), nodes_as_sf(my_sfn)), to = st_nearest_feature(st_geometry(dest_point), nodes_as_sf(my_sfn)), weights = "weight")
edges <- path1_sfn$edge_paths[[1]]
my_sfn_2 <- set_edge_attr(
graph = my_sfn,
name = "weight",
index = edges,
value = edge_attr(my_sfn, "weight", edges) * 100
)
path2_sfn <- st_network_paths(my_sfn_2, from = st_nearest_feature(st_geometry(start_point), nodes_as_sf(my_sfn_2)), to = st_nearest_feature(st_geometry(dest_point), nodes_as_sf(my_sfn_2)), weights = "weight")
path2_sfn$node_paths[[1]]
},
igraph = {
weight <- edge_attr(my_sfn, "weight")
path <- shortest_paths(my_sfn, from = st_nearest_feature(st_geometry(start_point), nodes_as_sf(my_sfn)), to = st_nearest_feature(st_geometry(dest_point), nodes_as_sf(my_sfn)), weights = weight, output = "both")
weight[path$epath[[1]]] <- weight[path$epath[[1]]] * 100
path <- shortest_paths(my_sfn, from = st_nearest_feature(st_geometry(start_point), nodes_as_sf(my_sfn)), to = st_nearest_feature(st_geometry(dest_point), nodes_as_sf(my_sfn)), weights = weight, output = "vpath")
as.integer(path$vpath[[1]])
},
iterations = 15L
)
Result
# A tibble: 2 × 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result memory time gc
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list> <list> <list> <list>
1 sfnetworks 37.2ms 38.3ms 26.2 2.08MB 4.02 13 2 497ms <int [37]> <Rprofmem> <bench_tm> <tibble>
2 igraph 35.8ms 36.2ms 27.5 1.31MB 4.23 13 2 473ms <int [37]> <Rprofmem> <bench_tm> <tibble>
I try to summarise here one example plus some ideas that could be useful to improve the computational efficiency of your code. First, load some packages
suppressPackageStartupMessages({
library(sf)
library(tidygraph)
library(igraph)
library(sfnetworks)
})
The following code creates a toy network with 1e5 nodes and approximately 7e5 edges. I didn’t test the code with larger networks.
set.seed(1)
my_graph <- play_geometry(n = 100000, radius = 0.0066)
my_sfn <- as_sfnetwork(my_graph, coords = c("x", "y")) %>%
convert(to_spatial_explicit, .clean = TRUE) %E>%
mutate(weight = edge_length())
#> Checking if spatial network structure is valid...
#> Spatial network structure is valid
Now we try to replicate the code that you showed using sfnetworks + igraph. First, we compute the shortest path between nodes 495 and 458
path1_sfn <- st_network_paths(my_sfn, from = 495, to = 458, weights = "weight")
Let’s check the output (and, in particular, the node path)
edges <- path1_sfn$edge_paths[[1]]
(path1_sfn$node_paths[[1]])
#> [1] 495 336 399 506 558 621 667 898 903 647 493 392 256 185 248 439 380 458
The following code multiplies the weights of the relevant edges by 100
my_sfn <- set_edge_attr(
graph = my_sfn,
name = "weight",
index = edges,
value = edge_attr(my_sfn, "weight", edges) * 100
)
and, finally, we recompute the shortest path between nodes 495 and 458
path2_sfn <- st_network_paths(my_sfn, from = 495, to = 458, weights = "weight")
and check the output
path2_sfn$node_paths[[1]]
#> [1] 495 288 336 249 506 538 621 649 881 898 977 970 831 619 750 535 587 439 300
#> [20] 380 481 458
Now we restore the original weights and repeat the same operations as before using only igraph code
my_sfn <- my_sfn %E>% mutate(weight = edge_length())
weight <- edge_attr(my_sfn, "weight")
path <- shortest_paths(my_sfn, from = 495, to = 458, weights = weight, output = "both")
as.integer(path$vpath[[1]])
#> [1] 495 336 399 506 558 621 667 898 903 647 493 392 256 185 248 439 380 458
weight[path$epath[[1]]] <- weight[path$epath[[1]]] * 100
path <- shortest_paths(my_sfn, from = 495, to = 458, weights = weight, output = "vpath")
as.integer(path$vpath[[1]])
#> [1] 495 288 336 249 506 538 621 649 881 898 977 970 831 619 750 535 587 439 300
#> [20] 380 481 458
Restore the original output and benchmark the two approaches:
my_sfn <- my_sfn %E>% mutate(weight = edge_length())
bench::mark(
sfnetworks = {
path1_sfn <- st_network_paths(my_sfn, from = 495, to = 458, weights = "weight")
edges <- path1_sfn$edge_paths[[1]]
my_sfn_2 <- set_edge_attr(
graph = my_sfn,
name = "weight",
index = edges,
value = edge_attr(my_sfn, "weight", edges) * 100
)
path2_sfn <- st_network_paths(my_sfn_2, from = 495, to = 458, weights = "weight")
path2_sfn$node_paths[[1]]
},
igraph = {
weight <- edge_attr(my_sfn, "weight")
path <- shortest_paths(my_sfn, from = 495, to = 458, weights = weight, output = "both")
weight[path$epath[[1]]] <- weight[path$epath[[1]]] * 100
path <- shortest_paths(my_sfn, from = 495, to = 458, weights = weight, output = "vpath")
as.integer(path$vpath[[1]])
},
iterations = 15L
)
#> Warning: Some expressions had a GC in every iteration; so filtering is disabled.
#> # A tibble: 2 x 6
#> expression min median `itr/sec` mem_alloc `gc/sec`
#> <bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl>
#> 1 sfnetworks 1.34s 2.85s 0.355 117.2MB 1.11
#> 2 igraph 135.91ms 280.26ms 2.52 37.1MB 1.35
Created on 2022-09-03 with reprex v2.0.2
Just two final notes: