I have polygons where I want to calculate the percentage overlap between them. The idea is that when a polygon intersects another one, the percentage can be calculated on the perspective of one polygon or the other (i.e., the maximum value). Therefore, I want to make a script that generates percent coverage between polygons taking the percentage of overlap from one polygon and the other and them put all of the results in a data frame.
Here is the code that I have for the moment :
set.seed(131)
library(sf)
library(mapview)
m = rbind(c(0,0), c(1,0), c(1,1), c(0,1), c(0,0))
p = st_polygon(list(m))
n = 5
l = vector("list", n)
for (i in 1:n)
l[[i]] = p + 2 * runif(2)
s = st_sfc(l)
s.f = st_sf(s)
s.f$id = c(1,1,2,2,3)
s.f.2 = s.f %>% group_by(id) %>% summarise(geometry = sf::st_union(s)) # %>% summarise(area = st_area(s))
s.f.2$area = st_area(s.f.2)
i = s.f.2 %>%
st_intersection(.) %>%
mutate(intersect_area = st_area(.)) #%>%
st_intersection(s.f.2) %>%
mutate(intersect_area = st_area(.),
# id.int = sapply(i$origins, function(x) paste0(as.character(hr.pol$id)[x], collapse = ", ")),
id1 = sapply(i$origins, function(x) paste0(as.character(s.f.2$id)[x][1])),
id2 = sapply(i$origins, function(x) paste0(as.character(s.f.2$id)[x][2])),
area.id1 = sapply(i$origins, function(x) s.f.2$area[x][1]),
area.id2 = sapply(i$origins, function(x) s.f.2$area[x][2]),
perc1 = as.vector(intersect_area/area.id1),
perc2 = as.vector(intersect_area/area.id2)) %>% # create new column with shape area
filter(n.overlaps ==2) %>%
dplyr::select(id, intersect_area, id1, id2,
# id.int,
perc1,perc2) %>% # only select columns needed to merge
st_drop_geometry() %>% # drop geometry as we don't need it
select(-id) %>%
pivot_longer(#names_prefix = "id",
names_to = "perc",
cols = starts_with("perc"))
This code gives the percentage of overlap between the polygons (I'm doing it for only 2 overlap, but it would be nice if this is generalizable for more than one overlap!)
mapview(s.f.2,zcol = "id")
In the end, what I'm looking for is something like this :
id `1` `2` `3`
1 100 31.6 0
2 27.0 100 0
3 0 0 100
So polygon "1" covers 31.6% of the area of polygon "2" and polygon "2" covers 27.0% of the area of polygon "1".
What I have at the moment is (but is very slow):
data.sp = s.f.2 %>%
st_as_sf(.) %>%
mutate(area.m = st_area(geometry),
area.ha = units::set_units(area.m, ha)) %>%
select(-c(area,area.m))
id.sort = sort(unique(data.sp$id)) # used to reorder columns based on ID
df.fill =data.frame(id1 = NULL, id2=NULL, area =NULL, over1 = NULL, over2 = NULL)
for (k in 1:length(id.sort)) {
for (op in 1:length(id.sort)) {
int.out = st_intersection(data.sp[data.sp$id==id.sort[k],],
data.sp[data.sp$id==id.sort[op],])
# int.out
if(nrow(int.out) != 0) {
area.tmp = st_area(int.out)#/10000
over1 = area.tmp/int.out$area.ha
over2 = area.tmp/int.out$area.ha.1
} else {area.tmp = 0;over1=0;over2=0}
df.fill.tmp = data.frame(id1 = id.sort[k], id2=id.sort[op],
area = area.tmp,
over1 = over1*100,
over2 = over2*100)
df.fill = rbind(df.fill,df.fill.tmp)
}
}
df.fill$over1 = as.numeric(df.fill$over1)
df.fill$over2 = as.numeric(df.fill$over2)
df.fill %>%
select(-c(area, over2)) %>%
pivot_wider(names_from = id2,values_from = over1,
values_fill = 0)
Without a real example to benchmark, I'm not sure it's faster than your solution. But it's simpler and easier to understand (at least for my brain).
sf::st_intersection()
is vectorized. So it will find & return all the intersections of the first & second argument for you. In this case, the two arguments are the same set of polygons.
sf::st_intersection(s.f.2, s.f.2) %>%
dplyr::mutate(
area = sf::st_area(.),
proportion = area / area.1
) %>%
tibble::as_tibble() %>%
dplyr::select(
id_1 = id,
id_2 = id.1,
proportion,
) %>%
# tidyr::complete(id_1, id_2, fill = list(proportion = 0))
tidyr::pivot_wider(
names_from = id_1,
values_from = proportion,
values_fill = 0
)
Output:
# A tibble: 3 x 4
id_2 `1` `2` `3`
<dbl> <dbl> <dbl> <dbl>
1 1 1 0.316 0
2 2 0.270 1 0
3 3 0 0 1
Things to consider:
id_1
and id_2
.