I am switching from sp to sf but have some analysis which uses kernelUD
from adehabitatHR
which requires my data to be SpatialPoints
. Is there an equivalent which does not use sp
, perhaps instead using sf
or a regular dataframe, which offers similar bandwidth options?
A snippet of my original code using Genetta genetta as an example:
library(adehabitatHR)
G_genetta <- as.data.frame(matrix(c(0.5519758, 0.27524548,
0.5725632, 0.12309273,
0.5547747, 0.06100429,
0.6110925, 0.16211416,
0.5951087, 0.09316814,
0.5333567, 0.11673812,
0.5855461, 0.11170616,
0.5221387, 0.11061583,
0.5848452, 0.17213175),
ncol = 2, byrow = TRUE))
mask_xy_grid <- expand.grid(x = seq(0.01, 1, by = 0.01), y = seq(0.01, 1, by = 0.01))
coordinates(mask_xy_grid) <- ~ x + y
gridded(mask_xy_grid) <- TRUE
ade_G_genetta <- kernelUD(SpatialPoints(G_genetta),
h = "href",
grid = mask_xy_grid,
kern = "bivnorm")
plot(ade_G_genetta)
I am aware of the ks
package which I could use as follows:
library(ks)
mask_xy <- as.data.frame(expand.grid(x = seq(0.01, 1, by = 0.01), y = seq(0.01, 1, by = 0.01)))
kde_G_genetta <- kde(G_genetta,
eval.points = mask_xy)
# lazily get comparable plot
kde_G_genetta <- SpatialPixelsDataFrame(points = kde_G_genetta$eval.points,
data = data.frame(est = kde_G_genetta$estimate))
plot(kde_G_genetta)
but this doesn't offer the same bandwidth options as kernelUD
. I particularly don't understand why kernelUD
takes a single scalar value for "h" whereas kde
takes a matrix when they are both applied to a multivariate problem. kde
also doesn't seem to cope well with limited sampling points, e.g. Alcelaphus buselaphus which gives the error:
Error in chol.default(S) :
the leading minor of order 2 is not positive definite
A_buselaphus <- as.data.frame(matrix(c(0.5109837, 0.1247711,
0.5109837, 0.1247711,
0.5893287, 0.1613403,
0.5893287, 0.1613403,
0.5893287, 0.1613403),
ncol = 2, byrow = TRUE))
# using kernelUD
ade_A_buselaphus <- kernelUD(SpatialPoints(A_buselaphus),
h = "href",
grid = mask_xy_grid,
kern = "bivnorm")
plot(ade_A_buselaphus)
# using kde
kde_A_buselaphus <- kde(A_buselaphus,
eval.points = mask_xy)
# lazily get comparable plot
kde_A_buselaphus <- SpatialPixelsDataFrame(points = kde_A_buselaphus$eval.points,
data = data.frame(est = kde_A_buselaphus$estimate))
plot(kde_A_buselaphus)
I could use MASS, but I run into the same bandwidth problem as it takes a "vector of bandwidths for x and y directions":
library(MASS)
mask.xy <- as.data.frame(expand.grid(x = seq(0.01, 1, by = 0.01), y = seq(0.01, 1, by = 0.01)))
MASS_G_genetta <- kde2d(G_genetta$V1,
G_genetta$V2,
n = c(100, 100),
lims = c(range(mask.xy$x), range(mask.xy$y)))
# lazily get comparable plots
MASS_G_genetta <- SpatialPixelsDataFrame(points = mask.xy,
data = data.frame(est = melt(MASS_G_genetta$z)$value))
plot(MASS_G_genetta)
# same axis
plot(ade_G_genetta, zlim = c(0,75))
plot(kde_G_genetta, zlim = c(0,75))
plot(MASS_G_genetta, zlim = c(0,75))
Having done some digging into the source code of kernelUD
I have found a tolerable, albeit incomplete, solution. I have only been looking within documentation and source code which relates to a bivariate normal kernel using the ad hoc method for calculating h
.
To find the solution I followed a trail of functions within the CRAN repo:
kernelUD
-> .kernelUDs
-> void kernelhr
-> void epa
which led me to this comment within the epa function:
/* Keep only the points no further than 4*fen of the current pixel */
fen
equates to h
which according to the documentation is by default calculated with:
for a bivariate normal kernel. I can write this generically as follows:
(sqrt(0.5*(var(species_xy[[1]]) + var(species_xy[[2]]))))*(nrow(species_xy)^-(1/6))
Since kde2d
from MASS
takes a scalar vector, I can supply it with h*4 for each dimension. The solution for Genetta genetta would therefore be:
library(adehabitatHR)
library(MASS)
# using adehabitatHR
G_genetta <- as.data.frame(matrix(c(0.5519758, 0.27524548,
0.5725632, 0.12309273,
0.5547747, 0.06100429,
0.6110925, 0.16211416,
0.5951087, 0.09316814,
0.5333567, 0.11673812,
0.5855461, 0.11170616,
0.5221387, 0.11061583,
0.5848452, 0.17213175),
ncol = 2, byrow = TRUE))
mask_xy_grid <- expand.grid(x = seq(0.01, 1, by = 0.01), y = seq(0.01, 1, by = 0.01))
coordinates(mask_xy_grid) <- ~ x + y
gridded(mask_xy_grid) <- TRUE
ade_G_genetta <- kernelUD(SpatialPoints(G_genetta),
h = "href",
grid = mask_xy_grid,
kern = "bivnorm")
plot(ade_G_genetta)
# using MASS
mask.xy <- as.data.frame(expand.grid(x = seq(0.01, 1, by = 0.01), y = seq(0.01, 1, by = 0.01)))
H <- (sqrt(0.5*(var(G_genetta[[1]]) + var(G_genetta[[2]]))))*(nrow(G_genetta)^-(1/6))
MASS_G_genetta <- kde2d(G_genetta$V1,
G_genetta$V2,
n = c(100, 100),
h = c(H*4, H*4),
lims = c(range(mask.xy$x), range(mask.xy$y)))
# lazily get comparable plots
MASS_G_genetta <- SpatialPixelsDataFrame(points = mask.xy,
data = data.frame(est = melt(MASS_G_genetta$z)$value))
plot(MASS_G_genetta)
# to confirm this is the same h as the kernelUD output:
kde_G_genetta@h[["h"]]
H
kde_G_genetta@h[["h"]] == H
The exact values in the output differ minorly, presumably due to differences in precision between the two methods. A quick summary of the two objects shows us they are functionally the same and backs up the similarity in their plots.
summary(kde_G_genetta@data[["ud"]]
summary(MASS_G_genetta$est)