I have an X by Y grid with cells containing 1 if a certain criteria is met or 0 if it is not. Now I want to identify features in the grid where there are at least N contiguous cells containing a 1. Contiguous cells can be adjacent side by side, or adjacent diagonally. I made a picture to illustrate the problem (see link), with N = 5. For clarity I omitted marking the 0s, and they are in the unmarked cells. Red 1s belong to features I want to identify, and black 1s do not. The desired result would be as shown in the picture, but with all the black 1s changed to 0s. I use R, so solutions using that language would be thoroughly appreciated, but I'll happily settle for others. I couldn't find anything in the R libraries (such as rgeos) specifically, but maybe I'm missing something. Any help appreciated, thanks!
Here is a small reproducible example created
input.mat <- structure(c(1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L,
1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 1L, 1L, 1L), .Dim = c(15L, 15L), .Dimnames = list(NULL, NULL))
input.mat
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15]
[1,] 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0
[2,] 1 1 0 0 1 1 1 0 0 1 0 0 0 1 0
[3,] 0 0 1 0 0 0 0 0 0 1 1 0 1 0 1
[4,] 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0
[5,] 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
[6,] 1 0 0 0 0 0 0 0 0 0 1 0 1 1 0
[7,] 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0
[8,] 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0
[9,] 1 0 0 0 0 1 0 1 0 0 0 1 1 1 0
[10,] 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0
[11,] 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1
[12,] 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0
[13,] 0 0 1 0 1 0 0 0 1 0 0 0 0 0 1
[14,] 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1
[15,] 1 1 1 1 1 0 0 0 1 1 0 0 0 0 1
output.mat <- structure(c(1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L,
1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 1L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L), .Dim = c(15L, 15L), .Dimnames = list(NULL, NULL))
output.mat
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14] [,15]
[1,] 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0
[2,] 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0
[3,] 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1
[4,] 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0
[5,] 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
[6,] 1 0 0 0 0 0 0 0 0 0 1 0 1 1 0
[7,] 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0
[8,] 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0
[9,] 1 0 0 0 0 0 0 0 0 0 0 1 1 1 0
[10,] 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0
[11,] 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1
[12,] 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0
[13,] 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0
[14,] 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
[15,] 1 1 1 1 1 0 0 0 1 1 0 0 0 0 0
Created on 2021-05-27 by the reprex package (v2.0.0)
Here is a base R code for 2D points clustering
# compute distance from point `x` to point set `S`
fdist <- function(x, S) {
if (length(S) == 0) {
return(0)
}
v <- x - S
pmax(abs(Re(v)), abs(Im(v)))
}
# assign groups based on distance
fgrp <- function(x, clst) {
for (k in seq_along(clst)) {
if (any(fdist(x, clst[[k]]) < 2)) {
clst[[k]] <- c(clst[[k]], x)
return(clst)
}
}
}
# use complex number represent 2D points
p <- c(which(input.mat == 1, arr.ind = TRUE) %*% c(1, 1i))
# initialize cluster list
clst <- list()
while (length(p) > 0) {
idxrm <- c()
for (k in seq_along(p)) {
clst_new <- fgrp(p[k], clst)
if (sum(lengths(clst_new)) > sum(lengths(clst))) {
idxrm <- c(idxrm, k)
clst <- clst_new
}
}
if (length(idxrm) == 0) {
clst <- c(clst, list(p[1]))
} else {
p <- p[-idxrm]
}
}
# keep points that follows the contiguous pattern
N <- 5
Z <- do.call(
c,
Filter(
function(x) length(x) >= N,
Map(
unique,
clst
)
)
)
# produce output matrix
output.mat <- input.mat * 0
output.mat[cbind(Re(Z), Im(Z))] <- 1
and you will obtain
> output.mat
[,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
[1,] 1 1 0 0 0 0 0 0 0 0 0 0 1
[2,] 1 1 0 0 0 0 0 0 0 0 0 0 0
[3,] 0 0 1 0 0 0 0 0 0 0 0 0 1
[4,] 0 0 0 1 0 0 0 0 0 0 0 0 0
[5,] 0 0 0 0 0 0 0 0 0 0 0 1 0
[6,] 1 0 0 0 0 0 0 0 0 0 1 0 1
[7,] 1 1 0 0 0 0 0 0 0 0 0 1 0
[8,] 1 1 0 0 0 0 0 0 0 0 0 0 0
[9,] 1 0 0 0 0 0 0 0 0 0 0 1 1
[10,] 0 0 0 0 0 0 0 0 0 0 0 1 1
[11,] 0 0 1 0 1 0 0 0 0 0 0 0 0
[12,] 0 0 0 1 0 0 0 0 0 1 0 0 0
[13,] 0 0 1 0 1 0 0 0 1 0 0 0 0
[14,] 0 0 0 0 0 0 0 0 1 0 0 0 0
[15,] 1 1 1 1 1 0 0 0 1 1 0 0 0
[,14] [,15]
[1,] 0 0
[2,] 1 0
[3,] 0 1
[4,] 1 0
[5,] 0 0
[6,] 1 0
[7,] 0 0
[8,] 0 0
[9,] 1 0
[10,] 1 0
[11,] 0 1
[12,] 0 0
[13,] 0 0
[14,] 0 0
[15,] 0 0
1
s, i.e., row-column indices