I have created a SpatialPolygonsDataFrame in which countries are associated with regions. Here is what it looks like https://cloudstor.aarnet.edu.au/plus/index.php/s/RpYr3xyMrmhaGKA (EDU link with data):
As the object is too large to work with and as I am only interested in the information regions and not countries, I wish to merge the countries within each region. In other words, I wish to create a single polygon for each region. I have tried to run the following commands but the runs take hours and I have never been able to see whether they worked properly.
library(rgdal)
regions = readOGR("./regionscountriesSTACK.shp")
library(maptools)
regions <- unionSpatialPolygons(regions, IDs=regions@data$REGION)
library(rgeos)
gUnionCascaded(regions, id = regions@data$REGION)
gUnaryUnion(regions, id = regions@data$REGION)
Any advise on an efficient way to process? Thanks a lot!
You need to:
1) simplify the polygons, they likely are too complex to begin with, specially if you want to aggregate them by region. Use gSimplify
from rgeos package. Hard to help you further without the data.
2) remove the holes, they take up a lot of space and lead to troubles when you simplify
3) do the union and simplification with allowing for a serious simplification of the data
The following codes combines all of that, doing it country by country also allows to see the progress of the thing:
library(rgdal)
library(maptools)
library(rgeos)
# Remove all holes that are bigger than "limitholes", by default all of them
RemoveHoles <- function(SPol,limitHoles=+Inf){
fn <- function(subPol){
if(subPol@hole && subPol@area < limitHoles)
keep <- FALSE
else
keep <- TRUE
return(keep)
}
nPol <- length(SPol)
newPols <- list()
for(iPol in 1:nPol){
subPolygons <- list()
pol <- SPol@polygons[[iPol]]
for(iSubPol in 1:length(pol@Polygons)){
subPol <- pol@Polygons[[iSubPol]]
if(fn(subPol))
subPolygons[[length(subPolygons)+1]] <- subPol
}
newPols[[length(newPols)+1]] <- Polygons(subPolygons,pol@ID)
}
newSPol <- SpatialPolygons(newPols,proj4string=CRS(proj4string(SPol)))
# SPolSimple <- gSimplify(newSPol,tol=0.01)
newSPol <- createSPComment(newSPol)
return(newSPol)
}
# Union Polygon (country) by polygon for a given region
UnionSimplifyPolByPol <- function(subReg,precision=0.2){
# if(length(subReg)>1){
# out <- unionSpatialPolygons(subReg[1:2,],rep(1,2),threshold=0.1)
# out <- RemoveHoles(out)
# }
out <- c()
for(iCountry in 1:length(subReg)){
cat("Adding:",subReg@data[iCountry,"COUNTRY"],"\n")
toAdd <- gSimplify(as(subReg[iCountry,],"SpatialPolygons"),
tol=precision,topologyPreserve=TRUE)
toAdd <- RemoveHoles(toAdd)
if(is.null(out)){
out <- toAdd
}else{
toUnite <- rbind(out,toAdd)
out <- unionSpatialPolygons(toUnite,
IDs=rep(1,2),
threshold=precision)
}
out <- RemoveHoles(out)
# plot(out)
}
return(out)
}
# import the data
countries <- readOGR("regionscountriesSTACK.shp")
# you don't want to be bothered by factors
countries@data$COUNTRY <- as.character(countries@data$COUNTRY)
countries@data$REGION <- as.character(countries@data$REGION)
# aggregate region by region
vectRegions <- unique(countries@data$REGION)
world <- c()
for(iRegion in 1:length(vectRegions)){
regionName <- vectRegions[iRegion]
cat("Region:",regionName)
region <- UnionSimplifyPolByPol(countries[which(countries$REGION==regionName),])
region <- spChFIDs(region,regionName)
if(is.null(world)){
world <- region
}else{
world <- rbind(world,region)
}
plot(world)
}
This solution is implemented in the package spatDataManagement. Also you may want to use rworldmap
for a lighter world map if you are only interested by the regions. It then becomes:
library("spatDataManagement")
library("rworldmap")
levels(countriesLow@data$REGION)<-c(levels(countriesLow@data$REGION),"Other")
countriesLow@data$REGION[which(is.na(countriesLow@data$REGION))] <- "Other"
vectRegions <- unique(countriesLow@data$REGION)
world <- c()
for(iRegion in 1:length(vectRegions)){
regionName <- vectRegions[iRegion]
cat("Region:",regionName)
region <- UnionSimplifyPolByPol(countriesLow[which(countriesLow$REGION==regionName),])
region <- spChFIDs(region,gsub(" ","",regionName)) # IDs can't have spaces
if(is.null(world)){
world <- region
}else{
world <- rbind(world,region)
}
}
world <- SpatialPolygonsDataFrame(world,data.frame(id=1:length(world),name=vectRegions),match.ID=FALSE)
plot(world,col=world$id)
And this new map is much, much smaller (2.8 MegaBytes).