rspatialrgdalr-maptools

merging polygons of a SpatialPolygonDataframe


I have created a SpatialPolygonsDataFrame in which countries are associated with regions. Here is what it looks like https://cloudstor.aarnet.edu.au/plus/index.php/s/RpYr3xyMrmhaGKA (EDU link with data):

enter image description here

As the object is too large to work with and as I am only interested in the information regions and not countries, I wish to merge the countries within each region. In other words, I wish to create a single polygon for each region. I have tried to run the following commands but the runs take hours and I have never been able to see whether they worked properly.

library(rgdal)
regions = readOGR("./regionscountriesSTACK.shp")
library(maptools)
regions <- unionSpatialPolygons(regions, IDs=regions@data$REGION)
library(rgeos)
gUnionCascaded(regions, id = regions@data$REGION)
gUnaryUnion(regions, id = regions@data$REGION)

Any advise on an efficient way to process? Thanks a lot!


Solution

  • You need to: 1) simplify the polygons, they likely are too complex to begin with, specially if you want to aggregate them by region. Use gSimplify from rgeos package. Hard to help you further without the data. 2) remove the holes, they take up a lot of space and lead to troubles when you simplify 3) do the union and simplification with allowing for a serious simplification of the data

    The following codes combines all of that, doing it country by country also allows to see the progress of the thing:

    library(rgdal)
    library(maptools)
    library(rgeos)
    
    # Remove all holes that are bigger than "limitholes", by default all of them
    RemoveHoles <- function(SPol,limitHoles=+Inf){
        fn <- function(subPol){
            if(subPol@hole && subPol@area < limitHoles) 
                keep <- FALSE
            else
                keep <- TRUE
            return(keep)
        }
        nPol <- length(SPol)
        newPols <- list()
        for(iPol in 1:nPol){
            subPolygons <- list()
            pol <- SPol@polygons[[iPol]]
            for(iSubPol in 1:length(pol@Polygons)){
                subPol <- pol@Polygons[[iSubPol]]
    
                if(fn(subPol))
                    subPolygons[[length(subPolygons)+1]] <- subPol
            }
            newPols[[length(newPols)+1]] <- Polygons(subPolygons,pol@ID)
        }
        newSPol <- SpatialPolygons(newPols,proj4string=CRS(proj4string(SPol)))
        # SPolSimple <- gSimplify(newSPol,tol=0.01)
        newSPol <- createSPComment(newSPol)
        return(newSPol)
    }
    
    # Union Polygon (country) by polygon for a given region
    UnionSimplifyPolByPol <- function(subReg,precision=0.2){
        # if(length(subReg)>1){
        #     out <- unionSpatialPolygons(subReg[1:2,],rep(1,2),threshold=0.1)
        #     out <- RemoveHoles(out)
        # }
        out <- c()
        for(iCountry in 1:length(subReg)){
            cat("Adding:",subReg@data[iCountry,"COUNTRY"],"\n") 
            toAdd <- gSimplify(as(subReg[iCountry,],"SpatialPolygons"),
                               tol=precision,topologyPreserve=TRUE)
            toAdd <- RemoveHoles(toAdd)
            if(is.null(out)){
                out <- toAdd
            }else{
                toUnite <- rbind(out,toAdd)
                out <- unionSpatialPolygons(toUnite,
                                            IDs=rep(1,2),
                                            threshold=precision)
            }
            out <- RemoveHoles(out)
            # plot(out)
        }
        return(out)
    }
    # import the data
    countries <- readOGR("regionscountriesSTACK.shp")
    
    # you don't want to be bothered by factors
    countries@data$COUNTRY <- as.character(countries@data$COUNTRY)
    countries@data$REGION <- as.character(countries@data$REGION)
    
    # aggregate region by region  
    vectRegions <- unique(countries@data$REGION)
    world <- c()
    for(iRegion in 1:length(vectRegions)){
        regionName <- vectRegions[iRegion]
        cat("Region:",regionName)
        region <- UnionSimplifyPolByPol(countries[which(countries$REGION==regionName),])
        region <- spChFIDs(region,regionName)
        if(is.null(world)){
            world <- region
        }else{
            world <- rbind(world,region)
        }
        plot(world)
    }
    

    enter image description here

    This solution is implemented in the package spatDataManagement. Also you may want to use rworldmap for a lighter world map if you are only interested by the regions. It then becomes:

    library("spatDataManagement")
    library("rworldmap")
    levels(countriesLow@data$REGION)<-c(levels(countriesLow@data$REGION),"Other")
    countriesLow@data$REGION[which(is.na(countriesLow@data$REGION))] <- "Other"
    
    vectRegions <- unique(countriesLow@data$REGION)
    world <- c()
    for(iRegion in 1:length(vectRegions)){
        regionName <- vectRegions[iRegion]
        cat("Region:",regionName)
        region <- UnionSimplifyPolByPol(countriesLow[which(countriesLow$REGION==regionName),])
        region <- spChFIDs(region,gsub(" ","",regionName)) # IDs can't have spaces
        if(is.null(world)){
            world <- region
        }else{
            world <- rbind(world,region)
        }
    }
    world <- SpatialPolygonsDataFrame(world,data.frame(id=1:length(world),name=vectRegions),match.ID=FALSE)
    plot(world,col=world$id)
    

    enter image description here

    And this new map is much, much smaller (2.8 MegaBytes).