rloopsfor-loopgroupingintervals

Adding conditions to loop to generate class intervals using ClassInt in R for areas with fewer n classes


I have issues with assigning classes to intervals that dont meet the criteria in my loop function using the classInt library in R. How do a include a condition for an automatic class if the rows in the dataframe subset are less than the number of classes (n) defined for class intervals, please? In my example, n=3. Here is an example with a sample dataframe you can create in R

library(classInt)
library(rlist)
library(dplyr)

##Create dataframe 
Country <- c('Australia', 'Italy', 'Peru', 'China','Australia', 'Italy', 'Peru', 
'China','Australia', 'Italy', 'Peru', 'China','Nigeria','Australia', 'Italy', 'Peru', 
'China')
Time <- c(21, 18, 17, 10,10,15,27,0,2,4,5,7,4,8,9,10,5)
Area <- c("A","A","A","A","B","B","B","B","C","C","C","C","D","D","D","D","D")
DF  <- data.frame(Country, Time, Area)

This should produce this dataframe:


      Country Time Area
 1  Australia   21    A
 2      Italy   18    A
 3       Peru   17    A
 4      China   10    A
 5  Australia   10    B
 6      Italy   15    B
 7       Peru   27    B
 8      China    0    B
 9  Australia    2    C
 10     Italy    4    C
 11      Peru    5    C
 12     China    7    C
 13   Nigeria    4    D
 14 Australia    8    D
 15     Italy    9    D
 16      Peru   10    D
 17     China    5    D


## Split by Country
NewXL <- split(DF,DF$Country)

## Generate the ranges and category/classes for each country
NewXL2 <- list()
for (i in 1:length(NewXL)) { AB <- NewXL[[i]]
#Create condition:
skip_to_next <- FALSE
tryCatch(Classes <- classIntervals(AB$Time, n=3, 
cutlabels=F,style='fisher',factor=F,warnSmallN=F,warnLargeN=F), error = function(e) { 
skip_to_next <<- TRUE})
if(skip_to_next) { next } 
## Classify
# Range and Class for each Absolute population exposed
AB$Range_Abs <- classify_intervals(AB$Time, 3, "fisher", factor = T)
AB$Class_Abs <- classify_intervals(AB$Time,3, "fisher", factor = FALSE)

NewXL2[[i]] <-AB }

This results is a list of 5 countries with Nigeria being Null because it only has one row (at least 3 could have been ideal to create intervals). Is there a way to write the code for the loop such that I can define a class and range minimum to be added for any dataframe in the loop with only one row? In this case, Nigeria should have only one row so I could have a class of 3 (the maximum) assigned automatically to the single row and the range would be [0,4). Below is what the loop output looks like.

  NewXL2
  [[1]]
       Country Time Area Range_Abs Class_Abs
  1  Australia   21    A [15.5,21]         3
  5  Australia   10    B  [5,15.5)         2
  9  Australia    2    C     [2,5)         1
  14 Australia    8    D  [5,15.5)         2

  [[2]]
     Country Time Area Range_Abs Class_Abs
  4    China   10    A  [8.5,10]         3
  8    China    0    B   [0,2.5)         1
  12   China    7    C [2.5,8.5)         2
  17   China    5    D [2.5,8.5)         2

  [[3]]
     Country Time Area Range_Abs Class_Abs
  2    Italy   18    A   [12,18]         3
  6    Italy   15    B   [12,18]         3
  10   Italy    4    C   [4,6.5)         1
  15   Italy    9    D  [6.5,12)         2

  [[4]]
  NULL

  [[5]]
     Country Time Area Range_Abs Class_Abs
  3     Peru   17    A [13.5,22)         2
  7     Peru   27    B   [22,27]         3
  11    Peru    5    C  [5,13.5)         1
  16    Peru   10    D  [5,13.5)         1

This is what the Nigeria dataframe should look like after the loop:

[[4]]
    Country Time Area Range_Abs Class_Abs
 13 Nigeria   10    D     [0,4)         3  

 #Merge all lists into long dataframe with class intervals
 NewXL2b <- list.rbind(NewXL2)  

Solution

  • You could make use of an if/if else/else in your loop:

    library(classInt)
    
    data <- data.frame(
      country = c('Australia', 'Italy', 'Peru', 'China','Australia', 'Italy', 'Peru', 'China','Australia', 'Italy', 'Peru', 'China','Nigeria','Australia', 'Italy', 'Peru', 'China'), 
      time = c(21, 18, 17, 10, 10, 15, 27, 0, 2, 4, 5, 7, 4, 8, 9, 10, 5), 
      area = c("A", "A", "A", "A", "B", "B", "B", "B", "C", "C", "C", "C", "D", "D", "D", "D", "D")
    )
    
    split_data <- split(data, data$country)
    
    result <- list()
    
    for (i in 1:length(split_data)) {
      split <- split_data[[i]]
      
      if(nrow(split) == 1) {
        # add a made up lower level less than the given time
        # so that the given time is the second level
        levels <- c(-1, split$time)
        labels <- sprintf("[%s]", levels)
        split$range_abs = factor(split$time, levels, labels)
        split$class_abs = as.numeric(split$range_abs)
      } else if(nrow(split) == 2) {
        levels <- sort(split$time)
        labels <- sprintf("[%s]", levels)
        split$range_abs = factor(split$time, levels, labels)
        split$class_abs = as.numeric(split$range_abs)
      } else {
        skip_to_next <- FALSE
      
        tryCatch(
          Classes <- classIntervals(
            split$time, 
            n = 5, 
            cutlabels = FALSE,
            style = 'fisher',
            factor = FALSE,
            warnSmallN = FALSE,
            warnLargeN = FALSE
          ), 
          error = function(e) { 
            skip_to_next <<- TRUE
          }
        )
      
        if(skip_to_next) { next } 
      
        split$range_abs <- classify_intervals(split$time, 3, "fisher", factor = TRUE)
        split$class_abs <- classify_intervals(split$time, 3, "fisher", factor = FALSE)
      }
    
      result[[i]] <- split
    }
    
    result
    #> [[1]]
    #>      country time area range_abs class_abs
    #> 1  Australia   21    A [15.5,21]         3
    #> 5  Australia   10    B  [5,15.5)         2
    #> 9  Australia    2    C     [2,5)         1
    #> 14 Australia    8    D  [5,15.5)         2
    #> 
    #> [[2]]
    #>    country time area range_abs class_abs
    #> 4    China   10    A  [8.5,10]         3
    #> 8    China    0    B   [0,2.5)         1
    #> 12   China    7    C [2.5,8.5)         2
    #> 17   China    5    D [2.5,8.5)         2
    #> 
    #> [[3]]
    #>    country time area range_abs class_abs
    #> 2    Italy   18    A   [12,18]         3
    #> 6    Italy   15    B   [12,18]         3
    #> 10   Italy    4    C   [4,6.5)         1
    #> 15   Italy    9    D  [6.5,12)         2
    #> 
    #> [[4]]
    #>    country time area range_abs class_abs
    #> 13 Nigeria    4    D       [4]         2
    #> 
    #> [[5]]
    #>    country time area range_abs class_abs
    #> 3     Peru   17    A [13.5,22)         2
    #> 7     Peru   27    B   [22,27]         3
    #> 11    Peru    5    C  [5,13.5)         1
    #> 16    Peru   10    D  [5,13.5)         1
    

    Created on 2024-07-03 with reprex v2.1.0.9000

    Reprex files hosted with on GitHub