I have issues with assigning classes to intervals that dont meet the criteria in my loop function using the classInt library in R. How do a include a condition for an automatic class if the rows in the dataframe subset are less than the number of classes (n) defined for class intervals, please? In my example, n=3. Here is an example with a sample dataframe you can create in R
library(classInt)
library(rlist)
library(dplyr)
##Create dataframe
Country <- c('Australia', 'Italy', 'Peru', 'China','Australia', 'Italy', 'Peru',
'China','Australia', 'Italy', 'Peru', 'China','Nigeria','Australia', 'Italy', 'Peru',
'China')
Time <- c(21, 18, 17, 10,10,15,27,0,2,4,5,7,4,8,9,10,5)
Area <- c("A","A","A","A","B","B","B","B","C","C","C","C","D","D","D","D","D")
DF <- data.frame(Country, Time, Area)
This should produce this dataframe:
Country Time Area
1 Australia 21 A
2 Italy 18 A
3 Peru 17 A
4 China 10 A
5 Australia 10 B
6 Italy 15 B
7 Peru 27 B
8 China 0 B
9 Australia 2 C
10 Italy 4 C
11 Peru 5 C
12 China 7 C
13 Nigeria 4 D
14 Australia 8 D
15 Italy 9 D
16 Peru 10 D
17 China 5 D
## Split by Country
NewXL <- split(DF,DF$Country)
## Generate the ranges and category/classes for each country
NewXL2 <- list()
for (i in 1:length(NewXL)) { AB <- NewXL[[i]]
#Create condition:
skip_to_next <- FALSE
tryCatch(Classes <- classIntervals(AB$Time, n=3,
cutlabels=F,style='fisher',factor=F,warnSmallN=F,warnLargeN=F), error = function(e) {
skip_to_next <<- TRUE})
if(skip_to_next) { next }
## Classify
# Range and Class for each Absolute population exposed
AB$Range_Abs <- classify_intervals(AB$Time, 3, "fisher", factor = T)
AB$Class_Abs <- classify_intervals(AB$Time,3, "fisher", factor = FALSE)
NewXL2[[i]] <-AB }
This results is a list of 5 countries with Nigeria being Null because it only has one row (at least 3 could have been ideal to create intervals). Is there a way to write the code for the loop such that I can define a class and range minimum to be added for any dataframe in the loop with only one row? In this case, Nigeria should have only one row so I could have a class of 3 (the maximum) assigned automatically to the single row and the range would be [0,4). Below is what the loop output looks like.
NewXL2
[[1]]
Country Time Area Range_Abs Class_Abs
1 Australia 21 A [15.5,21] 3
5 Australia 10 B [5,15.5) 2
9 Australia 2 C [2,5) 1
14 Australia 8 D [5,15.5) 2
[[2]]
Country Time Area Range_Abs Class_Abs
4 China 10 A [8.5,10] 3
8 China 0 B [0,2.5) 1
12 China 7 C [2.5,8.5) 2
17 China 5 D [2.5,8.5) 2
[[3]]
Country Time Area Range_Abs Class_Abs
2 Italy 18 A [12,18] 3
6 Italy 15 B [12,18] 3
10 Italy 4 C [4,6.5) 1
15 Italy 9 D [6.5,12) 2
[[4]]
NULL
[[5]]
Country Time Area Range_Abs Class_Abs
3 Peru 17 A [13.5,22) 2
7 Peru 27 B [22,27] 3
11 Peru 5 C [5,13.5) 1
16 Peru 10 D [5,13.5) 1
This is what the Nigeria dataframe should look like after the loop:
[[4]]
Country Time Area Range_Abs Class_Abs
13 Nigeria 10 D [0,4) 3
#Merge all lists into long dataframe with class intervals
NewXL2b <- list.rbind(NewXL2)
You could make use of an if/if else/else in your loop:
library(classInt)
data <- data.frame(
country = c('Australia', 'Italy', 'Peru', 'China','Australia', 'Italy', 'Peru', 'China','Australia', 'Italy', 'Peru', 'China','Nigeria','Australia', 'Italy', 'Peru', 'China'),
time = c(21, 18, 17, 10, 10, 15, 27, 0, 2, 4, 5, 7, 4, 8, 9, 10, 5),
area = c("A", "A", "A", "A", "B", "B", "B", "B", "C", "C", "C", "C", "D", "D", "D", "D", "D")
)
split_data <- split(data, data$country)
result <- list()
for (i in 1:length(split_data)) {
split <- split_data[[i]]
if(nrow(split) == 1) {
# add a made up lower level less than the given time
# so that the given time is the second level
levels <- c(-1, split$time)
labels <- sprintf("[%s]", levels)
split$range_abs = factor(split$time, levels, labels)
split$class_abs = as.numeric(split$range_abs)
} else if(nrow(split) == 2) {
levels <- sort(split$time)
labels <- sprintf("[%s]", levels)
split$range_abs = factor(split$time, levels, labels)
split$class_abs = as.numeric(split$range_abs)
} else {
skip_to_next <- FALSE
tryCatch(
Classes <- classIntervals(
split$time,
n = 5,
cutlabels = FALSE,
style = 'fisher',
factor = FALSE,
warnSmallN = FALSE,
warnLargeN = FALSE
),
error = function(e) {
skip_to_next <<- TRUE
}
)
if(skip_to_next) { next }
split$range_abs <- classify_intervals(split$time, 3, "fisher", factor = TRUE)
split$class_abs <- classify_intervals(split$time, 3, "fisher", factor = FALSE)
}
result[[i]] <- split
}
result
#> [[1]]
#> country time area range_abs class_abs
#> 1 Australia 21 A [15.5,21] 3
#> 5 Australia 10 B [5,15.5) 2
#> 9 Australia 2 C [2,5) 1
#> 14 Australia 8 D [5,15.5) 2
#>
#> [[2]]
#> country time area range_abs class_abs
#> 4 China 10 A [8.5,10] 3
#> 8 China 0 B [0,2.5) 1
#> 12 China 7 C [2.5,8.5) 2
#> 17 China 5 D [2.5,8.5) 2
#>
#> [[3]]
#> country time area range_abs class_abs
#> 2 Italy 18 A [12,18] 3
#> 6 Italy 15 B [12,18] 3
#> 10 Italy 4 C [4,6.5) 1
#> 15 Italy 9 D [6.5,12) 2
#>
#> [[4]]
#> country time area range_abs class_abs
#> 13 Nigeria 4 D [4] 2
#>
#> [[5]]
#> country time area range_abs class_abs
#> 3 Peru 17 A [13.5,22) 2
#> 7 Peru 27 B [22,27] 3
#> 11 Peru 5 C [5,13.5) 1
#> 16 Peru 10 D [5,13.5) 1
Created on 2024-07-03 with reprex v2.1.0.9000
Reprex files hosted with on GitHub