Conditional counting based on multiple conditions

I have a tidy ecological dataset in which every row is a single specimen/individual, with multiple columns for multiple variables.

#fake dataset
loc <- c(1,1,2,2,2,3,3,3,3,3,3,3,3)
date <- c(2021, 2022, 2021, 2021, 2022, 2021, 2021, 2022, 2023, 2023, 2023, 2023, 2023)
hab <- c("w", "l", "w", "w", "w", "l", "l", "w", "w", "w", "w", "w", "w")
spec <- c("frog", "frog", "frog", "frog", "frog", "beaver", "beaver", "beaver", "kingfisher", "kingfisher", "kingfisher", "kingfisher", "kingfisher")
n <- c(1,1,1,1,1,1,1,1,1,1,1,1,1)

df <- tibble(loc, date, hab, spec, n)

I want to condense different individuals, collected in the same location/date/habitat, into the same row, but only for some species (beaver and kingfisher, not frog), up to a maximum of 3 individuals per sample.

The resulting dataset should look like this:

#wanted output
loc1 <- c(1,1,2,2,2,3,3,3,3)
date1 <- c(2021, 2022, 2021, 2021, 2022, 2021, 2022, 2023, 2023)
hab1 <- c("w", "l", "w", "w", "w", "l", "w", "w", "w")
spec1 <- c("frog", "frog", "frog", "frog", "frog", "beaver", "beaver", "kingfisher", "kingfisher")
n1 <- c(1,1,1,1,1,2,1,3,2)

df1 <- tibble(loc1, date1, hab1, spec1, n1)

   loc1 date1 hab1  spec1         n1
  <dbl> <dbl> <chr> <chr>      <dbl>
1     1  2021 w     frog           1
2     1  2022 l     frog           1
3     2  2021 w     frog           1
4     2  2021 w     frog           1
5     2  2022 w     frog           1
6     3  2021 l     beaver         2
7     3  2022 w     beaver         1
8     3  2023 w     kingfisher     3
9     3  2023 w     kingfisher     2

Is it possible to do it with dplyr? Or should I use a for loop?

Solution

I've found group_modify() / group_map() to be handy for conditional aggregations.

library(dplyr)
df |> 
  group_by(loc, date, hab, spec) |> 
  group_modify(\(x, key){
    # key: grouping values
    # x: group subset, here just a frame with a single column, `n`
    # check if `spec` for current group is `beaver` or `kingfisher`
    if (key$spec %in% c("beaver", "kingfisher")){
      group_by(x, tri = rep(1:n(), length.out = n(), each = 3)) |> 
        summarise(n1 = sum(n)) |> 
        select(-tri)
    } else {
      rename(x, n1 = n)
    }
  }) |> 
  ungroup()
#> # A tibble: 9 × 5
#>     loc  date hab   spec          n1
#>   <dbl> <dbl> <chr> <chr>      <dbl>
#> 1     1  2021 w     frog           1
#> 2     1  2022 l     frog           1
#> 3     2  2021 w     frog           1
#> 4     2  2021 w     frog           1
#> 5     2  2022 w     frog           1
#> 6     3  2021 l     beaver         2
#> 7     3  2022 w     beaver         1
#> 8     3  2023 w     kingfisher     3
#> 9     3  2023 w     kingfisher     2

With group_modify(..., .f = \(x, key){...} ), x refers to group subset and key arg is a single-row frame with grouping details, e.g.

#> # A tibble: 1 × 4
#>     loc  date hab   spec      
#>   <dbl> <dbl> <chr> <chr>     
#> 1     3  2023 w     kingfisher