r dplyr logical-or

check for a value to pick an ID in r


I have a total of 10 columns in a data frame, and for columns 3 to 10 I need to check, row-wise, whether each value is 0 or 1. If any of those columns has the value 1 in a row, I want to select that row's id (R language).

# Group the rows by id, then keep only the groups in which every row has at
# least one truthy (non-zero) value somewhere in columns sympt3 through sympt10.
test<-df %>%
group_by(id) %>%
filter(all(if_any(sympt3:sympt10, as.logical)))


             id          date              symptom1
             1       2011-08-27                0
             2       2012-02-21                1
             3       2012-07-12                0
             4       2012-07-25                0
             5       2010-12-31                0
             6       2011-01-02                0
             7       2012-08-21                0
             8       2013-03-26                0
             9       2012-11-07                0
            10       2010-11-22                0
                       symptom 2                   symptom 3
1                          0                         0
2                          1                         1
3                          0                         0
4                          0                         0
5                          0                         0
6                          0                         1
7                          0                         0
8                          1                         0
9                          0                         0
10                         0                         0
                   symptom 5.        symptom 6
1                          1                         0
2                          0                         1
3                          0                         0
4                          0                         0
5                          0                         0
6                          1                         0
7                          0                         1
8                          0                         0
9                          0                         0
10                         0                         1
              symptom 7       symptom 8
1                          1                         0
2                          0                         1
3                          0                         0
4                          0                         0
5                          1                         0
6                          0                         0
7                          1                         1
8                          0                         0
9                          0                         0
10                         1                         0
  

I'm not sure whether this selects the ids that have the value 1 in any of the columns I need.

Any help is appreciated


Solution

  • Try these options.

    (I vary between "not equal to 0" and "equal to 1", depending on your needs. They both work; I chose to vary them for demonstrative purposes.)

    base R

    # Keep the rows in which at least one "symptom*" column is non-zero.
    symptom_cols <- grep("^symptom", names(dat), value = TRUE)
    # na.rm = TRUE: a missing symptom value must not turn the row's count into
    # NA, because indexing with NA would return an all-NA row instead of
    # dropping or keeping the real one.
    dat[rowSums(dat[symptom_cols] != 0, na.rm = TRUE) > 0, ]
    #    id       date symptom1 symptom2 symptom3 symptom5 symptom6 symptom7 symptom8
    # 1   1 2011-08-27        0        0        0        1        0        1        0
    # 2   2 2012-02-21        1        1        1        0        1        0        1
    # 5   5 2010-12-31        0        0        0        0        0        1        0
    # 6   6 2011-01-02        0        0        1        1        0        0        0
    # 7   7 2012-08-21        0        0        0        0        1        1        1
    # 8   8 2013-03-26        0        1        0        0        0        0        0
    # 10 10 2010-11-22        0        0        0        0        1        1        0
    

    dplyr

    library(dplyr)
    # Keep the rows where any column whose name starts with "symptom" equals 1.
    filter(
      dat,
      if_any(starts_with("symptom"), function(x) x == 1)
    )
    #   id       date symptom1 symptom2 symptom3 symptom5 symptom6 symptom7 symptom8
    # 1  1 2011-08-27        0        0        0        1        0        1        0
    # 2  2 2012-02-21        1        1        1        0        1        0        1
    # 3  5 2010-12-31        0        0        0        0        0        1        0
    # 4  6 2011-01-02        0        0        1        1        0        0        0
    # 5  7 2012-08-21        0        0        0        0        1        1        1
    # 6  8 2013-03-26        0        1        0        0        0        0        0
    # 7 10 2010-11-22        0        0        0        0        1        1        0
    

    Data

    # Sample data: one row per id, a date stored as a character string, and
    # integer 0/1 indicator columns for each symptom.
    dat <- data.frame(
      id = 1:10,
      date = c(
        "2011-08-27", "2012-02-21", "2012-07-12", "2012-07-25", "2010-12-31",
        "2011-01-02", "2012-08-21", "2013-03-26", "2012-11-07", "2010-11-22"
      ),
      symptom1 = c(0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L),
      symptom2 = c(0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L),
      symptom3 = c(0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L),
      symptom5 = c(1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L),
      symptom6 = c(0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L),
      symptom7 = c(1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L),
      symptom8 = c(0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L),
      stringsAsFactors = FALSE
    )