rcasemutate

Modify the values of a variable


Complete example data:

# Example data: a single subject (id 2) with one visit per month, 12 to 24.
id <- rep(2, 13)
# Visit month, kept as character strings.
vm <- as.character(12:24)
# GE value recorded at each visit ("0" or "1"), kept as character strings.
GE <- c("0", "0", "0", "0", "0", "0", "1", "0", "1", "0", "1", "0", "1")
# Status label for each visit: "infection" where GE is "1", else "noinfection".
status <- ifelse(GE == "1", "infection", "noinfection")
fichier <- data.frame(id = id, vm = vm, GE = GE, status = status)

We already have values of the GE variable for the visits at 18 and 24 months, and we cannot remove these rows from our dataset. How can I change these values so that they take the previous visits into account?

GE = 0 if GE is 0 at all of the previous visits.

GE = 1 if GE is 1 at at least one of the previous visits.


Solution

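  • A variation of the answer to your previous question: this first sets GE to NA for the rows where vm is 18 or 24, then fills those values in from the earlier visits in the same window (12–17 for the 18-month visit, 19–23 for the 24-month visit), and finally recalculates the status column: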

    Scenario 1:

    library(dplyr)
    library(tidyr)
    
    # Example data: a single subject (id 2) with one visit per month, 12 to 24.
    id <- rep(2, 13)
    vm <- as.character(12:24)
    GE <- c("0", "0", "0", "0", "0", "0", "1", "0", "1", "0", "1", "0", "1")
    status <- ifelse(GE == "1", "infection", "noinfection")
    fichier <- data.frame(id = id, vm = vm, GE = GE, status = status)
    
    fichier <- fichier |>
      # vm and GE start out as character; work with integers from here on.
      mutate(across(c(vm, GE), as.integer)) |>
      # Window 1 ends at the 18-month visit; window 2 holds every later visit.
      mutate(tmp = if_else(vm > 18, 2, 1)) |>
      # Group per subject as well, in case the data holds several id values.
      group_by(id, tmp) |>
      mutate(
        # Blank out the GE values recorded at the 18- and 24-month visits ...
        GE = if_else(vm %in% c(18, 24), NA, GE),
        # ... add up the GE values that remain in the window ...
        tmp_positive = sum(GE, na.rm = TRUE),
        # ... and refill the blanks: 0 if that total is 0, 1 if it is above 0.
        GE = case_when(
          is.na(GE) & tmp_positive == 0 ~ 0,
          is.na(GE) & tmp_positive > 0 ~ 1,
          .default = GE
        ),
        # Rebuild the label so that it agrees with the updated GE.
        status = if_else(GE == 1, "infection", "noinfection")
      ) |>
      ungroup() |>
      # Drop the helper columns (every name starting with "tmp").
      select(!starts_with("tmp"))
    
    fichier
    # # A tibble: 13 × 4
    #       id    vm    GE status     
    #    <dbl> <int> <dbl> <chr>      
    #  1     2    12     0 noinfection
    #  2     2    13     0 noinfection
    #  3     2    14     0 noinfection
    #  4     2    15     0 noinfection
    #  5     2    16     0 noinfection
    #  6     2    17     0 noinfection
    #  7     2    18     0 noinfection
    #  8     2    19     0 noinfection
    #  9     2    20     1 infection  
    # 10     2    21     0 noinfection
    # 11     2    22     1 infection  
    # 12     2    23     0 noinfection
    # 13     2    24     1 infection