rdataframedplyrzoo

How to repeat a column value based on a specific row value in R


I have the following data where I need to replace the values of UID column on the condition where the value will repeat until this specific value "Head of Household (Primary)" appears on the Rel column.

For example, OUA000110121995F001 should be repeating for 5 times, following the first time it appeared. Because the 7th row has another UID established based on a new household, where the Rel column has the "Head of Household (Primary)" value.

Below is the data snippet for testing:

structure(list(OU = c("OUA", NA, NA, NA, NA, NA, "OUA", NA, NA, 
NA, NA, NA, "OUA", NA, NA, NA, "OUA", NA, NA, NA, "OUA", NA, 
NA, NA, "OUA", NA, NA, NA, "OUA", NA, NA, NA, NA, NA, NA, NA, 
"OUA", NA, NA, NA, "OUA", NA, NA, NA, NA, "OUA", NA, NA, NA, 
NA, NA, "OUA", NA, NA, NA), Dev = c("Stableford Farms", NA, NA, 
NA, NA, NA, "Stableford Farms", NA, NA, NA, NA, NA, "Stableford Farms", 
NA, NA, NA, "Stableford Farms", NA, NA, NA, "Stableford Farms", 
NA, NA, NA, "Stableford Farms", NA, NA, NA, "Stableford Farms", 
NA, NA, NA, NA, NA, NA, NA, "Stableford Farms", NA, NA, NA, "Stableford Farms", 
NA, NA, NA, NA, "Stableford Farms", NA, NA, NA, NA, NA, "Stableford Farms", 
NA, NA, NA), Age = c(43, 24, 18, 11, 8, NA, 49, 18, 10, 9, 22, 
NA, 43, 23, 11, 18, 57, 56, NA, NA, 48, 16, 15, NA, 46, 19, 16, 
30, 53, 22, 20, 17, 14, 10, 2, NA, 46, 56, 16, 6, 41, 20, 17, 
1, NA, 36, 52, 14, 9, 16, 6, 44, 21, 16, 13), MovDt = structure(c(9474, 
NA, NA, NA, NA, NA, 12706, NA, NA, NA, NA, NA, 14200, NA, NA, 
NA, 9801, NA, NA, NA, 9410, NA, NA, NA, 8681, NA, NA, NA, 10551, 
NA, NA, NA, NA, NA, NA, NA, 14335, NA, NA, NA, 10033, NA, NA, 
NA, NA, 11272, NA, NA, NA, NA, NA, 13771, NA, NA, NA), class = "Date"), 
    Sex = c("Female", "Female", "Male", "Female", "Female", NA, 
    "Female", "Male", "Female", "Male", "Female", NA, "Female", 
    "Female", "Female", "Male", "Female", "Male", NA, NA, "Female", 
    "Female", "Female", NA, "Female", "Male", "Female", "Male", 
    "Female", "Female", "Male", "Female", "Male", "Male", "Female", 
    NA, "Female", "Male", "Female", "Male", "Female", "Female", 
    "Male", "Male", NA, "Female", "Male", "Male", "Male", "Female", 
    "Male", "Female", "Female", "Male", "Female"), Rel = c("Head of Household(Primary)", 
    "Child of Household; Full Time Student", "Child of Household; Full Time Student", 
    "Child of Household; Full Time Student", "Child of Household; Full Time Student", 
    NA, "Head of Household(Primary)", "Child of Household; Full Time Student", 
    "Child of Household; Full Time Student", "Child of Household; Full Time Student", 
    "Child of Household; Employed", NA, "Head of Household(Primary)", 
    "Child of Household; Full Time Student", "Child of Household; Full Time Student", 
    "Child of Household; Employed", "Head of Household(Primary)", 
    "Spouse (Primary)", NA, NA, "Head of Household(Primary)", 
    "Child of Household; Full Time Student", "Child of Household; Full Time Student", 
    NA, "Head of Household(Primary)", "Child of Household; Full Time Student", 
    "Child of Household; Full Time Student", "Child of Household; Employed", 
    "Head of Household(Primary)", "Child of Household; Full Time Student", 
    "Child of Household; Full Time Student", "Child of Household; Full Time Student", 
    "Child of Household; Full Time Student", "Child of Household; Full Time Student", 
    "Child of Household; Full Time Student", NA, "Head of Household(Primary)", 
    "Spouse (Primary)", "Child of Household; Full Time Student", 
    "Child of Household; Full Time Student", "Head of Household(Primary)", 
    "Child of Household; Full Time Student", "Child of Household; Full Time Student", 
    "Child of Household; Full Time Student", NA, "Head of Household(Primary)", 
    "Spouse (Primary)", "Child of Household; Full Time Student", 
    "Child of Household; Full Time Student", "Child of Household; Full Time Student", 
    "Child of Household; Full Time Student", "Head of Household(Primary)", 
    "Child of Household; Full Time Student", "Child of Household; Full Time Student", 
    "Child of Household; Full Time Student"), Lang = c("English", 
    NA, NA, NA, NA, NA, "French", NA, NA, NA, NA, NA, "English", 
    NA, NA, NA, "English", NA, NA, NA, "English", NA, NA, NA, 
    "English", NA, NA, NA, "English", NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, "English", NA, NA, NA, NA, "English", 
    NA, NA, NA, NA, NA, NA, NA, NA, NA), MarStat = c("Single", 
    NA, NA, NA, NA, NA, "Separated", NA, NA, NA, NA, NA, "Separated", 
    NA, NA, NA, "Married", NA, NA, NA, "Single", NA, NA, NA, 
    "Single", NA, NA, NA, "Separated", NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, "Separated", NA, NA, NA, NA, "Married", 
    NA, NA, NA, NA, NA, "Single", NA, NA, NA), Disab = c(NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_, NA_character_, NA_character_, 
    NA_character_, NA_character_), ...10 = c(NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA), CtznStat = c("Permanent Resident", 
    NA, NA, NA, NA, NA, "Permanent Resident", NA, NA, NA, NA, 
    NA, "Citizen", NA, NA, NA, "Citizen", NA, NA, NA, "Citizen", 
    NA, NA, NA, "Citizen", NA, NA, NA, "Citizen", NA, NA, NA, 
    NA, NA, NA, NA, "Permanent Resident", NA, NA, NA, "Citizen", 
    NA, NA, NA, NA, "Citizen", NA, NA, NA, NA, NA, "Citizen", 
    NA, NA, NA), HHInc = c(22784, NA, NA, NA, NA, NA, 16438.8, 
    NA, NA, NA, NA, NA, 27689.76, NA, NA, NA, 29055.4, NA, NA, 
    NA, 10875.11, NA, NA, NA, 23092.08, NA, NA, NA, 33216, NA, 
    NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, 17158.68, NA, NA, 
    NA, NA, 13764, NA, NA, NA, NA, NA, 21307.32, NA, NA, NA), 
    PerInc = c(11652, 4844, 6288, 0, 0, 0, 16438.8, 0, 0, 0, 
    0, 0, 27689.76, 0, 0, 0, 8062, 145, 20798.4, 50, 10875.11, 
    0, 0, 0, 14992.08, 0, 0, 8100, 9600, 16836, 6780, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 17158.68, 0, 0, 0, 13764, 0, 0, 0, 0, 
    0, 21307.32, 0, 0, 0), InCd = c("OWS2", "PE", "OW1P", NA, 
    NA, NA, "OWS2", NA, NA, NA, NA, "FTE", "ODSP", NA, NA, NA, 
    "SE", "NIPA", "FTE", "NIPA", "SE", "IPA", NA, NA, "ODSP", 
    NA, NA, "ODSP", "SPON", "OWS2", "OW2P", NA, NA, NA, NA, NA, 
    "ODSP", "FTE", NA, NA, "FTE", "EI", NA, NA, NA, "OWS1", NA, 
    NA, NA, NA, NA, "ODSP", NA, NA, NA), InDes = c("Ont Works Scale 2", 
    NA, NA, NA, NA, NA, "Ont Works Scale 2", NA, NA, NA, NA, 
    NA, "Ontario Disability Support Plan", NA, NA, NA, "Self- Employment", 
    NA, NA, NA, "Self- Employment", NA, NA, NA, "Ontario Disability Support Plan", 
    NA, NA, NA, "Sponsor Support", NA, NA, NA, NA, NA, NA, NA, 
    "Ontario Disability Support Plan", NA, NA, NA, "Full-Time Employment", 
    NA, NA, NA, NA, "Ont Works Scale 1", NA, NA, NA, NA, NA, 
    "Ontario Disability Support Plan", NA, NA, NA), AstVal = c(NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_
    ), AstInf = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, 145, NA, 50, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
    ), Postal = c("TORONTO ON M1G3H4", "TORONTO ON M1G3H4", "TORONTO ON M1G3H4", 
    "TORONTO ON M1G3H4", "TORONTO ON M1G3H4", NA, "TORONTO ON M1G3H3", 
    "TORONTO ON M1G3H3", "TORONTO ON M1G3H3", "TORONTO ON M1G3H3", 
    "TORONTO ON M1G3H3", NA, "TORONTO ON M1G3H3", "TORONTO ON M1G3H3", 
    "TORONTO ON M1G3H3", "TORONTO ON M1G3H3", "TORONTO ON M1G2Z3", 
    "TORONTO ON M1G2Z3", "TORONTO ON M1G2Z3", "TORONTO ON M1G2Z3", 
    "TORONTO ON M1G2Y6", "TORONTO ON M1G2Y6", "TORONTO ON M1G2Y6", 
    NA, "TORONTO ON M1G2Y2", "TORONTO ON M1G2Y2", "TORONTO ON M1G2Y2", 
    "TORONTO ON M1G2Y2", "TORONTO ON M1G3C1", "TORONTO ON M1G3C1", 
    "TORONTO ON M1G3C1", "TORONTO ON M1G3C1", "TORONTO ON M1G3C1", 
    "TORONTO ON M1G3C1", "TORONTO ON M1G3C1", NA, "TORONTO ON M1G2Y5", 
    "TORONTO ON M1G2Y5", "TORONTO ON M1G2Y5", "TORONTO ON M1G2Y5", 
    "TORONTO ON M1G3C3", "TORONTO ON M1G3C3", "TORONTO ON M1G3C3", 
    "TORONTO ON M1G3C3", NA, "TORONTO ON M1G3B6", "TORONTO ON M1G3B6", 
    "TORONTO ON M1G3B6", "TORONTO ON M1G3B6", "TORONTO ON M1G3B6", 
    "TORONTO ON M1G3B6", "TORONTO ON M1G3H3", "TORONTO ON M1G3H3", 
    "TORONTO ON M1G3H3", "TORONTO ON M1G3H3"), Rent = c(311, 
    NA, NA, NA, NA, NA, 269, NA, NA, NA, NA, NA, 278, NA, NA, 
    NA, 682, NA, NA, NA, 227, NA, NA, NA, 345, NA, NA, NA, 1519, 
    NA, NA, NA, NA, NA, NA, NA, 278, NA, NA, NA, 384, NA, NA, 
    NA, NA, 339, NA, NA, NA, NA, NA, 278, NA, NA, NA), Util = c(0, 
    NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA, 0, NA, NA, NA, 
    0, NA, NA, NA, 0, NA, NA, NA, 0, NA, NA, NA, 0, NA, NA, NA, 
    NA, NA, NA, NA, 0, NA, NA, NA, 0, NA, NA, NA, NA, 0, NA, 
    NA, NA, NA, NA, 0, NA, NA, NA), Park = c(NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
    NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), HHMbrs = c(5, 
    NA, NA, NA, NA, NA, 5, NA, NA, NA, NA, NA, 4, NA, NA, NA, 
    2, NA, NA, NA, 3, NA, NA, NA, 4, NA, NA, NA, 7, NA, NA, NA, 
    NA, NA, NA, NA, 4, NA, NA, NA, 4, NA, NA, NA, NA, 6, NA, 
    NA, NA, NA, NA, 4, NA, NA, NA), Bdr = c(3, NA, NA, NA, NA, 
    NA, 5, NA, NA, NA, NA, NA, 5, NA, NA, NA, 3, NA, NA, NA, 
    3, NA, NA, NA, 4, NA, NA, NA, 5, NA, NA, NA, NA, NA, NA, 
    NA, 3, NA, NA, NA, 4, NA, NA, NA, NA, 4, NA, NA, NA, NA, 
    NA, 3, NA, NA, NA), FMR = c(1114, NA, NA, NA, NA, NA, 1519, 
    NA, NA, NA, NA, NA, 1519, NA, NA, NA, 1114, NA, NA, NA, 1114, 
    NA, NA, NA, 1317, NA, NA, NA, 1519, NA, NA, NA, NA, NA, NA, 
    NA, 1114, NA, NA, NA, 1317, NA, NA, NA, NA, 1317, NA, NA, 
    NA, NA, NA, 1114, NA, NA, NA), FMRT = c("N", NA, NA, NA, 
    NA, NA, "N", NA, NA, NA, NA, NA, "N", NA, NA, NA, "N", NA, 
    NA, NA, "N", NA, NA, NA, "N", NA, NA, NA, "N", NA, NA, NA, 
    NA, NA, NA, NA, "N", NA, NA, NA, "N", NA, NA, NA, NA, "N", 
    NA, NA, NA, NA, NA, "N", NA, NA, NA), Dev_ID = c("0001", 
    "0002", "0002", "0002", "0002", "0002", "0001", "0002", "0002", 
    "0002", "0002", "0002", "0001", "0002", "0002", "0002", "0001", 
    "0002", "0002", "0002", "0001", "0002", "0002", "0002", "0001", 
    "0002", "0002", "0002", "0001", "0002", "0002", "0002", "0002", 
    "0002", "0002", "0002", "0001", "0002", "0002", "0002", "0001", 
    "0002", "0002", "0002", "0002", "0001", "0002", "0002", "0002", 
    "0002", "0002", "0001", "0002", "0002", "0002"), MovDt_Char = c("10121995", 
    NA, NA, NA, NA, NA, "15102004", NA, NA, NA, NA, NA, "17112008", 
    NA, NA, NA, "01111996", NA, NA, NA, "07101995", NA, NA, NA, 
    "08101993", NA, NA, NA, "21111998", NA, NA, NA, NA, NA, NA, 
    NA, "01042009", NA, NA, NA, "21061997", NA, NA, NA, NA, "11112000", 
    NA, NA, NA, NA, NA, "15092007", NA, NA, NA), Sex_Initial = c("F", 
    "F", "M", "F", "F", NA, "F", "M", "F", "M", "F", NA, "F", 
    "F", "F", "M", "F", "M", NA, NA, "F", "F", "F", NA, "F", 
    "M", "F", "M", "F", "F", "M", "F", "M", "M", "F", NA, "F", 
    "M", "F", "M", "F", "F", "M", "M", NA, "F", "M", "M", "M", 
    "F", "M", "F", "F", "M", "F"), Lang_ID = c("001", "002", 
    "002", "002", "002", "002", "003", "002", "002", "002", "002", 
    "002", "001", "002", "002", "002", "001", "002", "002", "002", 
    "001", "002", "002", "002", "001", "002", "002", "002", "001", 
    "002", "002", "002", "002", "002", "002", "002", "002", "002", 
    "002", "002", "001", "002", "002", "002", "002", "001", "002", 
    "002", "002", "002", "002", "002", "002", "002", "002"), 
    UID = c("OUA000110121995F001", "NA0002NAF002", "NA0002NAM002", 
    "NA0002NAF002", "NA0002NAF002", "NA0002NANA002", "OUA000115102004F003", 
    "NA0002NAM002", "NA0002NAF002", "NA0002NAM002", "NA0002NAF002", 
    "NA0002NANA002", "OUA000117112008F001", "NA0002NAF002", "NA0002NAF002", 
    "NA0002NAM002", "OUA000101111996F001", "NA0002NAM002", "NA0002NANA002", 
    "NA0002NANA002", "OUA000107101995F001", "NA0002NAF002", "NA0002NAF002", 
    "NA0002NANA002", "OUA000108101993F001", "NA0002NAM002", "NA0002NAF002", 
    "NA0002NAM002", "OUA000121111998F001", "NA0002NAF002", "NA0002NAM002", 
    "NA0002NAF002", "NA0002NAM002", "NA0002NAM002", "NA0002NAF002", 
    "NA0002NANA002", "OUA000101042009F002", "NA0002NAM002", "NA0002NAF002", 
    "NA0002NAM002", "OUA000121061997F001", "NA0002NAF002", "NA0002NAM002", 
    "NA0002NAM002", "NA0002NANA002", "OUA000111112000F001", "NA0002NAM002", 
    "NA0002NAM002", "NA0002NAM002", "NA0002NAF002", "NA0002NAM002", 
    "OUA000115092007F002", "NA0002NAF002", "NA0002NAM002", "NA0002NAF002"
    )), row.names = c(NA, -55L), class = c("tbl_df", "tbl", "data.frame"
))

The result is:

print(df[1:10, 30])
# A tibble: 10 × 1
   UID                
   <chr>              
 1 OUA000110121995F001
 2 NA0002NAF002       
 3 NA0002NAM002       
 4 NA0002NAF002       
 5 NA0002NAF002       
 6 NA0002NANA002      
 7 OUA000115102004F003
 8 NA0002NAM002       
 9 NA0002NAF002       
10 NA0002NAM002  

I tried this code and instead, I got NAs filled out for the rows that should have UIDs for the respective households.

df <- df %>%
  group_by(Dev) %>%
  mutate(
    # Carry forward the UID from "Head of Household(Primary)" rows
    UID_temp = ifelse(Rel == "Head of Household(Primary)", UID, NA),
    UID = zoo::na.locf(UID_temp, na.rm = FALSE, fromLast = FALSE)
  ) %>%
  ungroup() %>%
  select(-UID_temp)

The result is:

> print(df[1:10, 30])
# A tibble: 10 × 1
   UID                
   <chr>              
 1 OUA000110121995F001
 2 NA                 
 3 NA                 
 4 NA                 
 5 NA                 
 6 NA                 
 7 OUA000115102004F003
 8 NA                 
 9 NA                 
10 NA  

I have over 200k rows hence manual adjustment is not possible.


Solution

  • If you convert all your unwanted UID values to NA first, you can use tidyr::fill():

    library(dplyr)
    library(tidyr)
    
    result <- df |>
      mutate(UID = if_else(Rel == "Head of Household(Primary)", UID, NA)) |>
      fill(UID, .direction = "down")
    
    result[, "UID"]
    # # A tibble: 55 × 1
    #    UID                
    #    <chr>              
    #  1 OUA000110121995F001
    #  2 OUA000110121995F001
    #  3 OUA000110121995F001
    #  4 OUA000110121995F001
    #  5 OUA000110121995F001
    #  6 OUA000110121995F001
    #  7 OUA000115102004F003
    #  8 OUA000115102004F003
    #  9 OUA000115102004F003
    # 10 OUA000115102004F003
    # # ℹ 45 more rows
    # # ℹ Use `print(n = ...)` to see more rows