rdplyrrename

How to switch cell content to column header crosswise while renaming them with prefix in R?


Despite searching many sources, I can't find a solution for my problem.

My dataframe has more than 1000 rows and 800 columns, knowing that starting from the 5th column (dat0$V4), there are always the same 8 columns per analyte (as there are about 100 analytes, so there are about 8*100 = 800 columns).
For each of these 8 columns, I would like the name of the analyte as a prefix (here CK from the 5th dat0$V4 to the 12th columns dat0$V11, then TP from the 13th dat0$V12 to the 20th columns dat0$V19, etc...), while switching the analyte code to a suffix for the header of the 1st column of each analyte (here dat0$V4 would become dat1$CK_126 and dat0$V12 would become dat1$TP_233).
As expected, in the end the header switch would remove 2 rows.

Does anyone have any ideas?

Initial data (dat0):
enter image description here

Desired output (dat1):
enter image description here

dat0:

dat0 <-
structure(list(file = c("csv", "csv", "csv", "csv", "csv", "csv", 
"csv", "csv", "csv", "csv"), V1 = c(NA, "run", NA, NA, NA, NA, 
NA, NA, NA, NA), V2 = c(NA, "type", NA, NA, NA, NA, NA, NA, NA, 
NA), V3 = c(NA, "nb", NA, NA, NA, NA, NA, NA, NA, NA), V4 = c("126", 
"result", "25", "12", "86", "118", "35", "71", "18", "56"), V5 = c("CK", 
"unit", "U/L", "U/L", "U/L", "U/L", "U/L", "U/L", "U/L", "U/L"
), V6 = c(NA, "am", NA, NA, NA, NA, NA, NA, "aaa", NA), V7 = c(NA, 
"mg", NA, NA, NA, NA, NA, NA, NA, NA), V8 = c(NA, "vo", NA, NA, 
"bbb", NA, NA, NA, NA, NA), V9 = c(NA, "mo", NA, NA, NA, NA, 
NA, NA, NA, NA), V10 = c(NA, "da", NA, NA, NA, NA, "ccc", NA, 
NA, NA), V11 = c(NA, "pr", NA, NA, NA, NA, NA, NA, NA, NA), V12 = c("233", 
"result", "36", "102", "16", "74", "44", "39", "125", "50"), 
    V13 = c("TP", "unit", "g/L", "g/L", "g/L", "g/L", "g/L", 
    "g/L", "g/L", "g/L"), V14 = c(NA, "am", NA, NA, NA, NA, NA, 
    NA, NA, NA), V15 = c(NA, "mg", NA, "ddd", NA, NA, NA, NA, 
    NA, NA), V16 = c(NA, "vo", NA, NA, NA, NA, NA, NA, NA, NA
    ), V17 = c(NA, "mo", NA, NA, NA, NA, NA, NA, NA, "eee"), 
    V18 = c(NA, "da", NA, NA, NA, NA, NA, NA, NA, NA), V19 = c(NA, 
    "pr", NA, NA, NA, "fff", NA, NA, NA, NA)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -10L)) 

dat1:

dat1 <-
structure(list(file = c("csv", "csv", "csv", "csv", "csv", "csv", 
"csv", "csv"), run = c(NA, NA, NA, NA, NA, NA, NA, NA), type = c(NA, 
NA, NA, NA, NA, NA, NA, NA), nb = c(NA, NA, NA, NA, NA, NA, NA, 
NA), CK_126 = c(25, 12, 86, 118, 35, 71, 18, 56), CK_unit = c("U/L", 
"U/L", "U/L", "U/L", "U/L", "U/L", "U/L", "U/L"), CK_am = c(NA, 
NA, NA, NA, NA, NA, "aaa", NA), CK_mg = c(NA, NA, NA, NA, NA, 
NA, NA, NA), CK_vo = c(NA, NA, "bbb", NA, NA, NA, NA, NA), CK_mo = c(NA, 
NA, NA, NA, NA, NA, NA, NA), CK_da = c(NA, NA, NA, NA, "ccc", 
NA, NA, NA), CK_pr = c(NA, NA, NA, NA, NA, NA, NA, NA), TP_233 = c(36, 
102, 16, 74, 44, 39, 125, 50), TP_unit = c("g/L", "g/L", "g/L", 
"g/L", "g/L", "g/L", "g/L", "g/L"), TP_am = c(NA, NA, NA, NA, 
NA, NA, NA, NA), TP_mg = c(NA, "ddd", NA, NA, NA, NA, NA, NA), 
    TP_vo = c(NA, NA, NA, NA, NA, NA, NA, NA), TP_mo = c(NA, 
    NA, NA, NA, NA, NA, NA, "eee"), TP_da = c(NA, NA, NA, NA, 
    NA, NA, NA, NA), TP_pr = c(NA, NA, NA, "fff", NA, NA, NA, 
    NA)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-8L))

Thanks for help


Solution

  • Something like this:

    dat0 <-
      structure(list(file = c("csv", "csv", "csv", "csv", "csv", "csv", "csv", "csv", "csv", "csv"), 
                     V1 = c(NA, "run", NA, NA, NA, NA, NA, NA, NA, NA), 
                     V2 = c(NA, "type", NA, NA, NA, NA, NA, NA, NA, NA), 
                     V3 = c(NA, "nb", NA, NA, NA, NA, NA, NA, NA, NA), 
                     V4 = c("126", "result", "25", "12", "86", "118", "35", "71", "18", "56"), 
                     V5 = c("CK", "unit", "U/L", "U/L", "U/L", "U/L", "U/L", "U/L", "U/L", "U/L"), 
                     V6 = c(NA, "am", NA, NA, NA, NA, NA, NA, "aaa", NA), 
                     V7 = c(NA, "mg", NA, NA, NA, NA, NA, NA, NA, NA), 
                     V8 = c(NA, "vo", NA, NA, "bbb", NA, NA, NA, NA, NA), 
                     V9 = c(NA, "mo", NA, NA, NA, NA, NA, NA, NA, NA), 
                     V10 = c(NA, "da", NA, NA, NA, NA, "ccc", NA, NA, NA), 
                     V11 = c(NA, "pr", NA, NA, NA, NA, NA, NA, NA, NA), 
                     V12 = c("233", "result", "36", "102", "16", "74", "44", "39", "125", "50"), 
                     V13 = c("TP", "unit", "g/L", "g/L", "g/L", "g/L", "g/L", "g/L", "g/L", "g/L"), 
                     V14 = c(NA, "am", NA, NA, NA, NA, NA, NA, NA, NA), 
                     V15 = c(NA, "mg", NA, "ddd", NA, NA, NA, NA, NA, NA), 
                     V16 = c(NA, "vo", NA, NA, NA, NA, NA, NA, NA, NA), 
                     V17 = c(NA, "mo", NA, NA, NA, NA, NA, NA, NA, "eee"), 
                     V18 = c(NA, "da", NA, NA, NA, NA, NA, NA, NA, NA), 
                     V19 = c(NA, "pr", NA, NA, NA, "fff", NA, NA, NA, NA)), 
                class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -10L))
    
    dat1 = dat0[-(1:2),]
    
    analyte_pref = as.character(dat0[1, seq(6, ncol(dat0), 8)])
    analyte_code = as.character(dat0[1, seq(5, ncol(dat0), 8)])
    first_cols = as.character(dat0[2, 1:4])
    first_cols[1] = "file"
    result_cols = as.character(dat0[2, 5:ncol(dat0)])
    result_cols[seq(1, length(result_cols), 8)] = analyte_code
    result_cols = paste(rep(analyte_pref, each = 8), result_cols, sep = "_")
    
    colnames(dat1) = c(first_cols, result_cols)
    
    knitr::kable(dat1)
    
    file run type nb CK_126 CK_unit CK_am CK_mg CK_vo CK_mo CK_da CK_pr TP_233 TP_unit TP_am TP_mg TP_vo TP_mo TP_da TP_pr
    3 csv NA NA NA 25 U/L NA NA NA NA NA NA 36 g/L NA NA NA NA NA NA
    4 csv NA NA NA 12 U/L NA NA NA NA NA NA 102 g/L NA ddd NA NA NA NA
    5 csv NA NA NA 86 U/L NA NA bbb NA NA NA 16 g/L NA NA NA NA NA NA
    6 csv NA NA NA 118 U/L NA NA NA NA NA NA 74 g/L NA NA NA NA NA fff
    7 csv NA NA NA 35 U/L NA NA NA NA ccc NA 44 g/L NA NA NA NA NA NA
    8 csv NA NA NA 71 U/L NA NA NA NA NA NA 39 g/L NA NA NA NA NA NA
    9 csv NA NA NA 18 U/L aaa NA NA NA NA NA 125 g/L NA NA NA NA NA NA
    10 csv NA NA NA 56 U/L NA NA NA NA NA NA 50 g/L NA NA NA eee NA NA

    Created on 2024-12-03 with reprex v2.0.2