Despite searching many sources, I can't find a solution for my problem.
My dataframe has more than 1000 rows and 800 columns, knowing that starting from the 5th column (dat0$V4
), there are always the same 8 columns per analyte (as there are about 100 analytes, so there are about 8*100 = 800 columns).
For each of these 8 columns, I would like the name of the analyte as a prefix (here CK
from the 5th dat0$V4
to the 12th columns dat0$V11
, then TP
from the 13th dat0$V12
to the 20th columns dat0$V19
, etc...), while switching the analyte code to a suffix for the header of the 1st column of each analyte (here dat0$V4
would become dat1$CK_126
and dat0$V12
would become dat1$TP_233
).
As expected, in the end the header switch would remove 2 rows.
Does anyone have any ideas?
dat0:
dat0 <-
structure(list(file = c("csv", "csv", "csv", "csv", "csv", "csv",
"csv", "csv", "csv", "csv"), V1 = c(NA, "run", NA, NA, NA, NA,
NA, NA, NA, NA), V2 = c(NA, "type", NA, NA, NA, NA, NA, NA, NA,
NA), V3 = c(NA, "nb", NA, NA, NA, NA, NA, NA, NA, NA), V4 = c("126",
"result", "25", "12", "86", "118", "35", "71", "18", "56"), V5 = c("CK",
"unit", "U/L", "U/L", "U/L", "U/L", "U/L", "U/L", "U/L", "U/L"
), V6 = c(NA, "am", NA, NA, NA, NA, NA, NA, "aaa", NA), V7 = c(NA,
"mg", NA, NA, NA, NA, NA, NA, NA, NA), V8 = c(NA, "vo", NA, NA,
"bbb", NA, NA, NA, NA, NA), V9 = c(NA, "mo", NA, NA, NA, NA,
NA, NA, NA, NA), V10 = c(NA, "da", NA, NA, NA, NA, "ccc", NA,
NA, NA), V11 = c(NA, "pr", NA, NA, NA, NA, NA, NA, NA, NA), V12 = c("233",
"result", "36", "102", "16", "74", "44", "39", "125", "50"),
V13 = c("TP", "unit", "g/L", "g/L", "g/L", "g/L", "g/L",
"g/L", "g/L", "g/L"), V14 = c(NA, "am", NA, NA, NA, NA, NA,
NA, NA, NA), V15 = c(NA, "mg", NA, "ddd", NA, NA, NA, NA,
NA, NA), V16 = c(NA, "vo", NA, NA, NA, NA, NA, NA, NA, NA
), V17 = c(NA, "mo", NA, NA, NA, NA, NA, NA, NA, "eee"),
V18 = c(NA, "da", NA, NA, NA, NA, NA, NA, NA, NA), V19 = c(NA,
"pr", NA, NA, NA, "fff", NA, NA, NA, NA)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -10L))
dat1:
dat1 <-
structure(list(file = c("csv", "csv", "csv", "csv", "csv", "csv",
"csv", "csv"), run = c(NA, NA, NA, NA, NA, NA, NA, NA), type = c(NA,
NA, NA, NA, NA, NA, NA, NA), nb = c(NA, NA, NA, NA, NA, NA, NA,
NA), CK_126 = c(25, 12, 86, 118, 35, 71, 18, 56), CK_unit = c("U/L",
"U/L", "U/L", "U/L", "U/L", "U/L", "U/L", "U/L"), CK_am = c(NA,
NA, NA, NA, NA, NA, "aaa", NA), CK_mg = c(NA, NA, NA, NA, NA,
NA, NA, NA), CK_vo = c(NA, NA, "bbb", NA, NA, NA, NA, NA), CK_mo = c(NA,
NA, NA, NA, NA, NA, NA, NA), CK_da = c(NA, NA, NA, NA, "ccc",
NA, NA, NA), CK_pr = c(NA, NA, NA, NA, NA, NA, NA, NA), TP_233 = c(36,
102, 16, 74, 44, 39, 125, 50), TP_unit = c("g/L", "g/L", "g/L",
"g/L", "g/L", "g/L", "g/L", "g/L"), TP_am = c(NA, NA, NA, NA,
NA, NA, NA, NA), TP_mg = c(NA, "ddd", NA, NA, NA, NA, NA, NA),
TP_vo = c(NA, NA, NA, NA, NA, NA, NA, NA), TP_mo = c(NA,
NA, NA, NA, NA, NA, NA, "eee"), TP_da = c(NA, NA, NA, NA,
NA, NA, NA, NA), TP_pr = c(NA, NA, NA, "fff", NA, NA, NA,
NA)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-8L))
Thanks for help
Something like this:
dat0 <-
structure(list(file = c("csv", "csv", "csv", "csv", "csv", "csv", "csv", "csv", "csv", "csv"),
V1 = c(NA, "run", NA, NA, NA, NA, NA, NA, NA, NA),
V2 = c(NA, "type", NA, NA, NA, NA, NA, NA, NA, NA),
V3 = c(NA, "nb", NA, NA, NA, NA, NA, NA, NA, NA),
V4 = c("126", "result", "25", "12", "86", "118", "35", "71", "18", "56"),
V5 = c("CK", "unit", "U/L", "U/L", "U/L", "U/L", "U/L", "U/L", "U/L", "U/L"),
V6 = c(NA, "am", NA, NA, NA, NA, NA, NA, "aaa", NA),
V7 = c(NA, "mg", NA, NA, NA, NA, NA, NA, NA, NA),
V8 = c(NA, "vo", NA, NA, "bbb", NA, NA, NA, NA, NA),
V9 = c(NA, "mo", NA, NA, NA, NA, NA, NA, NA, NA),
V10 = c(NA, "da", NA, NA, NA, NA, "ccc", NA, NA, NA),
V11 = c(NA, "pr", NA, NA, NA, NA, NA, NA, NA, NA),
V12 = c("233", "result", "36", "102", "16", "74", "44", "39", "125", "50"),
V13 = c("TP", "unit", "g/L", "g/L", "g/L", "g/L", "g/L", "g/L", "g/L", "g/L"),
V14 = c(NA, "am", NA, NA, NA, NA, NA, NA, NA, NA),
V15 = c(NA, "mg", NA, "ddd", NA, NA, NA, NA, NA, NA),
V16 = c(NA, "vo", NA, NA, NA, NA, NA, NA, NA, NA),
V17 = c(NA, "mo", NA, NA, NA, NA, NA, NA, NA, "eee"),
V18 = c(NA, "da", NA, NA, NA, NA, NA, NA, NA, NA),
V19 = c(NA, "pr", NA, NA, NA, "fff", NA, NA, NA, NA)),
class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, -10L))
dat1 = dat0[-(1:2),]
analyte_pref = as.character(dat0[1, seq(6, ncol(dat0), 8)])
analyte_code = as.character(dat0[1, seq(5, ncol(dat0), 8)])
first_cols = as.character(dat0[2, 1:4])
first_cols[1] = "file"
result_cols = as.character(dat0[2, 5:ncol(dat0)])
result_cols[seq(1, length(result_cols), 8)] = analyte_code
result_cols = paste(rep(analyte_pref, each = 8), result_cols, sep = "_")
colnames(dat1) = c(first_cols, result_cols)
knitr::kable(dat1)
file | run | type | nb | CK_126 | CK_unit | CK_am | CK_mg | CK_vo | CK_mo | CK_da | CK_pr | TP_233 | TP_unit | TP_am | TP_mg | TP_vo | TP_mo | TP_da | TP_pr | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3 | csv | NA | NA | NA | 25 | U/L | NA | NA | NA | NA | NA | NA | 36 | g/L | NA | NA | NA | NA | NA | NA |
4 | csv | NA | NA | NA | 12 | U/L | NA | NA | NA | NA | NA | NA | 102 | g/L | NA | ddd | NA | NA | NA | NA |
5 | csv | NA | NA | NA | 86 | U/L | NA | NA | bbb | NA | NA | NA | 16 | g/L | NA | NA | NA | NA | NA | NA |
6 | csv | NA | NA | NA | 118 | U/L | NA | NA | NA | NA | NA | NA | 74 | g/L | NA | NA | NA | NA | NA | fff |
7 | csv | NA | NA | NA | 35 | U/L | NA | NA | NA | NA | ccc | NA | 44 | g/L | NA | NA | NA | NA | NA | NA |
8 | csv | NA | NA | NA | 71 | U/L | NA | NA | NA | NA | NA | NA | 39 | g/L | NA | NA | NA | NA | NA | NA |
9 | csv | NA | NA | NA | 18 | U/L | aaa | NA | NA | NA | NA | NA | 125 | g/L | NA | NA | NA | NA | NA | NA |
10 | csv | NA | NA | NA | 56 | U/L | NA | NA | NA | NA | NA | NA | 50 | g/L | NA | NA | NA | eee | NA | NA |
Created on 2024-12-03 with reprex v2.0.2