I have the following data frame, with column containing block identifiers, well identifiers, and concentration values of test divided per column.
df <- structure(list(block = c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2), Well = c("A01",
"A02", "A03", "A04", "A05", "A01", "A02", "A03", "A04", "A05"
), Conc.test1 = c(2, NA, 2, NA, NA, 2, NA, 2, NA, NA), Conc.test2 = c(NA,
2, NA, 2, NA, NA, 2, NA, 2, NA), Conc.Test3 = c(NA, NA, NA, 2,
NA, NA, NA, NA, 2, NA), Conc.test4 = c(NA, NA, 2, 2, NA, 2, NA,
2, 2, NA), Conc.test5 = c(NA, 2, NA, NA, NA, NA, 2, NA, NA, NA
), Conc.test6 = c(2, NA, NA, NA, NA, 2, NA, NA, NA, NA)), row.names = c(NA,
10L), class = "data.frame")
# block Well Conc.test1 Conc.test2 Conc.Test3 Conc.test4 Conc.test5 Conc.test6
# 1 1 A01 2 NA NA NA NA 2
# 2 1 A02 NA 2 NA NA 2 NA
# 3 1 A03 2 NA NA 2 NA NA
# 4 1 A04 NA 2 2 2 NA NA
# 5 1 A05 NA NA NA NA NA NA
# 6 2 A01 2 NA NA 2 NA 2
# 7 2 A02 NA 2 NA NA 2 NA
# 8 2 A03 2 NA NA 2 NA NA
# 9 2 A04 NA 2 2 2 NA NA
# 10 2 A05 NA NA NA NA NA NA
I would like to find per Block, per well the different tests used (and the amount of tests) and the concentration used, in the following format:
structure(list(block = c(1, 1, 1, 1, 2, 2, 2, 2), tests = c(2,
2, 2, 3, 3, 2, 2, 3), well = c("A01", "A02", "A03", "A04", "A01",
"A02", "A03", "A04"), C1 = c("test1", "test2", "test1", "test2",
"test1", "test2", "test1", "test2"), conc1 = c(2, 2, 2, 2, 2,
2, 2, 2), C2 = c("test6", "test5", "test4", "test3", "test6",
"test5", "test4", "test3"), con2 = c(2, 2, 2, 2, 2, 2, 2, 2),
C3 = c(NA, NA, NA, "test4", "test4", NA, NA, "test4"), conc3 = c(NA,
NA, NA, 2, 2, NA, NA, 2)), row.names = c(NA, 8L), class = "data.frame")
# block tests well C1 conc1 C2 con2 C3 conc3
# 1 1 2 A01 test1 2 test6 2 <NA> NA
# 2 1 2 A02 test2 2 test5 2 <NA> NA
# 3 1 2 A03 test1 2 test4 2 <NA> NA
# 4 1 3 A04 test2 2 test3 2 test4 2
# 5 2 3 A01 test1 2 test6 2 test4 2
# 6 2 2 A02 test2 2 test5 2 <NA> NA
# 7 2 2 A03 test1 2 test4 2 <NA> NA
# 8 2 3 A04 test2 2 test3 2 test4 2
I got this far, which kind of works, but I don't see how scalable this is.
output_df <- df %>%
pivot_longer(cols = starts_with("Conc"), names_to = "Test", values_to = "Concentration") %>%
filter(!is.na(Concentration)) %>%
group_by(block, Well) %>%
mutate(test_num = row_number()) %>%
pivot_wider(names_from = test_num, values_from = c(Test, Concentration),
names_sep = "") %>%
ungroup()
output_df <- output_df %>%
mutate(tests = rowSums(!is.na(select(., starts_with("Test"))))) %>%
select(block, Well, tests, everything()) %>%
rename_with(~gsub("Test", "C", .x), starts_with("Test")) %>%
rename_with(~gsub("Concentration", "conc", .x), starts_with("Concentration"))
You need pivot_longer
at first, creating index column by groups, and then pivot_wider
.
library(dplyr)
library(tidyr)
df %>%
pivot_longer(Conc.test1:Conc.test6, names_prefix = "Conc.", names_to = "C",
values_to = "conc", values_drop_na = TRUE) %>%
mutate(id = row_number(), tests = n(), .by = c(block, Well)) %>%
pivot_wider(names_from = id, values_from = c(C, conc),
names_vary = "slowest", names_sep = "")
# # A tibble: 8 × 9
# block Well tests C1 conc1 C2 conc2 C3 conc3
# <dbl> <chr> <int> <chr> <dbl> <chr> <dbl> <chr> <dbl>
# 1 1 A01 2 test1 2 test6 2 NA NA
# 2 1 A02 2 test2 2 test5 2 NA NA
# 3 1 A03 2 test1 2 test4 2 NA NA
# 4 1 A04 3 test2 2 Test3 2 test4 2
# 5 2 A01 3 test1 2 test4 2 test6 2
# 6 2 A02 2 test2 2 test5 2 NA NA
# 7 2 A03 2 test1 2 test4 2 NA NA
# 8 2 A04 3 test2 2 Test3 2 test4 2