I have a dataset like this:
dat1 <- read.table(text = "
nodepair 3 4 5
1 A6_A1 2 5 1
2 A6_A2 2 5 1
3 A6_A3 2 5 1
4 AL_A1 1 0 0
5 D_A6 0 3 0
6 F_A1 1 0 1
7 H_D 0 0 2
8 H_H 0 0 2
", header = TRUE, check.names = FALSE)
And I need to write a program to efficiently create new columns which sums each pairwise comparison when a zero appears between pairs-- specifically to create the 'b' condition is when a nodepair is unique to the first variable but not the second, and the 'c' condition is when a nodepair is unique to the second and not the first. Here is the desired output:
dat2 <- read.table(text = "
nodepair 3 4 5 3-4b 3-4c 3-5b 3-5c 4-5b 4-5c
1 A6_A1 2 5 1 NA NA NA NA NA NA
2 A6_A2 2 5 1 NA NA NA NA NA NA
3 A6_A3 2 5 1 NA NA NA NA NA NA
4 AL_A1 1 0 0 1 NA 1 NA 0 0
5 D_A6 0 3 0 NA 3 0 0 3 NA
6 F_A1 1 0 1 1 NA NA NA NA 1
7 H_D 0 0 2 0 0 NA 2 NA 2
8 H_H 0 0 2 0 0 NA 2 NA 2
", header = TRUE, check.names = FALSE)
This code will work but is certainly less than ideal for my much larger dataset:
dat1 <- dat1 %>%
mutate('3-4b' = case_when(`4` == 0 ~ as.integer(rowSums(across(c(`3`,`4`)))))) %>%
mutate('3-4c' = case_when(`3` == 0 ~ as.integer(rowSums(across(c(`3`,`4`)))))) %>%
mutate('3-5b' = case_when(`5` == 0 ~ as.integer(rowSums(across(c(`3`,`5`)))))) %>%
mutate('3-5c' = case_when(`3` == 0 ~ as.integer(rowSums(across(c(`3`,`5`)))))) %>%
mutate('4-5b' = case_when(`5` == 0 ~ as.integer(rowSums(across(c(`4`,`5`)))))) %>%
mutate('4-5c' = case_when(`4` == 0 ~ as.integer(rowSums(across(c(`4`,`5`))))))
library(dplyr)
library(purrr)
t(combn(names(dat1)[-1], 2)) %>%
rbind(., .[,c(2,1)]) %>%
cbind(rep(c("b", "c"), each = nrow(.)/2)) %>%
as.data.frame() %>%
mutate(V4 = ifelse(V3 == "b", paste0(V1, "-", V2, V3),
paste0(V2, "-", V1, V3))) %>%
arrange(V4) %>%
group_split(row_number(), .keep = FALSE) %>%
map(.,
~ dat1 %>% transmute(!!.x[[4]] :=
rowSums(cbind(dat1[, .x[[1]]],
dat1[, .x[[2]]])) *
ifelse(dat1[, .x[[2]]] == 0, 1, NA))) %>%
bind_cols(dat1, .)
#> nodepair 3 4 5 3-4b 3-4c 3-5b 3-5c 4-5b 4-5c
#> 1 A6_A1 2 5 1 NA NA NA NA NA NA
#> 2 A6_A2 2 5 1 NA NA NA NA NA NA
#> 3 A6_A3 2 5 1 NA NA NA NA NA NA
#> 4 AL_A1 1 0 0 1 NA 1 NA 0 0
#> 5 D_A6 0 3 0 NA 3 0 0 3 NA
#> 6 F_A1 1 0 1 1 NA NA NA NA 1
#> 7 H_D 0 0 2 0 0 NA 2 NA 2
#> 8 H_H 0 0 2 0 0 NA 2 NA 2
Created on 2024-02-15 with reprex v2.0.2