Here's a data snippet:
Tab1 <- read.table(text = "
nodepair ES1 ES2 ++ -- +- -+ 0+ +0 0- -0 00
1 E_A6 3 3 0 0 0 0 0 0 0 0 1
2 E_A6 4 3 NA NA NA NA NA NA NA NA NA
3 E_A6 5 3 NA NA NA NA NA NA NA NA NA
4 E_AL 3 3 0 5 0 0 0 0 0 0 1
5 E_AL 4 3 NA NA NA NA NA NA NA NA NA
6 E_AL 5 3 NA NA NA NA NA NA NA NA NA
", header = TRUE)
For each nodepair group, I would like to replace rows when they contain NA's with the values from the row where ES2 is equal to ES1, provided that it matches with the ES2 of the NA row. And then I need to assign an NA to the ES1 column of the previously NA row.
like this:
Tab2 <- read.table(text = "
nodepair ES1 ES2 ++ -- +- -+ 0+ +0 0- -0 00
1 E_A6 3 3 0 0 0 0 0 0 0 0 1
2 E_A6 NA 3 0 0 0 0 0 0 0 0 1
3 E_A6 NA 3 0 0 0 0 0 0 0 0 1
4 E_AL 3 3 0 5 0 0 0 0 0 0 1
5 E_AL NA 3 0 5 0 0 0 0 0 0 1
6 E_AL NA 3 0 5 0 0 0 0 0 0 1
", header = TRUE)
I've been trying out this kind of logic:
Tab2 <- Tab1 %>%
rowwise %>%
mutate(across(4:12, ~ifelse(is.na(rowsums), ???, .))))
Maybe a for-loop would work? E.g.
Tab1 <- read.table(text = "
nodepair ES1 ES2 ++ -- +- -+ 0+ +0 0- -0 00
1 E_A6 3 3 0 0 0 0 0 0 0 0 1
2 E_A6 4 3 NA NA NA NA NA NA NA NA NA
3 E_A6 5 3 NA NA NA NA NA NA NA NA NA
4 E_AL 3 3 0 5 0 0 0 0 0 0 1
5 E_AL 4 3 NA NA NA NA NA NA NA NA NA
6 E_AL 5 3 NA NA NA NA NA NA NA NA NA
", header = TRUE, check.names = FALSE)
for (i in 1:nrow(Tab1)) {
if (Tab1[i,2] == Tab1[i,3]) {
stored_line <- Tab1[i,4:12]
}
if (all(is.na(Tab1[i,4:12]))) {
Tab1[i,4:12] <- stored_line
Tab1[i,2] <- NA
}
}
Tab1
#> nodepair ES1 ES2 ++ -- +- -+ 0+ +0 0- -0 00
#> 1 E_A6 3 3 0 0 0 0 0 0 0 0 1
#> 2 E_A6 NA 3 0 0 0 0 0 0 0 0 1
#> 3 E_A6 NA 3 0 0 0 0 0 0 0 0 1
#> 4 E_AL 3 3 0 5 0 0 0 0 0 0 1
#> 5 E_AL NA 3 0 5 0 0 0 0 0 0 1
#> 6 E_AL NA 3 0 5 0 0 0 0 0 0 1
I tried using a join but couldn't figure out the edge-cases, e.g. if a line has NAs and values:
# add some sevens to line 2
Tab1 <- read.table(text = "
nodepair ES1 ES2 ++ -- +- -+ 0+ +0 0- -0 00
1 E_A6 3 3 0 0 0 0 0 0 0 0 1
2 E_A6 4 3 NA NA NA NA NA NA NA 7 7
3 E_A6 5 3 NA NA NA NA NA NA NA NA NA
4 E_AL 3 3 0 5 0 0 0 0 0 0 1
5 E_AL 4 3 NA NA NA NA NA NA NA NA NA
6 E_AL 5 3 NA NA NA NA NA NA NA NA NA
", header = TRUE, check.names = FALSE)
# solution from @thelatemail's comment:
library(tidyverse)
Tab1 %>% select(nodepair,ES2) %>% left_join(Tab1 %>% filter(ES1 == ES2), by=c("nodepair","ES2"))
#> nodepair ES2 ES1 ++ -- +- -+ 0+ +0 0- -0 00
#> 1 E_A6 3 3 0 0 0 0 0 0 0 0 1
#> 2 E_A6 3 3 0 0 0 0 0 0 0 0 1
#> 3 E_A6 3 3 0 0 0 0 0 0 0 0 1
#> 4 E_AL 3 3 0 5 0 0 0 0 0 0 1
#> 5 E_AL 3 3 0 5 0 0 0 0 0 0 1
#> 6 E_AL 3 3 0 5 0 0 0 0 0 0 1
# compared to the for-loop approach
for (i in 1:nrow(Tab1)) {
if (Tab1[i,2] == Tab1[i,3]) {
stored_line <- Tab1[i,4:12]
}
if (all(is.na(Tab1[i,4:12]))) {
Tab1[i,4:12] <- stored_line
Tab1[i,2] <- NA
}
}
Tab1
#> nodepair ES1 ES2 ++ -- +- -+ 0+ +0 0- -0 00
#> 1 E_A6 3 3 0 0 0 0 0 0 0 0 1
#> 2 E_A6 4 3 NA NA NA NA NA NA NA 7 7
#> 3 E_A6 NA 3 0 0 0 0 0 0 0 0 1
#> 4 E_AL 3 3 0 5 0 0 0 0 0 0 1
#> 5 E_AL NA 3 0 5 0 0 0 0 0 0 1
#> 6 E_AL NA 3 0 5 0 0 0 0 0 0 1
Created on 2024-02-06 with reprex v2.1.0