rreplacenamatchingpairwise

Replacing rows of NA with another row meeting specific column matching


Here's a data snippet:

Tab1 <- read.table(text = "
  nodepair ES1 ES2 ++ -- +- -+ 0+ +0 0- -0 00
1     E_A6   3   3  0  0  0  0  0  0  0  0  1
2     E_A6   4   3 NA NA NA NA NA NA NA NA NA
3     E_A6   5   3 NA NA NA NA NA NA NA NA NA
4     E_AL   3   3  0  5  0  0  0  0  0  0  1
5     E_AL   4   3 NA NA NA NA NA NA NA NA NA
6     E_AL   5   3 NA NA NA NA NA NA NA NA NA   
", header = TRUE)

For each nodepair group, I would like to replace rows when they contain NA's with the values from the row where ES2 is equal to ES1, provided that it matches with the ES2 of the NA row. And then I need to assign an NA to the ES1 column of the previously NA row.

like this:

Tab2 <- read.table(text = "
  nodepair ES1 ES2 ++ -- +- -+ 0+ +0 0- -0 00
1     E_A6   3   3  0  0  0  0  0  0  0  0  1
2     E_A6   NA  3  0  0  0  0  0  0  0  0  1
3     E_A6   NA  3  0  0  0  0  0  0  0  0  1
4     E_AL   3   3  0  5  0  0  0  0  0  0  1
5     E_AL   NA  3  0  5  0  0  0  0  0  0  1
6     E_AL   NA  3  0  5  0  0  0  0  0  0  1   
", header = TRUE)

I've been trying out this kind of logic:

Tab2 <- Tab1  %>%
  rowwise %>%
  mutate(across(4:12, ~ifelse(is.na(rowsums), ???, .))))

Solution

  • Maybe a for-loop would work? E.g.

    Tab1 <- read.table(text = "
      nodepair ES1 ES2 ++ -- +- -+ 0+ +0 0- -0 00
    1     E_A6   3   3  0  0  0  0  0  0  0  0  1
    2     E_A6   4   3 NA NA NA NA NA NA NA NA NA
    3     E_A6   5   3 NA NA NA NA NA NA NA NA NA
    4     E_AL   3   3  0  5  0  0  0  0  0  0  1
    5     E_AL   4   3 NA NA NA NA NA NA NA NA NA
    6     E_AL   5   3 NA NA NA NA NA NA NA NA NA   
    ", header = TRUE,  check.names = FALSE)
    
    for (i in 1:nrow(Tab1)) {
      if (Tab1[i,2] == Tab1[i,3]) {
        stored_line <- Tab1[i,4:12]
      }
      if (all(is.na(Tab1[i,4:12]))) {
        Tab1[i,4:12] <- stored_line
        Tab1[i,2] <- NA
      }
    }
    
    Tab1
    #>   nodepair ES1 ES2 ++ -- +- -+ 0+ +0 0- -0 00
    #> 1     E_A6   3   3  0  0  0  0  0  0  0  0  1
    #> 2     E_A6  NA   3  0  0  0  0  0  0  0  0  1
    #> 3     E_A6  NA   3  0  0  0  0  0  0  0  0  1
    #> 4     E_AL   3   3  0  5  0  0  0  0  0  0  1
    #> 5     E_AL  NA   3  0  5  0  0  0  0  0  0  1
    #> 6     E_AL  NA   3  0  5  0  0  0  0  0  0  1
    

    I tried using a join but couldn't figure out the edge-cases, e.g. if a line has NAs and values:

    # add some sevens to line 2
    Tab1 <- read.table(text = "
      nodepair ES1 ES2 ++ -- +- -+ 0+ +0 0- -0 00
    1     E_A6   3   3  0  0  0  0  0  0  0  0  1
    2     E_A6   4   3 NA NA NA NA NA NA NA  7  7
    3     E_A6   5   3 NA NA NA NA NA NA NA NA NA
    4     E_AL   3   3  0  5  0  0  0  0  0  0  1
    5     E_AL   4   3 NA NA NA NA NA NA NA NA NA
    6     E_AL   5   3 NA NA NA NA NA NA NA NA NA   
    ", header = TRUE,  check.names = FALSE)
    
    # solution from @thelatemail's comment:
    library(tidyverse)
    Tab1 %>% select(nodepair,ES2) %>% left_join(Tab1 %>% filter(ES1 == ES2), by=c("nodepair","ES2"))
    #>   nodepair ES2 ES1 ++ -- +- -+ 0+ +0 0- -0 00
    #> 1     E_A6   3   3  0  0  0  0  0  0  0  0  1
    #> 2     E_A6   3   3  0  0  0  0  0  0  0  0  1
    #> 3     E_A6   3   3  0  0  0  0  0  0  0  0  1
    #> 4     E_AL   3   3  0  5  0  0  0  0  0  0  1
    #> 5     E_AL   3   3  0  5  0  0  0  0  0  0  1
    #> 6     E_AL   3   3  0  5  0  0  0  0  0  0  1
    
    # compared to the for-loop approach
    for (i in 1:nrow(Tab1)) {
      if (Tab1[i,2] == Tab1[i,3]) {
        stored_line <- Tab1[i,4:12]
      }
      if (all(is.na(Tab1[i,4:12]))) {
        Tab1[i,4:12] <- stored_line
        Tab1[i,2] <- NA
      }
    }
    Tab1
    #>   nodepair ES1 ES2 ++ -- +- -+ 0+ +0 0- -0 00
    #> 1     E_A6   3   3  0  0  0  0  0  0  0  0  1
    #> 2     E_A6   4   3 NA NA NA NA NA NA NA  7  7
    #> 3     E_A6  NA   3  0  0  0  0  0  0  0  0  1
    #> 4     E_AL   3   3  0  5  0  0  0  0  0  0  1
    #> 5     E_AL  NA   3  0  5  0  0  0  0  0  0  1
    #> 6     E_AL  NA   3  0  5  0  0  0  0  0  0  1
    

    Created on 2024-02-06 with reprex v2.1.0