rdplyrphyloseq

Conflicts within function calls to variables in R


I'm somewhat confused with the different values a variable can take within a function and how it is possible to harmonize them so that they suit the different tasks and function calls within the custom function. Here I wrote an example where the argument of the function should be a string for some steps (conditional testing and first function call), and for others it should not (within dplyr functions for instance).

# Packages
library(tidyverse)
library(phyloseq)

# Create some data
# Abundance table: six taxa (rows, Name1..Name6) by four samples (columns)
otu <- data.frame(
  Sample1 = seq(1, 6, 1),
  Sample2 = rep(3.5, 6),
  Sample3 = c(0, 15, 0, 4, 2, 0),
  Sample4 = c(7, 7, 0, 7, 0, 0),
  row.names = paste0("Name", seq(1, 6, 1))
)
otu
#>       Sample1 Sample2 Sample3 Sample4
#> Name1       1     3.5       0       7
#> Name2       2     3.5      15       7
#> Name3       3     3.5       0       0
#> Name4       4     3.5       4       7
#> Name5       5     3.5       2       0
#> Name6       6     3.5       0       0

# Taxonomy table: one row per taxon, one column per taxonomic rank
tax <- data.frame(
  HighestRank = paste0("HighRank", c(1, 2, 1, 1, 1, 1)),
  LowestRank = paste0("LowRank", c(1, 2, 1, 3, 3, 3)),
  row.names = paste0("Name", seq(1, 6, 1))
)
tax
#>       HighestRank LowestRank
#> Name1   HighRank1   LowRank1
#> Name2   HighRank2   LowRank2
#> Name3   HighRank1   LowRank1
#> Name4   HighRank1   LowRank3
#> Name5   HighRank1   LowRank3
#> Name6   HighRank1   LowRank3

# Combine the abundance and taxonomy tables (both as matrices) into one
# phyloseq object; taxa are stored as rows of the abundance matrix.
foo <- phyloseq(
  otu %>% as.matrix() %>% otu_table(taxa_are_rows = TRUE),
  tax %>% as.matrix() %>% tax_table()
)
foo
#> phyloseq-class experiment-level object
#> otu_table()   OTU Table:         [ 6 taxa and 4 samples ]
#> tax_table()   Taxonomy Table:    [ 6 taxa by 2 taxonomic ranks ]

# Write a function that takes the phyloseq object,
# aggregates it at the requested rank,
# and filters taxa given a character vector

# Aggregate a phyloseq object at a taxonomic rank and keep the taxa whose
# label matches any of the requested patterns.
#
# physeq: object handed to tax_glom(), tax_table() and otu_table().
# taxa:   character vector; its elements are joined with "|" into one regex.
# rank:   rank name; compared against "Names" and passed on to tax_glom().
#
# Returns the filtered data frame when rank != "Names". The `if` has no
# `else`, so for rank == "Names" nothing is computed and the value is an
# invisible NULL.
fooFunction <- function(physeq, taxa, rank = "Names"){
  if(rank != "Names"){ # Condition can be tested using a string
  # Aggregate object by wished rank
  ag.physeq <- tax_glom(physeq, rank)
  # Values of the `rank` column of the aggregated taxonomy table
  ag.names <- tax_table(ag.physeq) %>% data.frame() %>% pull({{rank}})
  # Label each aggregated taxon as "<row name>_<rank value>"
  new.names <- paste(rownames(otu_table(ag.physeq)),ag.names,sep="_")
  
  taxa=paste0(taxa,collapse="|") # Modify character vector to a regex
  
  # Do some stuff using dplyr variable assignment and call to variable
  ag.physeq %>% 
    otu_table %>%
    as.data.frame() %>%
    rownames_to_column(var = "Names") %>%
    # Adds the labels as a new column named after `rank`
    mutate({{rank}}:=new.names) %>%
    # NOTE(review): when `rank` is a string, {{rank}} here is that string
    # itself rather than the column, so grepl() is applied to the rank name,
    # not to the labels -- consistent with the 0-row result reported for
    # rank = "LowestRank".
    filter(grepl(taxa, {{rank}}))
}
}

# Expected behavior
taxa <- c("LowRank1", "LowRank2")

# Aggregate at LowestRank and label every aggregated taxon as
# "<row name>_<LowestRank value>"
ag.physeq <- tax_glom(foo, "LowestRank")
ag.names <- pull(data.frame(tax_table(ag.physeq)), LowestRank)
new.names <- paste(rownames(otu_table(ag.physeq)), ag.names, sep = "_")

# Collapse the requested taxa into a single alternation regex
taxa <- paste0(taxa, collapse = "|")

# Attach the labels to the aggregated abundance table and keep matching rows
as.data.frame(otu_table(ag.physeq)) %>%
  rownames_to_column(var = "Names") %>%
  mutate(LowestRank = new.names) %>%
  filter(grepl(taxa, LowestRank))
#>   Names Sample1 Sample2 Sample3 Sample4     LowestRank
#> 1 Name1       4     7.0       0       7 Name1_LowRank1
#> 2 Name2       2     3.5      15       7 Name2_LowRank2

Option 1: passing a character string as rank to the function

fooFunction(foo, taxa=c("LowRank1","LowRank2"), rank = "LowestRank")
#> [1] Names      Sample1    Sample2    Sample3    Sample4    LowestRank
#> <0 rows> (or 0-length row.names)

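Here, the filter step matches nothing: because `rank` is a string, `{{rank}}` inside `filter()` evaluates to the string "LowestRank" itself rather than to the column of that name, so `grepl()` tests the pattern against the column name instead of the column values.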

Option 2: rank is not a character string

fooFunction(foo, taxa=c("LowRank1","LowRank2"), rank = LowestRank)
#> Error in fooFunction(foo, taxa = c("LowRank1", "LowRank2"), rank = LowestRank): object 'LowestRank' not found

The conditional test fails because `rank` is evaluated there as an ordinary R expression, and no object named `LowestRank` exists.

Even after removing the condition, there are other functions that require the argument as a string, e.g.

# Aggregate a phyloseq object at the given rank and return the result.
# `taxa` is part of the signature but is not used by this function.
fooFunction2 <- function(physeq, taxa, rank = "Names") {
  # `rank` is coerced to character and handed straight to tax_glom()
  tax_glom(physeq, as.character(rank))
}

fooFunction2(foo, taxa=c("LowRank1","LowRank2"), rank = LowestRank)
#> Error in h(simpleError(msg, call)): error in evaluating the argument 'x' in selecting a method for function '%in%': object 'LowestRank' not found

Here also rank is not found because it does not exist.

I'd be extremely grateful for an explanation of how to properly handle those arguments within a function!

Created on 2022-11-22 by the reprex package (v2.0.1)


Solution

  • you can just replace the filter call with a modified version that takes a character argument:

    # Aggregate a phyloseq object at a taxonomic rank and keep the taxa whose
    # label matches any of the requested patterns.
    #
    # physeq: object handed to tax_glom(), tax_table() and otu_table().
    # taxa:   character vector; its elements are joined with "|" into one regex.
    # rank:   single string naming the rank to aggregate by. It is used as a
    #         string throughout (comparison, tax_glom(), column lookup).
    #
    # Returns a data frame with a "Names" column, the abundance columns, and a
    # column named after `rank` holding "<row name>_<rank value>" labels,
    # restricted to rows whose label matches the regex. For rank == "Names"
    # nothing is aggregated and the value is an invisible NULL.
    fooFunction <- function(physeq, taxa, rank = "Names") {
      # A bare symbol or a vector of ranks cannot be used below; fail early.
      stopifnot(is.character(rank), length(rank) == 1L)

      if (rank == "Names") {
        return(invisible(NULL))
      }

      # Aggregate object by the requested rank
      ag.physeq <- tax_glom(physeq, rank)
      # `[[` looks the column up by its string name, no tidy evaluation needed
      ag.names <- data.frame(tax_table(ag.physeq))[[rank]]
      new.names <- paste(rownames(otu_table(ag.physeq)), ag.names, sep = "_")

      # Collapse the character vector into a single alternation regex
      taxa <- paste0(taxa, collapse = "|")

      ag.physeq %>%
        otu_table() %>%
        as.data.frame() %>%
        rownames_to_column(var = "Names") %>%
        # "{rank}" := names the new column after the string held in `rank`
        mutate("{rank}" := new.names) %>%
        # .data[[rank]] refers to the column named by the string in `rank`
        filter(grepl(taxa, .data[[rank]]))
    }
    
    fooFunction(foo, taxa=c("LowRank1","LowRank2"), rank = "LowestRank")
    
    #>   Names Sample1 Sample2 Sample3 Sample4     LowestRank
    #> 1 Name1       4     7.0       0       7 Name1_LowRank1
    #> 2 Name2       2     3.5      15       7 Name2_LowRank2