rphyloseq

Trouble making an object in phyloseq


I'm trying to make an object that I can use in the package phyloseq, but I can't seem to get it to work. Below is a small subset of my data: the first object is an OTU table, and the second contains the taxonomy for those OTUs.

OTUs <- dput(OTU_table[1:5,])
structure(list(OTU_ID = c("OTU_1", "OTU_6", "OTU_16", "OTU_2", 
"OTU_216"), V2T4r5Croot = c(3505L, 5L, 124L, 0L, 8L), V2T4r5Broot = c(18880L, 
18390L, 1L, 10233L, 0L), R6T2r5Croot = c(82973L, 5195L, 444L, 
93L, 7L), V2T2r2Broot = c(13747L, 79L, 1603L, 33L, 0L), R3T2r5Broot = c(11212L, 
2L, 462L, 33977L, 0L), V2T2r2Croot = c(63779L, 354L, 5204L, 374L, 
0L), R3T4r5Croot = c(60109L, 1518L, 4067L, 875L, 2L), R3T1r5Aroot = c(28412L, 
3161L, 626L, 3465L, 131L), R3T4r2Croot = c(40569L, 110L, 575L, 
8642L, 0L), V2T2r5Aroot = c(22800L, 2225L, 1334L, 12185L, 4L), 
    R6T2r5Broot = c(50017L, 5739L, 4199L, 0L, 0L), R6T1r1Broot = c(52756L, 
    0L, 35L, 490L, 0L), R6T1r2Croot = c(14828L, 10227L, 180L, 
    3973L, 10L), V2T1r6Aroot = c(40317L, 146L, 543L, 5975L, 36L
    ), R6T2r1Broot = c(13801L, 524L, 189L, 6121L, 0L), V2T4r1Broot = c(58001L, 
    21L, 247L, 2359L, 5L), R6T2r6Croot = c(79608L, 715L, 384L, 
    13121L, 0L), R3T2r1Aroot = c(7938L, 187L, 2305L, 212L, 0L
    ), R6T2r6Aroot = c(20243L, 1098L, 320L, 10632L, 9L), V2T1r5Broot = c(11102L, 
    156L, 200L, 8205L, 0L), V2T4r2Aroot = c(7641L, 393L, 53L, 
    366L, 27L), R6T4r5Croot = c(5L, 68L, 7192L, 4L, 0L), R6T4r5Broot = c(40122L, 
    92L, 29L, 64631L, 59L), R3T4r6Croot = c(49960L, 101L, 97L, 
    18846L, 0L), R6T2r2Aroot = c(81204L, 7801L, 1499L, 13245L, 
    6L), R3T4r5Aroot = c(108839L, 5072L, 1894L, 1957L, 55L), 
    R3T2r1Broot = c(624L, 0L, 62L, 3687L, 0L), R6T1r5Croot = c(67805L, 
    0L, 238L, 2L, 0L), V2T4r6Croot = c(43210L, 24L, 0L, 33L, 
    0L), R3T1r6Aroot = c(6419L, 274L, 1062L, 2411L, 79L), R3T2r2Croot = c(53908L, 
    34726L, 3497L, 82L, 144L), R3T4r2Aroot = c(10503L, 48L, 23L, 
    27764L, 0L), R3T2r5Aroot = c(2386L, 79L, 39L, 1805L, 0L), 
    V2T2r1Croot = c(20324L, 318L, 14L, 1192L, 0L), V2T1r5Aroot = c(3933L, 
    33L, 6L, 3785L, 0L), V2T1r1Broot = c(99803L, 7377L, 203L, 
    1098L, 75L), R3T4r6Aroot = c(16601L, 1113L, 217L, 309L, 33L
    ), R3T2r1Croot = c(135822L, 24308L, 4986L, 219L, 230L), V2T1r5Croot = c(12444L, 
    139L, 32L, 211L, 0L), R3T1r6Croot = c(3957L, 9L, 117L, 293426L, 
    0L), R6T1r2Broot = c(92870L, 43L, 625L, 616L, 0L), V2T2r2Aroot = c(26697L, 
    654L, 130L, 31056L, 0L), R3T2r6Broot = c(82471L, 22990L, 
    3253L, 955L, 4L), R6T1r1Aroot = c(11187L, 0L, 5L, 0L, 0L), 
    R6T1r6Broot = c(6016L, 72L, 386L, 3368L, 0L), R3T1r1Aroot = c(55133L, 
    5854L, 494L, 1694L, 45L), V2T1r2Aroot = c(9346L, 139L, 17L, 
    64L, 0L), R3T4r1Aroot = c(84510L, 4049L, 1441L, 1193L, 5L
    ), R6T2r5Aroot = c(38997L, 33L, 273L, 967L, 0L), R3T4r2Broot = c(54402L, 
    565L, 567L, 9L, 0L), R3T1r2Broot = c(42977L, 24L, 132L, 3L, 
    7L), R6T1r5Aroot = c(5433L, 39L, 16L, 2L, 0L), R3T1r1Croot = c(4356L, 
    0L, 0L, 24719L, 0L), R3T4r5Broot = c(39402L, 6424L, 151L, 
    0L, 0L), R6T1r2Aroot = c(67639L, 14L, 16L, 1L, 0L), R3T2r5Croot = c(12136L, 
    3420L, 193L, 98L, 0L), R3T1r5Croot = c(21358L, 2876L, 347L, 
    9850L, 0L), V2T1r6Broot = c(16975L, 2L, 273L, 1397L, 98L), 
    R6T1r1Croot = c(7403L, 18L, 36L, 2112L, 0L), R3T1r1Broot = c(18301L, 
    1122L, 276L, 6921L, 7L), V2T2r6Croot = c(59794L, 2560L, 92L, 
    12437L, 0L), R6T1r5Broot = c(58396L, 1284L, 119L, 21078L, 
    0L), V2T4r6Broot = c(10496L, 773L, 1603L, 1950L, 19L), V2T1r6Croot = c(34687L, 
    9560L, 38L, 19L, 1L), R3T4r1Broot = c(23020L, 78L, 276L, 
    160L, 66L), V2T4r1Aroot = c(32591L, 91L, 197L, 308L, 0L), 
    V2T4r6Aroot = c(35314L, 3L, 1550L, 1775L, 0L), V2T2r6Aroot = c(12424L, 
    884L, 149L, 481L, 0L), V2T2r1Aroot = c(6721L, 52L, 203L, 
    4286L, 0L), R3T2r2Broot = c(26251L, 447L, 326L, 4834L, 0L
    ), V2T4r2Broot = c(27830L, 2404L, 1131L, 98L, 18L), R6T1r6Aroot = c(11818L, 
    14L, 34L, 1L, 0L), V2T1r1Croot = c(7961L, 5L, 376L, 2802L, 
    0L), R6T2r2Croot = c(25329L, 15L, 63L, 76L, 0L), R6T2r2Broot = c(1002L, 
    0L, 153L, 26L, 0L), R6T2r1Aroot = c(38869L, 11953L, 1987L, 
    2639L, 0L), V2T4r5Aroot = c(1838L, 18L, 1L, 47981L, 5L), 
    R6T4r1Aroot = c(3323L, 16L, 3L, 7212L, 0L), V2T2r5Croot = c(22124L, 
    1037L, 395L, 1515L, 296L), R3T4r6Broot = c(4112L, 0L, 0L, 
    1L, 0L), R3T1r5Broot = c(4443L, 120L, 528L, 1176L, 0L), V2T2r6Broot = c(2068L, 
    55L, 11L, 7180L, 0L), R3T2r2Aroot = c(4962L, 277L, 35L, 1L, 
    7L), V2T1r1Aroot = c(18506L, 0L, 44L, 93L, 0L), R3T1r2Aroot = c(19779L, 
    2L, 162L, 51355L, 8L), R6T2r1Croot = c(913L, 4L, 26L, 10L, 
    0L), V2T2r5Broot = c(7309L, 69L, 63L, 38L, 15L), V2T4r1Croot = c(8043L, 
    231L, 1351L, 787L, 18L), R3T1r6Broot = c(1973L, 1L, 0L, 40482L, 
    0L), R3T4r1Croot = c(4004L, 326L, 12L, 2020L, 11L), V2T1r2Croot = c(2712L, 
    21L, 43L, 2127L, 0L), V2T4r2Croot = c(3711L, 118L, 0L, 1487L, 
    50L), R3T2r6Croot = c(1491L, 290L, 55L, 33L, 0L), R6T1r6Croot = c(8541L, 
    0L, 0L, 0L, 0L), R6T2r6Broot = c(2561L, 6L, 2L, 387L, 0L), 
    V2T2r1Broot = c(2128L, 315L, 180L, 1483L, 0L), V2T1r2Broot = c(2363L, 
    78L, 260L, 2182L, 0L), R3T2r6Aroot = c(486L, 0L, 191L, 1209L, 
    0L), R3T1r2Croot = c(6014L, 18L, 126L, 587L, 0L), NC1root = c(5L, 
    0L, 0L, 0L, 0L), R6T4r6Aroot = c(246L, 3L, 7L, 83L, 0L)), row.names = c(NA, 
5L), class = "data.frame")

and

taxa <- dput(taxa[c(1,2,6,16,216),])
structure(list(OTU_ID = c("OTU_1", "OTU_2", "OTU_6", "OTU_16", 
"OTU_216"), Kingdom = c("Fungi_1", "Fungi_1", "Fungi_1", "Fungi_1", 
"Fungi_1"), Phylum = c("Ascomycota_1", "Ascomycota_1", "", "Ascomycota_1", 
"Basidiomycota_1"), Class = c("Sordariomycetes_1", "Dothideomycetes_1", 
"", "Dothideomycetes_1", "Tremellomycetes_1"), Order = c("Hypocreales_1", 
"Pleosporales_1", "", "Pleosporales_1", "Tremellales_1"), Family = c("Nectriaceae_1", 
"Corynesporascaceae_1", "", "Pleosporaceae_1", "Trimorphomycetaceae"
), Genus = c("Fusarium_1", "Corynespora_1", "", "Alternaria_1", 
"Saitozyma"), Species = c("", "Corynespora cassiicola ", "", 
"", "")), row.names = c(1L, 2L, 6L, 16L, 216L), class = "data.frame")

Here's some code I've tried:

# Asker's attempt: convert both data frames to matrices and combine them.
# NOTE(review): OTUs still contains the character column OTU_ID, so
# as.matrix() yields a character matrix rather than a numeric count matrix,
# and the OTU IDs are not used as row names.
OTUs1 <- as.matrix(OTUs)
# NOTE(review): taxa's row names are 1, 2, 6, 16, 216 (not the OTU IDs), and
# OTU_ID is likewise kept as an ordinary column.
taxa1 <- as.matrix(taxa)

# Wrap the count matrix as a phyloseq otu_table with one row per taxon.
OTUs2 = otu_table(OTUs1, taxa_are_rows = TRUE)

# NOTE(review): taxa1 is passed as a plain matrix; it is never wrapped with
# tax_table() -- presumably phyloseq() expects a tax_table component whose
# taxa names match those of OTUs2; verify against the phyloseq documentation.
physeq <- phyloseq(OTUs2, taxa1)

I get this error when I run the code to try and make the object: Error in phyloseq(OTUs2, taxa1) : Problem with OTU/taxa indices among those you provided. Check using intersect() and taxa_names()


Solution

  • I assume that you are getting this error because the row names of the two matrices do not match (neither uses the OTU_IDs as row names). The following steps should resolve your error:

    # Build a phyloseq object from the `taxa` and `OTUs` data frames, matching
    # their rows through the shared OTU_ID column.
    # library() (unlike require()) fails loudly if the package is missing.
    library("phyloseq")
    
    # Use the OTU_IDs as row names so both tables are indexed the same way
    row.names(taxa) <- taxa[["OTU_ID"]]
    row.names(OTUs) <- OTUs[["OTU_ID"]]
    
    # Remove the now-redundant OTU_ID column; drop = FALSE keeps a data frame
    # (with its row names) even if only a single column remains
    taxa <- taxa[, colnames(taxa) != "OTU_ID", drop = FALSE]
    OTUs <- OTUs[, colnames(OTUs) != "OTU_ID", drop = FALSE]
    
    # Keep only the OTU_IDs present in both tables, in the same order.
    # Base intersect() is sufficient for character vectors; dplyr is not needed.
    ids <- intersect(row.names(taxa), row.names(OTUs))
    if (length(ids) == 0L) {
      # Fail early with a clear message instead of phyloseq's index error
      stop("No OTU_IDs are shared between `taxa` and `OTUs`.", call. = FALSE)
    }
    taxa <- taxa[ids, , drop = FALSE]
    OTUs <- OTUs[ids, , drop = FALSE]
    
    # Convert to phyloseq components (character taxonomy matrix, numeric count
    # matrix with taxa as rows) and combine them into one object
    taxa <- tax_table(as.matrix(taxa))
    OTUs <- otu_table(as.matrix(OTUs), taxa_are_rows = TRUE)
    physeq <- phyloseq(taxa, OTUs)