I want to measure the 1-factor ANOVA with 4-levels: ctl
against schiz
, bp
, dep
. I'm expecting the aov.run
to return a numeric vector with 3 variables, since I'm comparing ctl
against the other three levels. However, I'm only getting 2 variables in aov.run
. Why?
d <- read.table("filtered.mds", header=T)
ann <- read.table("clinical_table.txt", header=T, sep="\t")
# Create new dataframe
dat <- t(cbind(d$C1,d$C2))
colnames(dat) <- paste(ann$Profile, data.table::rowid(ann$Profile), sep="_")
rownames(dat) <- c("C1", "C2")
# Levels
ctl <- grepl("^Unaffected control", ann$Profile)
schiz <- grepl("^Schiz.", ann$Profile)
bp <- grepl("^BP", ann$Profile)
dep <- grepl("^Dep.", ann$Profile)
# 1-factor ANOVA with 4 levels
aov.lvl <- function(x,s1,s2,s3,s4) {
x1 <- as.numeric(x[s1])
x2 <- as.numeric(x[s2])
x3 <- as.numeric(x[s3])
x4 <- as.numeric(x[s4])
fac <- c(rep("A",length(x1)), rep("B",length(x2)), rep("C",length(x3)), rep("D",length(x4)))
a.dat <- data.frame(as.factor(fac),c(x1,x2,x3,x4))
names(a.dat) <- c("factor","express")
p.out <- summary(aov(express~factor, a.dat))[[1]][1,5]
return(p.out)
}
aov.run <- apply(dat, 1, aov.lvl, s1=ctl, s2=schiz, s3=bp, s4=dep)
dataframe d
> dput(d)
structure(list(FID = c("AC10", "AC11", "AC12", "AC13", "AC14",
"AC15", "AC17", "AC18", "AC19", "AC1", "AC20", "AC21", "AC22",
"AC23", "AC24", "AC25", "AC26", "AC27", "AC29", "AC2", "AC30",
"AC31", "AC32", "AC33", "AC34", "AC35", "AC36", "AC37", "AC38",
"AC39", "AC3", "AC40", "AC41", "AC42", "AC43", "AC45", "AC46",
"AC47", "AC48", "AC49", "AC50", "AC51", "AC52", "AC53", "AC54",
"AC55", "AC56", "AC57", "AC58", "AC5", "AC60", "AC61", "AC62",
"AC63", "AC64", "AC65", "AC66", "AC67", "AC69", "AC6", "AC70",
"AC71", "AC72", "AC73", "AC74", "AC75", "AC76", "AC77", "AC78",
"AC79", "AC7", "AC80", "AC81", "AC82", "AC83", "AC84", "AC86",
"AC87", "AC88", "AC89", "AC8", "AC90", "AC91", "AC92", "AC9",
"AC100", "AC101", "AC102", "AC103", "AC104", "AC105", "AC16",
"AC68", "AC93", "AC94", "AC95", "AC96", "AC97", "AC99", "DE10",
"DE12", "DE13", "DE14", "DE15", "DE16", "DE17", "DE18", "DE19",
"DE1", "DE20", "DE21", "DE22", "DE23", "DE25", "DE26", "DE27",
"DE2", "DE33", "DE34", "DE35", "DE36", "DE37", "DE38", "DE39",
"DE3", "DE40", "DE41", "DE42", "DE44", "DE45", "DE46", "DE47",
"DE48", "DE49", "DE4", "DE50", "DE51", "DE52", "DE53", "DE54",
"DE55", "DE56", "DE57", "DE58", "DE59", "DE60", "DE7", "DE9",
"DE29", "DE30", "DE32", "DE43", "DE5"), IID = c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), SOL = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), C1 = c(-0.00385609, 0.0101138, -0.0146168, -0.0218236, -0.0134745,
-0.017089, 0.0152448, 0.0134359, 0.00540102, -0.0125389, 0.00463956,
-0.00416079, -0.000325898, 0.0132781, 0.0130666, 0.00718399,
-0.0051912, -0.0227934, 0.0364974, -0.0180301, -0.0226556, -0.00585266,
0.0258924, -0.00994298, -0.00380612, 0.0187883, 0.0103367, 0.00747272,
0.0191431, -0.00501846, -0.00118336, 0.0361201, 0.00830498, 0.00380194,
0.00667686, -0.000441697, -0.00170991, -0.0281008, -0.00424591,
0.0213412, 0.00261405, 0.016154, 0.0098956, 0.0141544, 0.0367203,
0.0144693, 0.0256731, -0.00218851, 0.0204603, -0.000603019, -0.00504176,
-0.00917368, 0.00237875, 0.0175946, 0.0188388, 0.0368965, -0.00408476,
0.00871812, -0.00851917, 0.0252035, -0.00915532, 0.0223745, 0.016866,
0.026825, 0.0366276, 0.0540474, 0.0386237, 0.0029996, 0.0207176,
0.0177353, -0.0066377, 0.0343811, 0.0282509, 0.00526683, 0.0459516,
0.00976286, 0.0259005, -0.00104822, -0.012696, 0.0134071, 0.0231658,
0.00359455, 0.0194968, -0.000936478, -0.0029218, -0.0058512,
-0.000837274, -0.0129465, -0.0102079, -0.00559039, 0.0118966,
0.00147658, 0.0120396, -0.0104779, -0.0315149, -0.0115454, -0.0122457,
-6.72242e-05, 0.00370599, -0.0164126, -0.0107853, -0.0271741,
-0.0212005, -0.0445118, -0.0387773, -0.025109, -0.0321735, -0.0398603,
-0.0266408, -0.0260984, -0.0296337, -0.0185381, -0.0403944, 0.0197937,
-0.0176322, -0.013238, -0.0071666, -7.27277e-05, 0.00397489,
0.0335056, -0.00604706, -0.00926438, 0.00706601, -0.0156982,
-0.0275085, -0.00864179, -0.0247967, -0.030564, -0.00767327,
-0.0235161, 0.00649758, -0.0329062, -0.0016138, -0.00701695,
0.00819454, 0.0100377, 0.0250199, -0.0493141, -0.0216641, -0.0244709,
-0.00466616, 0.016751, -0.0191688, -0.00492488, -0.0162364, -0.0167085,
-0.0113427, 0.000422333, 0.030274, 0.0317995, 0.00237194, -0.00693838,
-0.0100835), C2 = c(0.000865365, -0.001752, 0.0189917, -0.023343,
-0.0340531, -0.0258976, -0.00794043, 0.0173163, 0.00639341, -0.0343077,
0.01083, -0.0402179, 0.0158751, -0.00262893, -0.0216757, -0.00261259,
-0.00542089, -0.00515714, 0.0105216, -0.0193606, 0.00692795,
-0.0117295, -0.0235627, -0.00850041, -0.0156109, -0.00871875,
-0.0163218, 0.0227143, -0.0161961, -0.0176719, -0.0070994, 0.0262932,
0.00164033, -0.00969917, -0.0197631, -0.0154387, -0.0194608,
0.00442207, -0.0234804, 0.00822342, -0.00657274, -0.0092332,
0.0130892, -0.0345162, -0.0114187, -0.0129497, -0.00306092, 0.0417858,
0.0262002, -0.0188849, -0.0184154, -0.0109956, -0.0151195, -0.00414531,
0.010064, 0.0308816, -0.0153337, 0.0157867, -0.0289866, -0.0106713,
0.000112714, -0.00152177, 0.0184509, 0.0112357, 0.00097954, 0.032083,
0.0190258, -0.0371498, -0.0307498, -0.00947645, -0.00198995,
0.015845, -0.0240248, -0.0122369, -0.00107049, -0.0144661, 0.0207883,
-0.0418619, -0.0123712, -0.0212721, -0.00667244, -0.028512, -0.00522357,
-0.018842, -0.0123026, -0.00511655, 0.0188473, 0.00739189, 0.0321578,
-0.015449, 0.0214631, -0.00995001, -0.00144645, 0.00934907, 0.0344757,
-0.0220224, 0.0121403, -0.00615057, -0.0208969, 0.0313899, -0.0251011,
0.011635, 0.00536455, 0.0233033, -0.0019204, 0.0273593, 0.00844028,
0.00181444, 0.02824, 0.0255231, 0.00266055, -0.00850383, -0.0129938,
0.0268634, 0.0195986, 0.0320615, -0.0026514, 0.0127147, 0.014279,
0.0553434, -0.020963, 0.00629119, -0.0244099, -0.0080923, 0.0173508,
0.0485753, -0.00666049, 0.0501603, 0.0029162, 0.0267363, 0.0066606,
0.00857736, 0.0172693, -0.00827586, -0.0117478, -0.00336638,
0.00954265, -0.00889617, 0.00290055, 0.0229832, 0.0504569, 0.025979,
-0.00795356, -0.0135421, -0.00359528, 0.0150037, -0.0105817,
0.0167827, 0.0110882, 0.00200862, -0.00597284, -0.0188371, -0.00827599
)), class = "data.frame", row.names = c(NA, -153L))
columnann$Profile
> dput(ann$Profile)
c("Schiz.", "Schiz.", "Schiz.", "BP", "BP", "Unaffected control",
"Schiz.", "BP", "Unaffected control", "BP", "BP", "BP", "Schiz.",
"BP", "Unaffected control", "Schiz.", "Schiz.", "Unaffected control",
"Unaffected control", "BP", "Unaffected control", "Schiz.", "BP",
"Unaffected control", "BP", "Unaffected control", "BP", "Schiz.",
"Unaffected control", "Schiz.", "Schiz.", "Schiz.", "Schiz.",
"BP", "Unaffected control", "Schiz.", "BP", "Schiz.", "BP", "Unaffected control",
"BP", "Unaffected control", "Unaffected control", "Unaffected control",
"Unaffected control", "Schiz.", "Unaffected control", "BP", "BP",
"BP", "Unaffected control", "BP", "BP", "BP", "BP", "Unaffected control",
"Schiz.", "Unaffected control", "BP", "BP", "Unaffected control",
"Unaffected control", "BP", "Schiz.", "BP", "Schiz.", "BP", "Unaffected control",
"Schiz.", "Unaffected control", "Schiz.", "Unaffected control",
"Schiz.", "Schiz.", "Unaffected control", "Unaffected control",
"Unaffected control", "Schiz.", "Schiz.", "BP", "BP", "Unaffected control",
"Unaffected control", "Schiz.", "Schiz.", "Schiz.", "Schiz.",
"BP", "Unaffected control", "BP", "Unaffected control", "BP",
"Schiz.", "Schiz.", "Schiz.", "Unaffected control", "Unaffected control",
"Schiz.", "Unaffected control", "Unaffected control", "Unaffected control",
"BP", "BP", "Dep.", "Unaffected control", "Unaffected control",
"Dep.", "Dep.", "Dep.", "Dep.", "Dep.", "Unaffected control",
"Unaffected control", "Schiz.", "Dep.", "BP", "Dep.", "Schiz.",
"Schiz.", "Schiz.", "BP", "Unaffected control", "Unaffected control",
"Unaffected control", "BP", "BP", "Dep.", "Schiz.", "Dep.", "BP",
"Unaffected control", "Unaffected control", "Schiz.", "Schiz.",
"Unaffected control", "Unaffected control", "BP", "BP", "Schiz.",
"Dep.", "BP", "Dep.", "BP", "Schiz.", "Unaffected control", "Dep.",
"BP", "Schiz.", "Dep.", "Dep.", "BP", "BP", "Schiz.")
aov.run
is returning the p-value associated with the ANOVA run on each of your columns separately. There's only one p-value per ANOVA, so you get one for C1 and one for C2 for a total of two. That's working as expected!
It sounds like you're interested instead in the post-hoc results of the ANOVA, obtained by a Tukey test or something similar. Tukey's HSD will give you the p-value of each comparison separately for 6 comparisons per ANOVA ("Dep.-BP", "Schiz.-BP", "Unaffected control-BP", "Schiz.-Dep.", "Unaffected control-Dep.", and "Unaffected control-Schiz.") and it sounds like you're only interested in the "Unaffected control-BP", "Unaffected control-Dep.", and "Unaffected control-Schiz." comparisons. You can get these by replacing your calls to summary()
with tukeyHSD()
:
v <- cbind(d, Profile=ann$Profile)
# p-value for each ANOVA
summary(aov(C1~Profile, data = v))[[1]][1,5]
summary(aov(C2~Profile, data = v))[[1]][1,5]
# p-value for each comparison within the ANOVA
TukeyHSD(aov(C1~Profile, data = v))
TukeyHSD(aov(C2~Profile, data = v))