I have several boxplots that I am running some statistics on through a for loop.
I want to be able to output the data into a dataframe. i.e. something like this (I've made up random numbers):
names <- c('B1','B2','B3')
for (name in names){
mereLoc <- scaledDaily %>%
filter(scaledDaily$loc == name)
mereLoc$year <- as.numeric(mereLoc$year)
median <- tapply(mereLoc$mAODscale, mereLoc$year, median, na.rm=T)
print(name)
print(median)
krusk <- kruskal.test(mAODscale~year, data=mereLoc)
print(krusk)
pairs <- pairwise.wilcox.test(mereLoc$mAODscale, mereLoc$year, p.adjust.method='BH')
print(pairs)
}
here is some test data. its not my actual data, I've just fabricated some.
structure(list(loc = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L), levels = c("B1", "B2", "B3"), class = "factor"),
mAODscale = c(0.561948848, 0.851883432, 0.293151829, 0.683807808,
0.864472706, 0.380303934, 0.324501709, 0.765022304, 0.900772901,
0.204731715, 0.877104175, 0.56367162, 0.206528162, 0.353350116,
0.219628257, 0.840723901, 0.716389918, 0.569798858, 0.707583441,
0.120064246, 0.275325307, 0.438391155, 0.308969668, 0.350156436,
0.886955315, 0.693416677, 0.710065022), year = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L,
3L, 3L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), levels = c("1",
"2", "3"), class = "factor")), row.names = c(NA, -27L), class = "data.frame")
Here is my solution:
names <- c('B1','B2','B3','B4','B5','B6','B7','B8',
'SSSI1','SSSI2','SSSI3',
"CM1", "CM2", "CM3", "CM4", "CM7", "CM8", "CM9", "CM11",
"Crose 1", "Crose 2", "Crose 3")
stats <- list()
for (name in names){
mereLoc <- scaledDaily %>%
filter(scaledDaily$loc == name)
mereLoc$year <- as.numeric(mereLoc$year)
median <- aggregate(mereLoc$mAODscale, list(mereLoc$year), median, na.rm=T)
krusk <- kruskal.test(mAODscale~year, data=mereLoc)
krusk2 <- tidy(krusk)
pairs <- pairwise.wilcox.test(mereLoc$mAODscale, mereLoc$year, p.adjust.method='BH')
pairs2 <- tidy(pairs)
stats[[name]] <- c("loc"=name,
"Y1_median"=median$x[1],
"Y2_median"=median$x[2],
"Y3_median"=median$x[3],
"KWCHiSq"=krusk$statistic,
"df"=krusk$parameter,
"pvalue"=krusk$p.value,
"Y1Y2"=pairs2[1,3],
"Y1Y3"=pairs2[2,3],
"Y2Y3"=pairs2[3,3])
}
stats <- Reduce(function(x,y) merge(x,y,all=TRUE), stats)
names(stats) <- c('loc','Y1_median','Y2_median','Y3_median',
'KW-ChiSq','df','pvalue','Y1-Y2','Y1-Y3','Y2-Y3')