I have done a NMDS graph of a fungal community dataset sampled from 144 soil samples collected under three host plants (Dryas, Kobresia, Salix) - 48 plots from each of the three plants. Those 48 plots have been treated with 8 different treatments, (C,N,P,NP,W,WN,WP and WNP). Half of those treatments have water added, and the other half do not.
Thus, I have three factors I wish to identify the points in my NMDS by - plants, water/no water, and the 8 treatments. The standard is to have just two factors with one having the points differentiated by colour, and the other differentiated by shapes. However, the third factor (e.g. water/no water) then lacks an obvious feature to be distinguished by.
This is my code:
#environment vectors
#Treatment vector:
Treatment=c(rep("WP",1),rep("WP",1),rep("WP",1),rep("P",1),rep("P",1),rep("P",1),rep("N",1),rep("N",1),rep("NP",1),rep("NP",1)......[148]
###Water vector:
Water=c(rep("Watering",1),rep("Watering",1),rep("Watering",1),rep("No watering",1),rep("No watering",1),rep("No watering",1),rep("No watering",1),rep("No watering",1).......[148]
#plant vector:
Plantvector=c(rep("Dryas",1),rep("Kobresia",1),rep("Salix",1),rep("Dryas",1),rep("Kobresia",1),rep("Salix",1),rep("Dryas",1)........
#downloading the OTU table
zacktreat<- read.xlsx("Zack_OTUs_NMDS.xlsx", sheetName = "u_roots",row.names=1)
env<- read.xlsx("Zack_OTUs_NMDS.xlsx", sheetName = "env1",row.names=1, )
env[env == 9999] <- NA
env1<-env[-c(6,10,13,14,15,16)]
#reverting it
zacktreat<-t(zacktreat)
#doing the actual NMDS calculations
zacktreat_NMDS<-metaMDS(zacktreat, distance = "bray", k = 2, wascores = TRUE) #laver en Bray-Curtis-matrix og NMDS-analysen p? ?n gang
zacktreat_NMDS.envfit <- envfit(zacktreat_NMDS, env = env, perm = 999,na.rm = TRUE)
data for the envfit arrows
env.scores.zacktreat <- as.data.frame(scores(zacktreat_NMDS.envfit, display = "vectors")) #extracts relevant scores from envifit
env.scores.zacktreat <- cbind(env.scores.zacktreat, env.variables = rownames(env.scores.zacktreat)) #and then gives them their names
mult <- 3 #multiplier for the arrows and text for envfit below. You can change this and then rerun the plot command.
#The ggplot
p1<-ggplot(
# Create data object including treatment as a column
data = cbind(comm = rownames(zacktreat_NMDS$points), treat = Expvect, as.data.frame(zacktreat_NMDS$points)),
aes(x = MDS1, y = MDS2, xlab=""))+
Create polygon from hull dataframe
scale_colour_manual(values=c("green","blue","red","darkgrey","darkgreen","orange","purple4","deeppink","turquoise2")) +
scale_shape_manual(values=c(1, 16)) +
# Use aesthetics to color points by treatment
theme_bw() +
geom_point(aes(col=Treatment, shape=Water), size = 3) +
geom_segment(data = env.scores.zacktreat,
aes(x = 0, xend = mult*NMDS1, y = 0, yend = mult*NMDS2),
arrow = arrow(length = unit(0.25, "cm")), colour = "grey") + #arrows for envfit. doubled the length for similarity to the plot() function. NB check ?envfit regarding arrow length if not familiar with lengths
geom_text(data = env.scores.zacktreat, #labels the environmental variable arrows * "mult" as for the arrows
aes(x = mult*NMDS1, y = mult*NMDS2, label=env.variables),
size = 4,fontface="italic",
hjust = 0.5) +
#scale_fill_discrete(limits=c("C","N","P","NP","W","WN","WP", "WNP")) +
#geom_text(aes(label = comm), vjust = 1.5, size=5) +
coord_cartesian(xlim=c(-1.4,1.4), ylim=c(-1.0,1.0)) +
labs(title="With env.vectors", x="") +
theme(plot.margin=unit(c(0,0,0,0), "mm"))
Basically, NMDS plots are merely points sorted in a 2D-space by X and Y (or MDS1 and MDS2). Like this.
zacktreat_NMDS$points
MDS1 MDS2
X10D_WP_2 -0.02129782 -0.247132940
X10K_WP_2 -0.06159335 -0.091143442
X10S_WP_2 0.15047218 -0.251961539
X11D_P_2 0.18581153 -0.353744033
X11K_P_2 -0.20216965 -0.086797786
X11S_P_2 0.02733803 -0.335677136
X12D_N_2 0.29451589 -0.346472094
X12K_N_2 -0.14428059 -0.129560987
X13D_NP_2 0.16634831 -0.126218766
X13K_NP_2 -0.63748026 -0.402086997
X13S_NP_2 0.03787783 -0.054936657
X14D_WNP_2 -0.05264202 -0.534361509
X14K_WNP_2 -0.37575597 -0.085469131
X14S_WNP_2 0.04165887 -0.417154266
X15D_C_2 0.25282597 -0.400829162
X15K_C_2 -0.05095801 0.019005850
X15S_C_2 0.07280139 -0.211689823
X16K_W_2 -0.63515249 -0.192958200
X16S_W_2 0.31387911 0.223031310
X17D_W_3 -0.25269609 0.567593307
I want to draw something like my attached picture (apologies for the haplessness). Either with three factors separated - plant, water, and treatment, and with water as filled symbols, and no water as open symbols - or with the 4 of the 8 treatments that have water added (with W in their acronym) with filled circles/symbols, and those which do not have water added as open symbols, AND all 8 treatment groups additionally in colour.
I hope I am making myself clear, or if not, that my attached picture says more than my 1000 words.
Something like this, perhaps?
library(tidyverse)
# Create a fake dataframe
df <- tibble(
Species = sample (c("Dryas", "Salix", "Kobresia"), 100, replace = TRUE),
MDS1 = rnorm(100),
MDS2 = rnorm(100),
Treatment = sample(c("C","N","P","NP","W","WN","WP", "WNP"), 100, replace = TRUE)
) %>%
mutate(Water = startsWith(Treatment, "W"),
Species = factor(Species, levels = c("Dryas", "Salix", "Kobresia"))
)
ggplot(df, aes(x = MDS1, y = MDS2))+
scale_colour_manual(values=c("green","blue","red","darkgrey","darkgreen","orange","purple4","deeppink","turquoise2"), na.value = "transparent") +
scale_fill_manual(values=c("transparent","transparent", "transparent","transparent","darkgreen","orange","purple4","deeppink","turquoise2"), na.value = "transparent") +
scale_shape_manual(values=c(21, 22, 24), na.value = 1) +
geom_point(
aes(
color = Treatment,
fill = Treatment,
shape = Species
),
size = 5,
#color = "black",
)+
guides(
shape = guide_legend(override.aes = list(fill = "black")),
fill = guide_legend(override.aes = list(shape = 21))
)