Is it possible to generate a barplot like in the following link using ggplot?
https://photos.app.goo.gl/E3MC461dKaTZfHza9
Here is what I did:
library(ggplot2)
# Example data as inline CSV: one row per (treatment, gene) pair with the
# value to plot (`freq`) and the colour wanted for that gene (`cols`).
df <- read.csv(
  header = TRUE,
  sep = ",",
  text = "trt,gene,freq,cols
M6,ALDH16A1,100.0000000,red
M6,Others,0.0000000,lightgrey
M12,ALDH16A1,64.6638015,red
M12,GBE1,2.0074865,#4C00FF
M12,ZNF598,1.5832525,#004CFF
M12,CHMP6,1.3503397,#00E5FF
M12,C20orf27,1.2033828,#00FF4D
M12,NEGR1,0.9676972,#4DFF00
M12,TNFAIP6,0.9122418,#E6FF00
M12,ZSCAN25,0.7375572,#FFFF00
M12,BCL2,0.6848745,#FFDE59
M12,CBL,0.6765562,#FFE0B3
M12,Others,25.2128102,lightgrey
M18,ALDH16A1,42.4503581,red
M18,ATF2,2.2360682,#4C00FF
M18,DIAPH1,1.5256507,#004CFF
M18,SESTD1,1.2053805,#00E5FF
M18,TFCP2,1.1587958,#00FF4D
M18,SCAPER,1.1180341,#4DFF00
M18,CUX1,1.0306877,#E6FF00
M18,TEX10,0.9841030,#FFFF00
M18,C6orf89,0.9666337,#FFDE59
M18,PTTG1IP,0.9258720,#FFE0B3
M18,Others,46.3984161,lightgrey"
)
# Turn both grouping columns into factors whose levels follow the order of
# first appearance in the data, so ggplot keeps that order on the axis and in
# the legend instead of sorting the labels.
df[c("trt", "gene")] <- lapply(
  df[c("trt", "gene")],
  function(v) factor(v, levels = unique(as.character(v)))
)

# Stacked bars: one bar per treatment, one segment per gene.
ggplot(df, aes(x = trt, y = freq, fill = gene)) +
  geom_col(width = 0.5, color = "black") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 4))
df$cols is the color I want to use for each gene in M6, M12 and M18, as shown in the figure. In each bar, the 'Others' category of df$gene should always be at the bottom of the bar.
Thank you
Aimin
I modified your code to generate a similar plot for another data set; however, I have some new questions. For example:
library(dplyr)
library(tidyverse)
library(ggnewscale)
# Inline CSV with one row per (treatment, gene) pair: `trt` is the treatment
# label, `gene` the gene name, `freq` the value used as bar height and `cols`
# the fill colour listed for that gene in that treatment.
df <- read.csv(text='"trt","gene","freq","cols"
"100.0.250ng_CellLine_0","ALDH16A1",100,"red"
"100.0.250ng_CellLine_0","Others",0,"lightgrey"
"75.25.250ng_CellLine_0","ALDH16A1",64.6638014695688,"red"
"75.25.250ng_CellLine_0","GBE1",2.0074864827395,"#4C00FF"
"75.25.250ng_CellLine_0","ZNF598",1.5832524608346,"#004CFF"
"75.25.250ng_CellLine_0","CHMP6",1.35033966449466,"#00E5FF"
"75.25.250ng_CellLine_0","C20orf27",1.2033827810897,"#00FF4D"
"75.25.250ng_CellLine_0","NEGR1",0.967697213364758,"#4DFF00"
"75.25.250ng_CellLine_0","TNFAIP6",0.912241785664772,"#E6FF00"
"75.25.250ng_CellLine_0","ZSCAN25",0.737557188409816,"#FFFF00"
"75.25.250ng_CellLine_0","BCL2",0.684874532094829,"#FFDE59"
"75.25.250ng_CellLine_0","CBL",0.676556217939831,"#FFE0B3"
"75.25.250ng_CellLine_0","Others",25.2128102037987,"lightgrey"
"50.50.250ng_CellLine_0","ALDH16A1",42.4503581203051,"red"
"50.50.250ng_CellLine_0","ATF2",2.23606824666628,"#4C00FF"
"50.50.250ng_CellLine_0","DIAPH1",1.52565073079835,"#004CFF"
"50.50.250ng_CellLine_0","SESTD1",1.20538053921854,"#00E5FF"
"50.50.250ng_CellLine_0","TFCP2",1.15879578407966,"#00FF4D"
"50.50.250ng_CellLine_0","SCAPER",1.11803412333314,"#4DFF00"
"50.50.250ng_CellLine_0","CUX1",1.03068770744774,"#E6FF00"
"50.50.250ng_CellLine_0","TEX10",0.984102952308857,"#FFFF00"
"50.50.250ng_CellLine_0","C6orf89",0.966633669131777,"#FFDE59"
"50.50.250ng_CellLine_0","PTTG1IP",0.925872008385256,"#FFE0B3"
"50.50.250ng_CellLine_0","Others",46.3984161183253,"lightgrey"
"10.90.250ng_CellLine_0","ALDH16A1",4.68952007835455,"red"
"10.90.250ng_CellLine_0","STK11",1.93143976493634,"#4C00FF"
"10.90.250ng_CellLine_0","ERGIC2",1.46523016650343,"#004CFF"
"10.90.250ng_CellLine_0","EFR3A",1.1126346718903,"#00E5FF"
"10.90.250ng_CellLine_0","TMEM235",1.03819784524976,"#00FF4D"
"10.90.250ng_CellLine_0","NGLY1",1.01469147894221,"#4DFF00"
"10.90.250ng_CellLine_0","CNOT10",0.991185112634672,"#E6FF00"
"10.90.250ng_CellLine_0","NPLOC4",0.983349657198825,"#FFFF00"
"10.90.250ng_CellLine_0","GZMB",0.928501469147894,"#FFDE59"
"10.90.250ng_CellLine_0","KIF2C",0.924583741429971,"#FFE0B3"
"10.90.250ng_CellLine_0","Others",84.9206660137121,"lightgrey"
"1.99.250ng_CellLine_0","DNAH1",2.36284289276808,"red"
"1.99.250ng_CellLine_0","ALOX5AP",2.29426433915212,"#4C00FF"
"1.99.250ng_CellLine_0","SEPT7",1.78304239401496,"#004CFF"
"1.99.250ng_CellLine_0","TCF20",1.35910224438903,"#00E5FF"
"1.99.250ng_CellLine_0","USP32",1.27805486284289,"#00FF4D"
"1.99.250ng_CellLine_0","MUS81",1.24688279301746,"#4DFF00"
"1.99.250ng_CellLine_0","CEP44",1.22817955112219,"#E6FF00"
"1.99.250ng_CellLine_0","TMEM164",1.20324189526185,"#FFFF00"
"1.99.250ng_CellLine_0","RAP1B",1.18453865336658,"#FFDE59"
"1.99.250ng_CellLine_0","GSN",1.14713216957606,"#FFE0B3"
"1.99.250ng_CellLine_0","Others",84.9127182044888,"lightgrey"
"0.100.250ng_CellLine_0","RTN3",2.3050199437531,"red"
"0.100.250ng_CellLine_0","CHTF18",1.67637814091135,"#4C00FF"
"0.100.250ng_CellLine_0","RNPS1",1.41168685550429,"#004CFF"
"0.100.250ng_CellLine_0","RBKS",1.05325073984891,"#00E5FF"
"0.100.250ng_CellLine_0","ZNF805",0.987077918497142,"#00FF4D"
"0.100.250ng_CellLine_0","TMBIM6",0.865761079352242,"#4DFF00"
"0.100.250ng_CellLine_0","RP3-449O17.1",0.841865338308549,"#E6FF00"
"0.100.250ng_CellLine_0","RNASEH2A",0.814293329411981,"#FFFF00"
"0.100.250ng_CellLine_0","FAM46A",0.810617061559105,"#FFDE59"
"0.100.250ng_CellLine_0","CYB561A3",0.79775012407404,"#FFE0B3"
"0.100.250ng_CellLine_0","Others",88.4362994687793,"lightgrey"
"100.0.500ng_CellLine_0","ALDH16A1",100,"red"
"100.0.500ng_CellLine_0","Others",0,"lightgrey"
"75.25.500ng_CellLine_0","ALDH16A1",64.6680558047111,"red"
"75.25.500ng_CellLine_0","STX18",0.76034608856445,"#4C00FF"
"75.25.500ng_CellLine_0","BCL7A",0.685829412008224,"#004CFF"
"75.25.500ng_CellLine_0","PTPRC",0.634771689182662,"#00E5FF"
"75.25.500ng_CellLine_0","GABRB1",0.626492058454193,"#00FF4D"
"75.25.500ng_CellLine_0","EDNRB",0.59751335090455,"#4DFF00"
"75.25.500ng_CellLine_0","TBC1D10C",0.538175997350518,"#E6FF00"
"75.25.500ng_CellLine_0","SRGAP2B",0.534036181986283,"#FFFF00"
"75.25.500ng_CellLine_0","RABGAP1",0.527136489712559,"#FFDE59"
"75.25.500ng_CellLine_0","CD44",0.485738336070211,"#FFE0B3"
"75.25.500ng_CellLine_0","Others",29.9419045910552,"lightgrey"
"50.50.500ng_CellLine_0","ALDH16A1",40.5808575357307,"red"
"50.50.500ng_CellLine_0","TNPO1",0.979207466977791,"#4C00FF"
"50.50.500ng_CellLine_0","RNA5SP443",0.93337222384266,"#004CFF"
"50.50.500ng_CellLine_0","MND1",0.912538022417601,"#00E5FF"
"50.50.500ng_CellLine_0","RB1",0.900037501562565,"#00FF4D"
"50.50.500ng_CellLine_0","PTPRA",0.791699654152256,"#4DFF00"
"50.50.500ng_CellLine_0","SUCNR1",0.783365973582233,"#E6FF00"
"50.50.500ng_CellLine_0","MIR1284",0.625026042751781,"#FFFF00"
"50.50.500ng_CellLine_0","RWDD1",0.587524480186674,"#FFDE59"
"50.50.500ng_CellLine_0","NTN1",0.575023959331639,"#FFE0B3"
"50.50.500ng_CellLine_0","Others",52.3313471394641,"lightgrey"
"10.90.500ng_CellLine_0","ALDH16A1",7.05601485476812,"red"
"10.90.500ng_CellLine_0","ENTPD5",1.4722136257129,"#4C00FF"
"10.90.500ng_CellLine_0","MFSD10",1.28210796233255,"#004CFF"
"10.90.500ng_CellLine_0","LENG8-AS1",0.915159821389098,"#00E5FF"
"10.90.500ng_CellLine_0","FRMD4B",0.884212387815553,"#00FF4D"
"10.90.500ng_CellLine_0","TWISTNB",0.853264954242009,"#4DFF00"
"10.90.500ng_CellLine_0","ZNF544",0.778106901277687,"#E6FF00"
"10.90.500ng_CellLine_0","NUDCD1",0.738317343825987,"#FFFF00"
"10.90.500ng_CellLine_0","PHF20",0.720633096069676,"#FFDE59"
"10.90.500ng_CellLine_0","HNRNPK",0.702948848313365,"#FFE0B3"
"10.90.500ng_CellLine_0","Others",84.5970202042531,"lightgrey"
"1.99.500ng_CellLine_0","SND1",2.97318305479984,"red"
"1.99.500ng_CellLine_0","ATF1",2.18940277237984,"#4C00FF"
"1.99.500ng_CellLine_0","CARM1",1.96916699054282,"#004CFF"
"1.99.500ng_CellLine_0","OR4K15",1.28902707604612,"#00E5FF"
"1.99.500ng_CellLine_0","MTMR3",1.26311698406529,"#00FF4D"
"1.99.500ng_CellLine_0","CDK13",1.13356652416116,"#4DFF00"
"1.99.500ng_CellLine_0","RNU6-385P",1.0752688172043,"#E6FF00"
"1.99.500ng_CellLine_0","SLC4A2",0.809690374400829,"#FFFF00"
"1.99.500ng_CellLine_0","TMF1",0.770825236429589,"#FFDE59"
"1.99.500ng_CellLine_0","MAN1A1",0.738437621453556,"#FFE0B3"
"1.99.500ng_CellLine_0","Others",85.7883145485167,"lightgrey"
"0.100.500ng_CellLine_0","ALYREF",1.53269861089433,"red"
"0.100.500ng_CellLine_0","HCG18",1.51084751053535,"#4C00FF"
"0.100.500ng_CellLine_0","RNU7-146P",0.922428593725613,"#004CFF"
"0.100.500ng_CellLine_0","ST3GAL3",0.884969564538786,"#00E5FF"
"0.100.500ng_CellLine_0","HSF1",0.811612299047916,"#00FF4D"
"0.100.500ng_CellLine_0","HP1BP3",0.792882784454503,"#4DFF00"
"0.100.500ng_CellLine_0","DAOA",0.736694240674262,"#E6FF00"
"0.100.500ng_CellLine_0","CDK13",0.689870454190729,"#FFFF00"
"0.100.500ng_CellLine_0","PDXDC1",0.680505696894022,"#FFDE59"
"0.100.500ng_CellLine_0","CKAP5",0.647729046355549,"#FFE0B3"
"0.100.500ng_CellLine_0","Others",90.7897611986889,"lightgrey"',
  # TRUE is spelled out because `T` is an ordinary variable that can be reassigned
  sep = ",", header = TRUE)
# Gene names in order of first appearance, with "Others" moved to the end.
g <- unique(as.character(df$gene))
i <- which(g %in% "Others")
g <- append(g[-i], g[i])

# Treatments keep the order in which they first occur in the data; genes use
# the order computed above.
df$trt <- factor(df$trt, levels = unique(as.character(df$trt)))
df$gene <- factor(df$gene, levels = g)

# Named colour vector: names are genes, values are the colours from df$cols.
cols <- df %>%
  dplyr::select(gene, cols) %>%
  distinct() %>%
  deframe()

# Treatment labels, in plotting order.
tr <- levels(df$trt)
# Build the stacked bar chart one treatment at a time. Each treatment tr[k]
# gets its own geom_bar() layer (drawn from the rows of df for that treatment)
# followed by its own manual fill scale whose legend is titled tr[k].
# new_scale_fill() is placed between consecutive treatments so that the next
# layer can be given a separate fill scale. The same named colour vector
# `cols` is passed to every scale.
p <- ggplot() +
# Layer and legend for tr[1]
geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[1]), stat = "identity", color = "black") +
scale_fill_manual(values = cols, guide = guide_legend(title = tr[1], ncol = 1, title.position = "top")) +
new_scale_fill() + # Define scales before initiating a new one
# Layer and legend for tr[2]
geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[2]), stat = "identity", color = "black") +
scale_fill_manual(values = cols, guide = guide_legend(title = tr[2], ncol = 1, title.position = "top")) +
new_scale_fill() + # Define scales before initiating a new one
# Layer and legend for tr[3]
geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[3]), stat = "identity", color = "black") +
scale_fill_manual(values = cols, guide = guide_legend(title = tr[3], ncol = 1, title.position = "top")) +
new_scale_fill() +
# Layer and legend for tr[4]
geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[4]), stat = "identity", color = "black") +
scale_fill_manual(values = cols, guide = guide_legend(title = tr[4], ncol = 1, title.position = "top")) +
new_scale_fill() +
# Layer and legend for tr[5]
geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[5]), stat = "identity", color = "black") +
scale_fill_manual(values = cols, guide = guide_legend(title = tr[5], ncol = 1, title.position = "top")) +
new_scale_fill() +
# Layer and legend for tr[6]
geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[6]), stat = "identity", color = "black") +
scale_fill_manual(values = cols, guide = guide_legend(title = tr[6], ncol = 1, title.position = "top")) +
new_scale_fill() +
# Layer and legend for tr[7]
geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[7]), stat = "identity", color = "black") +
scale_fill_manual(values = cols, guide = guide_legend(title = tr[7], ncol = 1, title.position = "top")) +
new_scale_fill() +
# Layer and legend for tr[8]
geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[8]), stat = "identity", color = "black") +
scale_fill_manual(values = cols, guide = guide_legend(title = tr[8], ncol = 1, title.position = "top")) +
new_scale_fill() +
# Layer and legend for tr[9]
geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[9]), stat = "identity", color = "black") +
scale_fill_manual(values = cols, guide = guide_legend(title = tr[9], ncol = 1, title.position = "top")) +
new_scale_fill() +
# Layer and legend for tr[10]
geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[10]), stat = "identity", color = "black") +
scale_fill_manual(values = cols, guide = guide_legend(title = tr[10], ncol = 1, title.position = "top")) +
new_scale_fill() +
# Layer and legend for tr[11]
geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[11]), stat = "identity", color = "black") +
scale_fill_manual(values = cols, guide = guide_legend(title = tr[11], ncol = 1, title.position = "top")) +
new_scale_fill() +
# Layer and legend for tr[12] (last one, so no new_scale_fill() afterwards)
geom_bar(mapping = aes(x = trt, y = freq, fill = gene), data = dplyr::filter(df, trt == tr[12]), stat = "identity", color = "black") +
scale_fill_manual(values = cols, guide = guide_legend(title = tr[12], ncol = 1, title.position = "top")) +
# Small, rotated x-axis labels; legends placed below the plot
theme(axis.text.x = element_text(angle = 45, hjust = 1,size = 4), legend.position = "bottom", legend.justification = 0)
# Print the plot
p
I want the stacked bars and their legends to follow the order of tr from left to right, as follows:
"100.0.250ng_CellLine_0" "75.25.250ng_CellLine_0" "50.50.250ng_CellLine_0" "10.90.250ng_CellLine_0" "1.99.250ng_CellLine_0" "0.100.250ng_CellLine_0" "100.0.500ng_CellLine_0" "75.25.500ng_CellLine_0" "50.50.500ng_CellLine_0" "10.90.500ng_CellLine_0" "1.99.500ng_CellLine_0" "0.100.500ng_CellLine_0"
However, it seems the above code does not generate the stacked bars in this order.
In addition, for '0.100.500ng_CellLine_0' in df, the order of the genes and colors in the stacked bar is not the same as the order in df:
0.100.500ng_CellLine_0 ALYREF 1.5326986 red
0.100.500ng_CellLine_0 HCG18 1.5108475 #4C00FF
0.100.500ng_CellLine_0 RNU7-146P 0.9224286 #004CFF
0.100.500ng_CellLine_0 ST3GAL3 0.8849696 #00E5FF
0.100.500ng_CellLine_0 HSF1 0.8116123 #00FF4D
0.100.500ng_CellLine_0 HP1BP3 0.7928828 #4DFF00
0.100.500ng_CellLine_0 DAOA 0.7366942 #E6FF00
0.100.500ng_CellLine_0 CDK13 0.6898705 #FFFF00
0.100.500ng_CellLine_0 PDXDC1 0.6805057 #FFDE59
0.100.500ng_CellLine_0 CKAP5 0.6477290 #FFE0B3
0.100.500ng_CellLine_0 Others 90.7897612 lightgrey
Another question is:
tr has 12 treatments, and I have to add new_scale_fill() for each treatment, so the code gets long. Is it possible to simplify this?
Thank you Aimin
Try this. Simply reorder the factor and use scale_fill_manual to set the fill colors.
library(tidyverse)
# Keep treatments in the order in which they first occur in the data.
df$trt <- factor(df$trt, levels = unique(as.character(df$trt)))

# Genes in order of first appearance, then "Others" moved to the front and
# the whole level order reversed, which leaves "Others" as the last level so
# that it is stacked at the bottom of each bar.
df$gene <- factor(df$gene, levels = unique(as.character(df$gene))) %>%
  forcats::fct_relevel("Others", after = 0) %>%
  forcats::fct_rev()

# Named vector of fill colours: names are genes, values come from df$cols.
cols <- df %>%
  distinct(gene, cols) %>%
  deframe()

# Stacked bars with the manual colours.
p <- ggplot(df, aes(x = trt, y = freq, fill = gene)) +
  geom_col(color = "black") +
  scale_fill_manual(values = cols) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 4))
Created on 2020-06-05 by the reprex package (v0.3.0)
EDIT: Separate legends for the single groups can be achieved via ggnewscale::new_scale_fill. To get the correct order along the x-axis I make use of facetting. Try this:
library(tidyverse)
library(ggnewscale)
# Keep treatments in the order in which they first occur in the data.
df$trt <- factor(df$trt, levels = unique(as.character(df$trt)))

# Genes in order of first appearance; "Others" is moved to the front and the
# level order is then reversed, so "Others" ends up as the last level.
df$gene <- factor(df$gene, levels = unique(as.character(df$gene))) %>%
  forcats::fct_relevel("Others", after = 0) %>%
  forcats::fct_rev()

# Named vector of fill colours: names are genes, values come from df$cols.
cols <- df %>%
  distinct(gene, cols) %>%
  deframe()

# One bar layer and one fill scale per treatment; new_scale_fill() separates
# them so that each treatment gets its own legend. Facetting by trt restores
# the left-to-right order of the treatments.
p <- ggplot() +
  geom_col(
    aes(x = trt, y = freq, fill = gene),
    data = filter(df, trt == "M6"),
    color = "black"
  ) +
  scale_fill_manual(
    values = cols,
    guide = guide_legend(title = "M6", ncol = 2, title.position = "top")
  ) +
  new_scale_fill() + # the scale above must be defined before starting a new one
  geom_col(
    aes(x = trt, y = freq, fill = gene),
    data = filter(df, trt == "M12"),
    color = "black"
  ) +
  scale_fill_manual(
    values = cols,
    guide = guide_legend(title = "M12", ncol = 2, title.position = "top")
  ) +
  new_scale_fill() + # the scale above must be defined before starting a new one
  geom_col(
    aes(x = trt, y = freq, fill = gene),
    data = filter(df, trt == "M18"),
    color = "black"
  ) +
  scale_fill_manual(
    values = cols,
    guide = guide_legend(title = "M18", ncol = 2, title.position = "top")
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, size = 4),
    legend.position = "bottom",
    legend.justification = 0
  ) +
  facet_wrap(~ trt, scales = "free_x")
p
Created on 2020-06-05 by the reprex package (v0.3.0)
EDIT 2
To simplify the code you can use a loop. I make use of some helper functions and purrr::reduce, but a simple for loop will also do the job.
The reordering of the x-axis however requires a bit of a hack. The problem is that by splitting the data we lose the order of the categories. As a solution I use facetting to bring the order back in but get rid of the striptext and spacing between facets.
library(dplyr)
library(tidyverse)
library(ggnewscale)
# Gene names in order of first appearance, with "Others" moved to the end.
g <- unique(as.character(df$gene))
i <- which(g %in% "Others")
g <- append(g[-i], g[i])

# Trim whitespace from the treatment labels and fix their order to the order
# of first appearance.
df$trt <- forcats::fct_inorder(stringr::str_trim(df$trt))
tr <- levels(df$trt)

# Named colour vector: names are genes, values are the colours from df$cols.
col_vec <- df %>%
  dplyr::select(gene, cols) %>%
  distinct() %>%
  deframe()
# Helper functions

# Prepare the data for one treatment.
#
# d: data frame with at least the columns `trt` and `gene`.
# x: index into the global vector `tr` selecting the treatment.
#
# Returns the rows of `d` whose trt equals tr[x], with `gene` converted to a
# factor ordered by first appearance and "Others" moved to the last level.
# Rows are sorted by that factor, and the level index is stored in the new
# column `gene_order`.
make_df <- function(d, x) {
  d %>%
    filter(trt == tr[x]) %>%
    mutate(
      # after = Inf places "Others" behind all other levels
      gene = forcats::fct_relevel(forcats::fct_inorder(gene), "Others", after = Inf)
    ) %>%
    arrange(gene) %>%
    mutate(gene_order = as.numeric(gene))
}
# geom
# Bar layer for the x-th treatment: draws the rows of df_list[[x]] as stacked
# bars filled by gene, with black outlines.
help_geom <- function(x) {
  geom_col(
    mapping = aes(x = trt, y = freq, fill = gene),
    data = df_list[[x]],
    color = "black"
  )
}
# scale
# Manual fill scale (and legend) for the x-th treatment.
#
# x: index into `tr` and `df_list` selecting the treatment.
#
# Returns a manual fill scale whose legend is titled tr[x], uses a single
# column and is given legend order x.
#
# The colours are taken from the treatment's own rows (df_list[[x]]) rather
# than from the global `col_vec`. A gene can be listed with different colours
# in different treatments (CDK13 is, in the example data); a named vector
# built over all treatments then contains that name twice, and a lookup by
# name only ever finds the first entry, so the later treatment would be drawn
# with the earlier treatment's colour.
help_scale <- function(x) {
  d <- df_list[[x]]
  fill_values <- stats::setNames(as.character(d$cols), as.character(d$gene))
  # Guard against repeated genes within one treatment: keep the first colour
  fill_values <- fill_values[!duplicated(names(fill_values))]
  scale_fill_manual(values = fill_values,
                    guide = guide_legend(order = x, title = tr[x], ncol = 1,
                                         title.position = "top", title.theme = element_text(size = 4)))
}
# help for the loop
# Step function for reduce(): takes the plot built so far and adds, in this
# order, a fresh fill scale slot, the bar layer for treatment x and the fill
# scale for treatment x. Returns the extended plot.
help_reduce <- function(p, x) {
  with_new_scale <- p + new_scale_fill()
  with_layer <- with_new_scale + help_geom(x)
  with_layer + help_scale(x)
}
# List of df: one prepared data frame per treatment, in the order of `tr`.
# The number of treatments is taken from `tr` instead of being hard-coded, so
# the same code works for any number of treatments.
df_list <- map(seq_along(tr), function(k) make_df(df, k))
# Init plot with the layer and scale of the first treatment
p <- ggplot() + help_geom(1) + help_scale(1)
# Loop over the remaining treatments. seq_along(tr)[-1] is empty when there is
# only one treatment; reduce() then returns the initial plot unchanged.
p <- reduce(seq_along(tr)[-1], help_reduce, .init = p)
# Add theme and wrap. Facetting by trt (one row, free x scales) restores the
# treatment order; strip labels and the spacing between panels are removed so
# the result looks like a single plot.
p +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 4),
        legend.text = element_text(size = 6),
        legend.position = "bottom", legend.justification = 0,
        strip.text = element_blank(),
        panel.spacing.x = unit(0, "pt")) +
  facet_wrap(~trt, scales = "free_x", nrow = 1)
Created on 2020-06-06 by the reprex package (v0.3.0)