My earlier question, which has been solved, was about labelling a network whose node colors came from modules. Now I would like to know how to add annotations when they come from a separate node file. I would like my graph to look something like this, or similar, where the network has been annotated based on the functional enrichment of the genes.
This is my basic code to generate the base graph from my files:
# Read the STRING interaction table (tab-separated); check.names = FALSE keeps
# the header names exactly as they appear in the file.
net_table <- read.csv(
  "TGCA_subtype_figure3/string_interactions.tsv",
  sep = "\t",
  check.names = FALSE
)
head(net_table)
names(net_table)[1] <- "node1"

# Keep only the two interaction partners and the coexpression score.
net_table_filter <- net_table %>%
  select(node1, node2, coexpression)

# Read the functional annotation table (also tab-separated).
net_func <- read.csv(
  "TGCA_subtype_figure3/string_functional_annotations.tsv",
  sep = "\t",
  check.names = FALSE
)
head(net_func)
names(net_func)[1] <- "name"

# Edge list in the column layout igraph expects: source, target, then
# edge attributes (here the coexpression score as `weight`).
edges <- net_table_filter %>%
  select(source = node1, target = node2, weight = coexpression)

# Vertex table: one row per gene, keeping the first term description seen
# for each gene as its `id` attribute.
nodes <- net_func %>%
  select(name, id = `term description`)
nodes <- nodes[!duplicated(nodes[["name"]]), ]

# Build the undirected graph and print its summary.
g <- graph_from_data_frame(d = edges, vertices = nodes, directed = FALSE)
g
Plot the data
# Inspect the graph object. Each accessor sits on its own line so that it
# actually runs; when they share one line, everything after the first `#`
# is treated as a comment.
V(g)           # nodes
V(g)$name      # names of each node
vertex_attr(g) # all attributes of the nodes
E(g)           # edges
E(g)$weight    # weights for each edge
edge_attr(g)   # all attributes of the edges
g[]            # adjacency matrix
# Draw the network with igraph's base plotting method.
plot(
  g,
  edge.color = "grey20",        # dark grey edges
  edge.curved = 0.25,           # bend the edges slightly (curvature 0.25)
  vertex.color = "grey",        # fill color of the nodes
  vertex.label.color = "black", # color of the node labels
  vertex.label.cex = 0.75       # labels at 75% of the default size
)
The output I get is this, which does not look very good. My questions are:
How do I use the information in string_functional_annotations.tsv to annotate the network?
I see many nodes that are not connected. Is there a way to remove nodes that have no interactions, or to leave them out of the final rendered network?
How I got here: I have a list of genes that I used as input for STRINGdb, and I then downloaded two files, one with the network and the other with the functional enrichment. I tried to format my data based on this tutorial.
Any help or suggestion would be really appreciated
DATA UPDATE Nodes
structure(list(name = c("A2ML1", "A4GNT", "AARD", "ABCC6", "ABCG1",
"ABHD6", "ACACB", "ACCS", "ACOT12", "ACTL8", "ACTN1", "ACTN2",
"ACTR3C", "ACVR2A", "ADAD2", "ADAM28", "ADAM33", "ADAMTS12",
"ADAMTS4", "ADAMTS6", "ADAMTSL4", "ADD3", "ADGB", "ADRB2", "ADRB3",
"AGBL4", "AJUBA", "AKAP7", "AKR1B1", "ALDH3B1", "ALDH4A1", "AMDHD1",
"AMHR2", "AMOT", "ANKEF1", "ANKRD1", "ANKRD20A1", "ANO4", "ANO9",
"ANXA10", "AP1M2", "AP1S2", "APMAP", "APOL3", "AQP12B", "AQP3",
"AQP4", "ARAP2", "ARHGAP10", "ARHGAP20", "ARHGAP28", "ARHGAP4",
"ARHGEF15", "ARID5B", "ARL9", "ARMC5", "ARSH", "ASAP3", "ASIC2",
"ASNS", "ASTL", "ATP1A3", "ATP2B2", "ATP8A1", "AVPR2", "B4GALT2",
"B4GALT6", "B9D1", "BAGE5", "BCAR1", "BCAS4", "BCL9", "BCO1",
"BDH1", "BMI1", "BPIFB3", "BSG", "BTBD17", "BTN1A1", "C10orf55",
"C11orf96", "C15orf56", "C15orf65", "C17orf64", "C19orf33", "C1QTNF6",
"C1orf53", "C2orf72", "C2orf80", "C3orf84", "C4B", "C4BPB", "C4orf45",
"C4orf48", "C4orf50", "C4orf51", "C5orf46", "CA13", "CA7", "CACNA2D1"
), id = c("Negative regulation of metabolic process", "Carbohydrate metabolic process",
"Multicellular organism development", "System process", "Regulation of peptide secretion",
"Lipid metabolic process", "Organic acid metabolic process",
"Organic acid metabolic process", "Organic acid metabolic process",
"Organelle organization", "Cell morphogenesis", "MAPK cascade",
"Organelle organization", "Reproduction", "Nucleobase-containing compound metabolic process",
"Reproduction", "Proteolysis", "Proteoglycan metabolic process",
"Skeletal system development", "Blood vessel development", "Epithelial cell development",
"Transport", "Proteolysis", "Temperature homeostasis", "Regulation of protein phosphorylation",
"Immune effector process", "G2/M transition of mitotic cell cycle",
"Action potential", "Reproduction", "Cell activation", "Cellular aldehyde metabolic process",
"Organic acid metabolic process", "Reproduction", "Angiogenesis",
"Binding", "Negative regulation of transcription by rna polymerase ii",
"Plasma membrane", "Transport", "Transport", "Binding", "Immune system process",
"Immune system process", "Metabolic process", "Nitrogen compound metabolic process",
"Transport", "Response to hypoxia", "Immune system process",
"Cell communication", "Organelle organization", "Cell communication",
"Cell communication", "Regulation of cell growth", "Eye development",
"Reproduction", "Nucleotide binding", "Intracellular", "Catalytic activity",
"Movement of cell or subcellular component", "Nervous system process involved in regulation of systemic arterial blood pressure",
"Response to acid chemical", "Reproduction", "Transport", "System process",
"Cell activation", "Cytokine production", "System process", "Cell morphogenesis",
"Eye development", "Extracellular region", "Regulation of cell growth",
"Intracellular", "Regulation of transcription, dna-templated",
"Retinoid metabolic process", "Generation of precursor metabolites and energy",
"Negative regulation of transcription by rna polymerase ii",
"Immune system process", "Reproduction", "Response to external stimulus",
"Regulation of cytokine production", "Mixed, incl. zinc finger, c2h2 type, and prespliceosome",
"Mixed, incl. olfactory receptor, and krueppel-associated box",
"Anthropometric measurement", "Mixed, incl. williams-beuren syndrome, and cell cycle regulatory protein",
"Mixed, incl. rab-gtpase-tbc domain, and keratin, high sulfur b2 protein",
"Intracellular", "Binding", "Mostly uncharacterized, incl. akirin, and split hand-foot malformation 1",
"Mostly uncharacterized, incl. ly-6 antigen/upa receptor-like, and acetylcholine receptor regulator activity",
"Mostly uncharacterized, incl. magnesium ion transmembrane transport, and putative golgin subfamily a member 2-like protein 5",
"Mostly uncharacterized, incl. phospholipid translocation, and domain of unknown function duf4210",
"Response to molecule of bacterial origin", "Adaptive immune response",
"Mixed, incl. b-box-type zinc finger, and zinc finger, ring-type",
"Extracellular region", "Pulmonary function measurement", "Mixed, incl. mfs transporter superfamily, and tlv/env coat polyprotein",
"Extracellular region", "One-carbon metabolic process", "One-carbon metabolic process",
"Action potential")), row.names = c(1L, 288L, 389L, 406L, 900L,
1242L, 1453L, 1828L, 1875L, 1988L, 2046L, 2438L, 3075L, 3175L,
3557L, 3626L, 3743L, 3839L, 4010L, 4168L, 4297L, 4465L, 4698L,
4754L, 5278L, 5465L, 5708L, 6049L, 6216L, 6572L, 6749L, 6942L,
7041L, 7256L, 7540L, 7568L, 7960L, 7999L, 8114L, 8265L, 8305L,
8510L, 8824L, 8890L, 8990L, 9021L, 9206L, 9436L, 9554L, 9676L,
9741L, 9846L, 10034L, 10186L, 10492L, 10524L, 10669L, 10740L,
10893L, 11141L, 11446L, 11574L, 12118L, 12370L, 12676L, 13056L,
13244L, 13455L, 13852L, 13870L, 14131L, 14155L, 14326L, 14457L,
14599L, 14853L, 14891L, 15236L, 15274L, 15407L, 15420L, 15429L,
15438L, 15444L, 15455L, 15489L, 15536L, 15542L, 15546L, 15553L,
15559L, 15844L, 15983L, 16008L, 16018L, 16050L, 16062L, 16084L,
16139L, 16245L), class = "data.frame")
Edges
structure(list(source = c("A2ML1", "A2ML1", "ABCG1", "ABCG1",
"ABCG1", "ABCG1", "ABCG1", "ABHD6", "ACACB", "ACACB", "ACACB",
"ACACB", "ACACB", "ACACB", "ACOT12", "ACOT12", "ACTL8", "ACTL8",
"ACTN1", "ACTN1", "ACTN1", "ACTN1", "ACTN1", "ACTN1", "ACTN1",
"ACTN1", "ACTN1", "ACTN1", "ACTN1", "ACTN1", "ACTN1", "ACTN1",
"ACTN1", "ACTN1", "ACTN2", "ACTN2", "ACTN2", "ACTN2", "ACTN2",
"ACTN2", "ACTN2", "ACTN2", "ACTN2", "ACTN2", "ACTN2", "ACTN2",
"ACTN2", "ACTN2", "ACTR3C", "ACVR2A", "ACVR2A", "ACVR2A", "ACVR2A",
"ACVR2A", "ADAD2", "ADAD2", "ADAD2", "ADAD2", "ADAM33", "ADAM33",
"ADAMTS12", "ADAMTS12", "ADAMTS12", "ADAMTS4", "ADAMTS4", "ADAMTS4",
"ADAMTS4", "ADAMTS4", "ADAMTS4", "ADAMTS6", "ADAMTS6", "ADAMTS6",
"ADAMTS6", "ADAMTS6", "ADAMTS6", "ADAMTSL4", "ADAMTSL4", "ADAMTSL4",
"ADAMTSL4", "ADAMTSL4", "ADGB", "ADGB", "ADGB", "ADGB", "ADRB2",
"ADRB2", "ADRB2", "ADRB2", "ADRB2", "ADRB2", "ADRB2", "ADRB2",
"ADRB2", "ADRB3", "ADRB3", "ADRB3", "AGBL4", "AJUBA", "AJUBA",
"AKAP7"), target = c("C4orf51", "EPPK1", "LRP2", "NPC1L1", "DHCR7",
"JAM2", "PLTP", "CNR1", "G6PC", "ACOT12", "NXNL2", "LPIN1", "ME3",
"ELOVL6", "SLCO1A2", "ACACB", "C5orf46", "C4orf51", "ITGB4",
"DAPK2", "KIF1B", "MYH15", "GRIA4", "ITGB6", "MICALL2", "MYOZ2",
"CSRP2", "ITGA11", "FERMT2", "MYLK", "FSCN1", "OAS1", "BCAR1",
"ACTN2", "MYOM2", "GRIA4", "MICALL2", "MYOZ2", "CSRP2", "MEF2C",
"FERMT2", "MYLK", "KLHL31", "ANKRD1", "AQP4", "ACTN1", "BCAR1",
"SCN5A", "LRRC61", "DUSP2", "SMAD6", "ENG", "INHBB", "MSTN",
"ENO4", "GABRA3", "M1AP", "ADGB", "LTC4S", "ADRB2", "ADAMTS4",
"ADAMTSL4", "ADAMTS6", "EFEMP2", "MATN3", "ERMN", "ADAMTS6",
"ADAMTS12", "ADAMTSL4", "ARHGAP20", "ASAP3", "ADAMTS4", "ADAMTSL4",
"NRSN2", "ADAMTS12", "LTBP3", "ADAMTS4", "FBN3", "ADAMTS12",
"ADAMTS6", "ADAD2", "M1AP", "HIVEP3", "IQCA1", "OPRD1", "GABBR2",
"ADAM33", "F2R", "AVPR2", "CNR1", "EDN2", "ADRB3", "BSG", "MYOM2",
"ADRB2", "CEBPA", "SYN3", "WWTR1", "TNFSF4", "CDCP2"), weight = c(0,
0, 0, 0.123, 0, 0, 0.062, 0, 0.063, 0.065, 0, 0.064, 0.062, 0.107,
0.218, 0.065, 0, 0, 0.062, 0.062, 0, 0.095, 0, 0.111, 0.065,
0.265, 0.362, 0, 0.085, 0.183, 0.073, 0, 0.069, 0, 0.406, 0.098,
0, 0.49, 0.362, 0.077, 0.062, 0.183, 0.421, 0.146, 0.332, 0,
0.06, 0.081, 0.052, 0, 0.062, 0, 0, 0.062, 0, 0, 0, 0, 0, 0,
0.062, 0.062, 0.076, 0, 0, 0.083, 0, 0.062, 0, 0, 0, 0, 0.058,
0, 0.076, 0.064, 0, 0, 0.062, 0.058, 0, 0, 0, 0.053, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0.052, 0.104, 0, 0)), row.names = c(NA,
100L), class = "data.frame")
PesKchan, in the absence of reproducible data (I suggest using dput() as you did previously), I am going to respond using the data set from your earlier question and build on that answer. I draw heavily on this response, so if you find this works for you, please consider voting up the original idea: "Repel text from edges in network". The approach that @JBGruber came up with is to create new nodes from the labels and "let the network layout algorithm do the work".
library(igraph)
library(ggraph)
library(dplyr)
# Example edge table used throughout this answer (the data set from the
# earlier question): a tibble with 60 rows, one per gene pair.
# Columns:
#   gene1, gene2     - character gene identifiers for the two ends of an edge
#   correlation      - numeric value attached to each pair
#   module1, module2 - factors sharing the same 16 color-name levels
# NOTE(review): module1 / module2 presumably hold the module color of gene1 /
# gene2 respectively - confirm against the original question.
dd <-
structure(list(gene1 = c("GBA3", "GBA3", "GBA3", "GBA3", "GBA3",
"GBA3", "GBA3", "GBA3", "GBA3", "GBA3", "GBA3", "GBA3", "GBA3",
"GBA3", "GBA3", "IGHV3-52", "IGHV3-52", "IGHV3-52", "IGHV3-52",
"IGHV3-52", "IGHV3-52", "IGHV3-52", "IGHV3-52", "IGHV3-52", "IGHV3-52",
"IGHV3-52", "IGHV3-52", "IGHV3-52", "IGHV3-52", "IGHV3-52", "GGNBP1",
"GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1",
"GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1", "GGNBP1",
"OR52B6", "OR52B6", "OR52B6", "OR52B6", "OR52B6", "OR52B6", "OR52B6",
"OR52B6", "OR52B6", "OR52B6", "OR52B6", "OR52B6", "OR52B6", "OR52B6",
"OR52B6"), gene2 = c("LRP2BP", "ADGB", "ASNSP3", "HSD17B2", "HSP90B1",
"IFT22", "P4HB", "TTC22", "XKR9", "IQSEC2", "NECAB2", "ANO1",
"CPPED1", "MAGEE1", "MAPRE3", "COTL1P1", "OR13G1", "FTH1P11",
"KRT8P44", "LINC00243", "MYOZ1", "PARD6G", "PDLIM5", "RN7SL67P",
"PARP3", "SH3BGRL3", "KIF1B", "CDK6", "CYP24A1", "TFEB", "LRP2BP",
"ADGB", "ASNSP3", "HSD17B2", "HSP90B1", "IFT22", "P4HB", "TTC22",
"XKR9", "IQSEC2", "NECAB2", "ANO1", "CPPED1", "MAGEE1", "MAPRE3",
"COTL1P1", "OR13G1", "FTH1P11", "KRT8P44", "LINC00243", "MYOZ1",
"PARD6G", "PDLIM5", "RN7SL67P", "PARP3", "SH3BGRL3", "KIF1B",
"CDK6", "CYP24A1", "TFEB"), correlation = c(1.19842058210312e-07,
3.95592260312023e-09, 1.18879994893077e-09, 3.67331679745971e-10,
5.48302012245219e-09, 7.97197389702251e-06, 9.7387584019434e-08,
5.77878345171157e-08, 1.01118703571283e-08, 1.81543845754574e-07,
3.7673420265534e-08, 1.02575704450652e-08, 4.82487451740043e-08,
1.65401803325697e-07, 2.95827225165244e-09, 1.35635056964288e-07,
1.16813988688191e-09, 1.34340296981193e-07, 5.26153755948588e-08,
5.06031471203736e-05, 1.63465042896832e-09, 2.10400523574347e-09,
1.08460550923374e-08, 1.09938266167239e-06, 3.31572488037795e-08,
3.97957891649769e-07, 2.0833042793021e-08, 4.16797585733493e-06,
1.02162139939232e-07, 3.74962089757379e-06, 5.10285758466629e-07,
0.000165189152741692, 0.000572780674091671, 2.43056928465514e-07,
0.00166978419035755, 2.3826397075692e-07, 0.000204964046470693,
1.32648351252772e-06, 2.79759921075308e-06, 1.11117833192239e-06,
6.87171744654038e-09, 2.33022551088771e-09, 2.7732284839245e-06,
1.74867497254059e-06, 1.16457488078883e-08, 2.58493584273799e-05,
0.000117632422231583, 0.000115191350816912, 3.45926695804785e-05,
6.60444623946169e-07, 8.48280303856373e-09, 9.3470012463335e-07,
2.33358874243648e-05, 9.13982092399789e-05, 6.16545562787355e-06,
0.0014007113940871, 1.549339320847e-06, 0.000373320941277797,
2.87750585085082e-08, 0.00105876974504533), module1 = structure(c(9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L), .Label = c("black", "blue", "brown", "cyan", "green",
"greenyellow", "grey", "magenta", "midnightblue", "pink", "purple",
"red", "salmon", "tan", "turquoise", "yellow"), class = "factor"),
module2 = structure(c(3L, 7L, 2L, 7L, 1L, 4L, 1L, 3L, 5L,
15L, 15L, 7L, 15L, 3L, 15L, 15L, 2L, 15L, 2L, 3L, 7L, 16L,
15L, 11L, 15L, 15L, 15L, 2L, 3L, 15L, 3L, 7L, 2L, 7L, 1L,
4L, 1L, 3L, 5L, 15L, 15L, 7L, 15L, 3L, 15L, 15L, 2L, 15L,
2L, 3L, 7L, 16L, 15L, 11L, 15L, 15L, 15L, 2L, 3L, 15L), .Label = c("black",
"blue", "brown", "cyan", "green", "greenyellow", "grey",
"magenta", "midnightblue", "pink", "purple", "red", "salmon",
"tan", "turquoise", "yellow"), class = "factor")), row.names = c(NA,
-60L), class = c("tbl_df", "tbl", "data.frame"))
library(igraph)
library(ggraph)
library(dplyr)
# Build one vertex table from both ends of every edge: each gene paired with
# the module color recorded next to it in `dd`.
df1 <- dd %>%
  select(gene = gene1, color = module1)
df2 <- dd %>%
  select(gene = gene2, color = module2)
df_verts <- unique(rbind(df1, df2))

# Pick 8 genes to label. The choice is random and only serves this example;
# fixing the seed keeps the labelled genes (and the figure) reproducible.
# The result is stored as `label_genes` so base::sample() is not shadowed.
set.seed(42)
label_genes <- sample(df_verts$gene, 8)
label_ids <- paste0("Label", seq_along(label_genes))

# Add the labels as separate vertices: each label vertex has no color and
# carries the gene name it annotates in `label`; real genes get label = NA.
df_verts_lab <- tibble(gene = label_ids, color = NA, label = label_genes)
df_verts$label <- NA
df_verts <- rbind(df_verts, df_verts_lab)

# Existing gene-gene relationships: flag them as non-label edges (lab = 0)
# and give them an edge color.
rel_base <- dd %>%
  select(gene1, gene2) %>%
  mutate(lab = 0, color = "steelblue")

# One extra relationship per label, tying each labelled gene to its label
# vertex (lab = 1), so the layout algorithm places the label near the gene.
rel_lab <- tibble(
  gene1 = label_genes,
  gene2 = label_ids,
  lab = 1,
  color = "grey"
)
rel <- rbind(rel_base, rel_lab)

# Graph containing both the gene vertices and the label vertices.
g2 <- graph_from_data_frame(rel, vertices = df_verts)
# Lay out g2 with igraph's "fr" algorithm and draw it in three layers:
# points for vertices without a label, thin links for the edges, and text
# for the vertices that carry a label.
ggraph(g2, layout = "igraph", algorithm = "fr") +
  geom_node_point(
    aes(filter = is.na(label), color = color),
    size = 1,
    shape = 20
  ) +
  geom_edge_link(aes(edge_color = color), width = 0.1) +
  geom_node_text(
    aes(filter = !is.na(label), label = label, color = color)
  ) +
  # Blank background and no legend.
  theme_void() +
  theme(legend.position = "none")