I'm working on a plot showing the length distribution of some events in the human genome. I'm fairly happy with the end result, but I would like to add a `geom_label`
on the bar with the highest count, reporting the corresponding length on the x-axis, as well as a `geom_density`
curve, if possible.
About the label: can its color match the color of the associated bar, unless that is already the default? Thanks in advance!
Below are the code I'm using, a `dput()`
of the input, and the final output I'm getting from the plot.
library(scico)
library(readr)
library(stringr)
library(ggplot2)
# Import the FASTA file as a one-column tibble (one row per line of the file).
retros <- read_delim(
  "/path/to/homo_sapiens_Retrogenes.fasta.txt",
  delim = "\n",
  col_names = FALSE
)

# Wrangle: compute the length of every retrogene sequence.
# Collapse the lines into a single string explicitly, instead of handing the
# whole tibble to str_split() and relying on its implicit coercion to character.
fasta_text <- paste(retros[[1]], collapse = "\n")

# Split on the record headers. The piece in front of the first header is not a
# sequence, so it is dropped.
single_retro <- str_split(fasta_text, ">retro_hsap_[0-9]+")
sequences <- unlist(single_retro, use.names = FALSE)[-1]
a <- as.list(sequences) # kept as a list, one element per sequence

# str_count() is vectorised, so no loop (and no growing of `len`) is needed;
# it also copes with zero sequences, where `1:length(a)` would fail.
# Only upper-case letters are counted, so the line breaks are ignored.
len <- str_count(sequences, "[A-Z]")
# Plot: one bar per distinct length, each outlined with its own palette colour.
# The palette size is derived from the data rather than hard-coded (was 1229):
# a colour vector given outside aes() must have length 1 or one entry per bar,
# so a fixed size fails as soon as the number of distinct lengths changes.
ggplot(as.data.frame(len), aes(len)) +
  geom_bar(color = scico(length(unique(len)), palette = "acton")) +
  # The axis breaks and limits below are tuned to this particular data set.
  scale_x_continuous(breaks = seq(0, 6500, 250), expand = c(0, 0)) +
  scale_y_continuous(limits = c(0, 30), expand = c(0, 0)) +
  theme_bw()
Output of `dput(len)` (first 1000 values only):
c(408L, 321L, 522L, 942L, 462L, 564L, 765L, 747L, 465L, 957L,
993L, 1056L, 690L, 1554L, 1209L, 246L, 462L, 3705L, 1554L, 507L,
681L, 1173L, 408L, 330L, 1317L, 240L, 576L, 2301L, 1911L, 1677L,
1014L, 756L, 918L, 864L, 528L, 882L, 1131L, 1440L, 1167L, 1146L,
1002L, 906L, 1056L, 1881L, 396L, 1278L, 501L, 1110L, 303L, 1176L,
699L, 747L, 1971L, 3318L, 1875L, 450L, 354L, 1218L, 378L, 303L,
777L, 915L, 5481L, 576L, 1920L, 2022L, 1662L, 519L, 936L, 423L,
1149L, 600L, 1896L, 648L, 2238L, 1419L, 423L, 552L, 1299L, 1071L,
963L, 471L, 408L, 729L, 1896L, 1068L, 1254L, 1179L, 1188L, 645L,
978L, 903L, 1191L, 1119L, 747L, 1005L, 273L, 1191L, 519L, 930L,
1053L, 2157L, 933L, 888L, 591L, 1287L, 457L, 294L, 291L, 669L,
270L, 556L, 444L, 483L, 438L, 452L, 659L, 372L, 480L, 464L, 477L,
256L, 350L, 357L, 524L, 477L, 218L, 192L, 216L, 587L, 473L, 525L,
657L, 241L, 719L, 383L, 459L, 855L, 417L, 283L, 408L, 678L, 681L,
1254L, 879L, 250L, 857L, 706L, 456L, 567L, 190L, 887L, 287L,
240L, 960L, 587L, 361L, 816L, 297L, 290L, 253L, 335L, 609L, 507L,
294L, 1475L, 464L, 780L, 552L, 555L, 1605L, 1127L, 382L, 579L,
645L, 273L, 241L, 552L, 344L, 890L, 1346L, 1067L, 764L, 431L,
796L, 569L, 1386L, 413L, 401L, 407L, 252L, 375L, 378L, 339L,
457L, 1779L, 243L, 701L, 552L, 708L, 174L, 300L, 257L, 378L,
777L, 729L, 969L, 603L, 378L, 436L, 348L, 399L, 1662L, 1511L,
799L, 715L, 1400L, 399L, 516L, 399L, 355L, 1291L, 1286L, 657L,
374L, 492L, 334L, 295L, 210L, 270L, 858L, 1487L, 1020L, 1641L,
417L, 396L, 303L, 553L, 492L, 1097L, 612L, 441L, 654L, 611L,
532L, 474L, 864L, 377L, 465L, 435L, 1003L, 608L, 486L, 748L,
351L, 245L, 545L, 627L, 303L, 457L, 419L, 449L, 843L, 312L, 398L,
704L, 315L, 330L, 1054L, 259L, 507L, 372L, 468L, 345L, 1303L,
408L, 1031L, 471L, 653L, 925L, 397L, 231L, 684L, 449L, 336L,
344L, 619L, 917L, 417L, 516L, 359L, 550L, 222L, 789L, 608L, 659L,
853L, 360L, 657L, 372L, 305L, 353L, 650L, 564L, 547L, 969L, 505L,
230L, 953L, 769L, 307L, 516L, 408L, 342L, 267L, 570L, 572L, 348L,
1005L, 981L, 1586L, 1302L, 369L, 1290L, 1458L, 572L, 1122L, 363L,
879L, 651L, 466L, 1203L, 485L, 440L, 473L, 810L, 1320L, 461L,
455L, 258L, 660L, 297L, 285L, 424L, 273L, 378L, 432L, 293L, 410L,
327L, 483L, 477L, 551L, 894L, 638L, 538L, 678L, 303L, 478L, 1046L,
995L, 360L, 252L, 480L, 490L, 475L, 394L, 1185L, 357L, 361L,
387L, 489L, 450L, 788L, 366L, 340L, 829L, 469L, 404L, 593L, 498L,
840L, 601L, 235L, 452L, 395L, 504L, 299L, 662L, 357L, 686L, 683L,
248L, 574L, 1108L, 587L, 483L, 1481L, 1297L, 1334L, 579L, 182L,
456L, 1335L, 513L, 967L, 918L, 607L, 564L, 727L, 913L, 743L,
312L, 480L, 659L, 939L, 705L, 1001L, 553L, 339L, 286L, 452L,
744L, 519L, 521L, 491L, 565L, 522L, 377L, 861L, 812L, 523L, 332L,
800L, 1015L, 1000L, 513L, 990L, 1003L, 733L, 542L, 940L, 399L,
399L, 612L, 1361L, 399L, 399L, 318L, 319L, 510L, 504L, 841L,
1529L, 506L, 1881L, 500L, 358L, 240L, 1261L, 354L, 519L, 779L,
656L, 311L, 635L, 527L, 759L, 333L, 648L, 770L, 330L, 584L, 453L,
632L, 513L, 998L, 343L, 696L, 1286L, 391L, 374L, 893L, 375L,
426L, 658L, 455L, 518L, 466L, 417L, 614L, 285L, 480L, 845L, 344L,
534L, 572L, 1727L, 1085L, 480L, 468L, 192L, 348L, 578L, 2433L,
390L, 1031L, 1129L, 626L, 735L, 963L, 439L, 272L, 806L, 743L,
560L, 250L, 679L, 459L, 207L, 905L, 616L, 404L, 489L, 582L, 340L,
435L, 1632L, 417L, 221L, 279L, 462L, 357L, 288L, 248L, 981L,
1015L, 935L, 678L, 279L, 348L, 470L, 958L, 867L, 352L, 735L,
293L, 911L, 460L, 767L, 386L, 531L, 411L, 192L, 742L, 373L, 1454L,
970L, 285L, 468L, 273L, 1527L, 612L, 983L, 552L, 998L, 553L,
812L, 983L, 403L, 1706L, 781L, 183L, 405L, 891L, 647L, 1022L,
946L, 476L, 270L, 471L, 888L, 435L, 354L, 563L, 526L, 877L, 1170L,
351L, 863L, 1503L, 562L, 1174L, 345L, 385L, 275L, 374L, 171L,
474L, 408L, 1640L, 345L, 462L, 722L, 1645L, 504L, 840L, 459L,
783L, 501L, 473L, 609L, 684L, 543L, 353L, 788L, 684L, 734L, 242L,
751L, 478L, 471L, 365L, 293L, 380L, 486L, 617L, 786L, 436L, 632L,
624L, 386L, 925L, 469L, 405L, 2406L, 462L, 435L, 251L, 1118L,
349L, 779L, 343L, 458L, 264L, 243L, 935L, 535L, 576L, 480L, 406L,
606L, 495L, 396L, 456L, 798L, 404L, 285L, 375L, 922L, 1136L,
330L, 339L, 559L, 998L, 239L, 587L, 468L, 1237L, 1722L, 699L,
436L, 377L, 306L, 326L, 1076L, 385L, 537L, 315L, 342L, 386L,
400L, 340L, 202L, 266L, 455L, 435L, 259L, 317L, 456L, 249L, 452L,
1345L, 699L, 456L, 456L, 453L, 275L, 315L, 693L, 354L, 475L,
780L, 415L, 956L, 554L, 258L, 418L, 996L, 552L, 511L, 1404L,
469L, 262L, 398L, 242L, 350L, 538L, 379L, 300L, 460L, 373L, 276L,
258L, 740L, 609L, 753L, 357L, 495L, 532L, 551L, 234L, 633L, 480L,
312L, 898L, 350L, 705L, 265L, 345L, 334L, 334L, 582L, 583L, 582L,
478L, 465L, 480L, 408L, 870L, 624L, 1107L, 303L, 384L, 1165L,
1456L, 878L, 297L, 301L, 276L, 372L, 551L, 799L, 496L, 204L,
552L, 791L, 330L, 359L, 480L, 468L, 414L, 1102L, 876L, 1112L,
850L, 536L, 500L, 374L, 825L, 476L, 499L, 275L, 345L, 616L, 360L,
609L, 310L, 260L, 376L, 283L, 390L, 1529L, 1310L, 207L, 1039L,
661L, 570L, 1292L, 914L, 843L, 658L, 302L, 1119L, 609L, 225L,
317L, 1091L, 225L, 403L, 544L, 495L, 912L, 744L, 473L, 985L,
342L, 630L, 298L, 392L, 297L, 933L, 888L, 666L, 1023L, 346L,
310L, 1134L, 840L, 1277L, 387L, 463L, 435L, 610L, 492L, 1107L,
582L, 582L, 582L, 1307L, 647L, 1280L, 555L, 645L, 267L, 952L,
588L, 348L, 287L, 507L, 410L, 737L, 731L, 354L, 2192L, 309L,
388L, 692L, 389L, 742L, 766L, 1228L, 1640L, 237L, 495L, 351L,
285L, 2443L, 963L, 296L, 420L, 482L, 246L, 553L, 621L, 405L,
597L, 459L, 310L, 300L, 450L, 471L, 291L, 610L, 723L, 380L, 1439L,
312L, 900L, 275L, 396L, 342L, 309L, 549L, 355L, 474L, 417L, 372L,
384L, 291L, 987L, 629L, 407L, 655L, 357L, 473L, 348L, 459L, 599L,
474L, 430L, 620L, 584L, 546L, 435L, 242L, 1167L, 627L, 378L,
945L, 349L, 255L, 216L, 530L, 516L, 606L, 449L, 1490L, 401L,
1070L, 899L, 452L, 1304L, 451L, 723L, 354L, 229L, 629L, 639L,
501L, 465L, 344L, 1895L, 288L, 341L, 2377L, 542L, 453L, 291L,
645L, 494L, 471L, 612L, 1294L, 713L, 1291L, 467L, 734L, 300L,
1432L, 320L, 753L, 609L, 1051L, 231L, 875L, 704L, 438L, 742L,
504L, 1334L, 738L, 342L, 435L, 1133L, 1229L, 436L, 310L, 494L,
273L, 1228L, 626L, 470L, 235L, 1264L, 465L, 450L, 350L, 647L,
541L, 256L, 231L, 435L, 485L, 224L, 555L, 395L, 300L, 969L, 237L
)
I've counted the `len` values first and then used `geom_col`
instead of `geom_bar`,
just to simplify the code a bit.
# dplyr supplies count() and slice_max() used below.
library(dplyr)

Len <- as.data.frame(len)

# One row per distinct length (sorted by `len`) with its frequency `n`.
len_counts <- count(Len, len)

# One outline colour per bar, in the same order as the rows of `len_counts`.
cols <- scico(nrow(len_counts), palette = "acton")
len_counts$bar_col <- cols

# The most frequent length(s). Ties are all kept, and each row carries the
# colour of its own bar, so label and bar colours cannot get out of step.
top_len <- slice_max(len_counts, n)

ggplot(len_counts, aes(x = len)) +
  geom_col(aes(y = n), color = len_counts$bar_col) +
  # The density must be estimated from the raw lengths, not from the counted
  # table, and after_stat() only works inside aes(). after_stat(count) is
  # density * number of observations, i.e. on the same scale as the bars.
  geom_density(data = Len, aes(y = after_stat(count))) +
  # Label the tallest bar(s) with the corresponding length on the x-axis.
  geom_label(
    data = top_len,
    aes(y = n, label = len),
    color = top_len$bar_col
  ) +
  scale_x_continuous(limits = c(100, 1600)) + # Adjust for the full data
  scale_y_continuous(breaks = 0:max(len_counts$n)) +
  theme_bw()