r ggplot2 label curve-fitting geom-text

add geom_label and geom_density to barplot distribution


I'm working on a plot showing the length distribution of some events in the human genome. I'm pretty OK with the end result but wish to add a geom_label for the highest count reporting the corresponding length on the x-axis as well as a geom_density distribution — if possible.

As for the label, can its color match that of the bar it refers to, if that isn't already the default? Thanks in advance!

Below are the code I'm using, a dput() of the input, and the final output I'm getting from the plot.

library(scico)
library(readr)
library(stringr)
library(ggplot2)

# Import the data; the newline delimiter is meant to yield one row per line
# of the FASTA file.
retros <- read_delim("/path/to/homo_sapiens_Retrogenes.fasta.txt", delim = "\n", col_names = FALSE) #import the data

# wrangle
# Split on the ">retro_hsap_<n>" header markers so each piece holds one record.
# NOTE(review): `retros` is passed as a whole data frame, so this relies on
# str_split() coercing it to text -- confirm that is intended.
single_retro <- str_split(retros, ">retro_hsap_[0-9]+")

# Flatten every piece into one character vector and drop the first piece
# (the text preceding the first header). Negative indexing is safe even when
# nothing was read.
retro_seqs <- unlist(single_retro)[-1]

# Length of each record = number of uppercase letters it contains.
# str_count() is vectorised, so no loop or incremental c() growth is needed,
# and an empty input simply yields integer(0).
len <- str_count(retro_seqs, "[A-Z]")

# plot
# geom_bar() draws one bar per distinct length, and a colour vector passed as
# a fixed parameter must have exactly that many entries. Derive the palette
# size from the data instead of hard-coding it, so the plot also works on a
# subset (or a different data set).
bar_cols <- scico(length(unique(len)), palette = "acton")

ggplot(as.data.frame(len), aes(len)) +
  geom_bar(color = bar_cols) +
  scale_x_continuous(breaks = seq(0, 6500, 250), expand = c(0, 0)) +
  scale_y_continuous(limits = c(0, 30), expand = c(0, 0)) +
  theme_bw()

dput(len) – first 1000 only

c(408L, 321L, 522L, 942L, 462L, 564L, 765L, 747L, 465L, 957L, 
993L, 1056L, 690L, 1554L, 1209L, 246L, 462L, 3705L, 1554L, 507L, 
681L, 1173L, 408L, 330L, 1317L, 240L, 576L, 2301L, 1911L, 1677L, 
1014L, 756L, 918L, 864L, 528L, 882L, 1131L, 1440L, 1167L, 1146L, 
1002L, 906L, 1056L, 1881L, 396L, 1278L, 501L, 1110L, 303L, 1176L, 
699L, 747L, 1971L, 3318L, 1875L, 450L, 354L, 1218L, 378L, 303L, 
777L, 915L, 5481L, 576L, 1920L, 2022L, 1662L, 519L, 936L, 423L, 
1149L, 600L, 1896L, 648L, 2238L, 1419L, 423L, 552L, 1299L, 1071L, 
963L, 471L, 408L, 729L, 1896L, 1068L, 1254L, 1179L, 1188L, 645L, 
978L, 903L, 1191L, 1119L, 747L, 1005L, 273L, 1191L, 519L, 930L, 
1053L, 2157L, 933L, 888L, 591L, 1287L, 457L, 294L, 291L, 669L, 
270L, 556L, 444L, 483L, 438L, 452L, 659L, 372L, 480L, 464L, 477L, 
256L, 350L, 357L, 524L, 477L, 218L, 192L, 216L, 587L, 473L, 525L, 
657L, 241L, 719L, 383L, 459L, 855L, 417L, 283L, 408L, 678L, 681L, 
1254L, 879L, 250L, 857L, 706L, 456L, 567L, 190L, 887L, 287L, 
240L, 960L, 587L, 361L, 816L, 297L, 290L, 253L, 335L, 609L, 507L, 
294L, 1475L, 464L, 780L, 552L, 555L, 1605L, 1127L, 382L, 579L, 
645L, 273L, 241L, 552L, 344L, 890L, 1346L, 1067L, 764L, 431L, 
796L, 569L, 1386L, 413L, 401L, 407L, 252L, 375L, 378L, 339L, 
457L, 1779L, 243L, 701L, 552L, 708L, 174L, 300L, 257L, 378L, 
777L, 729L, 969L, 603L, 378L, 436L, 348L, 399L, 1662L, 1511L, 
799L, 715L, 1400L, 399L, 516L, 399L, 355L, 1291L, 1286L, 657L, 
374L, 492L, 334L, 295L, 210L, 270L, 858L, 1487L, 1020L, 1641L, 
417L, 396L, 303L, 553L, 492L, 1097L, 612L, 441L, 654L, 611L, 
532L, 474L, 864L, 377L, 465L, 435L, 1003L, 608L, 486L, 748L, 
351L, 245L, 545L, 627L, 303L, 457L, 419L, 449L, 843L, 312L, 398L, 
704L, 315L, 330L, 1054L, 259L, 507L, 372L, 468L, 345L, 1303L, 
408L, 1031L, 471L, 653L, 925L, 397L, 231L, 684L, 449L, 336L, 
344L, 619L, 917L, 417L, 516L, 359L, 550L, 222L, 789L, 608L, 659L, 
853L, 360L, 657L, 372L, 305L, 353L, 650L, 564L, 547L, 969L, 505L, 
230L, 953L, 769L, 307L, 516L, 408L, 342L, 267L, 570L, 572L, 348L, 
1005L, 981L, 1586L, 1302L, 369L, 1290L, 1458L, 572L, 1122L, 363L, 
879L, 651L, 466L, 1203L, 485L, 440L, 473L, 810L, 1320L, 461L, 
455L, 258L, 660L, 297L, 285L, 424L, 273L, 378L, 432L, 293L, 410L, 
327L, 483L, 477L, 551L, 894L, 638L, 538L, 678L, 303L, 478L, 1046L, 
995L, 360L, 252L, 480L, 490L, 475L, 394L, 1185L, 357L, 361L, 
387L, 489L, 450L, 788L, 366L, 340L, 829L, 469L, 404L, 593L, 498L, 
840L, 601L, 235L, 452L, 395L, 504L, 299L, 662L, 357L, 686L, 683L, 
248L, 574L, 1108L, 587L, 483L, 1481L, 1297L, 1334L, 579L, 182L, 
456L, 1335L, 513L, 967L, 918L, 607L, 564L, 727L, 913L, 743L, 
312L, 480L, 659L, 939L, 705L, 1001L, 553L, 339L, 286L, 452L, 
744L, 519L, 521L, 491L, 565L, 522L, 377L, 861L, 812L, 523L, 332L, 
800L, 1015L, 1000L, 513L, 990L, 1003L, 733L, 542L, 940L, 399L, 
399L, 612L, 1361L, 399L, 399L, 318L, 319L, 510L, 504L, 841L, 
1529L, 506L, 1881L, 500L, 358L, 240L, 1261L, 354L, 519L, 779L, 
656L, 311L, 635L, 527L, 759L, 333L, 648L, 770L, 330L, 584L, 453L, 
632L, 513L, 998L, 343L, 696L, 1286L, 391L, 374L, 893L, 375L, 
426L, 658L, 455L, 518L, 466L, 417L, 614L, 285L, 480L, 845L, 344L, 
534L, 572L, 1727L, 1085L, 480L, 468L, 192L, 348L, 578L, 2433L, 
390L, 1031L, 1129L, 626L, 735L, 963L, 439L, 272L, 806L, 743L, 
560L, 250L, 679L, 459L, 207L, 905L, 616L, 404L, 489L, 582L, 340L, 
435L, 1632L, 417L, 221L, 279L, 462L, 357L, 288L, 248L, 981L, 
1015L, 935L, 678L, 279L, 348L, 470L, 958L, 867L, 352L, 735L, 
293L, 911L, 460L, 767L, 386L, 531L, 411L, 192L, 742L, 373L, 1454L, 
970L, 285L, 468L, 273L, 1527L, 612L, 983L, 552L, 998L, 553L, 
812L, 983L, 403L, 1706L, 781L, 183L, 405L, 891L, 647L, 1022L, 
946L, 476L, 270L, 471L, 888L, 435L, 354L, 563L, 526L, 877L, 1170L, 
351L, 863L, 1503L, 562L, 1174L, 345L, 385L, 275L, 374L, 171L, 
474L, 408L, 1640L, 345L, 462L, 722L, 1645L, 504L, 840L, 459L, 
783L, 501L, 473L, 609L, 684L, 543L, 353L, 788L, 684L, 734L, 242L, 
751L, 478L, 471L, 365L, 293L, 380L, 486L, 617L, 786L, 436L, 632L, 
624L, 386L, 925L, 469L, 405L, 2406L, 462L, 435L, 251L, 1118L, 
349L, 779L, 343L, 458L, 264L, 243L, 935L, 535L, 576L, 480L, 406L, 
606L, 495L, 396L, 456L, 798L, 404L, 285L, 375L, 922L, 1136L, 
330L, 339L, 559L, 998L, 239L, 587L, 468L, 1237L, 1722L, 699L, 
436L, 377L, 306L, 326L, 1076L, 385L, 537L, 315L, 342L, 386L, 
400L, 340L, 202L, 266L, 455L, 435L, 259L, 317L, 456L, 249L, 452L, 
1345L, 699L, 456L, 456L, 453L, 275L, 315L, 693L, 354L, 475L, 
780L, 415L, 956L, 554L, 258L, 418L, 996L, 552L, 511L, 1404L, 
469L, 262L, 398L, 242L, 350L, 538L, 379L, 300L, 460L, 373L, 276L, 
258L, 740L, 609L, 753L, 357L, 495L, 532L, 551L, 234L, 633L, 480L, 
312L, 898L, 350L, 705L, 265L, 345L, 334L, 334L, 582L, 583L, 582L, 
478L, 465L, 480L, 408L, 870L, 624L, 1107L, 303L, 384L, 1165L, 
1456L, 878L, 297L, 301L, 276L, 372L, 551L, 799L, 496L, 204L, 
552L, 791L, 330L, 359L, 480L, 468L, 414L, 1102L, 876L, 1112L, 
850L, 536L, 500L, 374L, 825L, 476L, 499L, 275L, 345L, 616L, 360L, 
609L, 310L, 260L, 376L, 283L, 390L, 1529L, 1310L, 207L, 1039L, 
661L, 570L, 1292L, 914L, 843L, 658L, 302L, 1119L, 609L, 225L, 
317L, 1091L, 225L, 403L, 544L, 495L, 912L, 744L, 473L, 985L, 
342L, 630L, 298L, 392L, 297L, 933L, 888L, 666L, 1023L, 346L, 
310L, 1134L, 840L, 1277L, 387L, 463L, 435L, 610L, 492L, 1107L, 
582L, 582L, 582L, 1307L, 647L, 1280L, 555L, 645L, 267L, 952L, 
588L, 348L, 287L, 507L, 410L, 737L, 731L, 354L, 2192L, 309L, 
388L, 692L, 389L, 742L, 766L, 1228L, 1640L, 237L, 495L, 351L, 
285L, 2443L, 963L, 296L, 420L, 482L, 246L, 553L, 621L, 405L, 
597L, 459L, 310L, 300L, 450L, 471L, 291L, 610L, 723L, 380L, 1439L, 
312L, 900L, 275L, 396L, 342L, 309L, 549L, 355L, 474L, 417L, 372L, 
384L, 291L, 987L, 629L, 407L, 655L, 357L, 473L, 348L, 459L, 599L, 
474L, 430L, 620L, 584L, 546L, 435L, 242L, 1167L, 627L, 378L, 
945L, 349L, 255L, 216L, 530L, 516L, 606L, 449L, 1490L, 401L, 
1070L, 899L, 452L, 1304L, 451L, 723L, 354L, 229L, 629L, 639L, 
501L, 465L, 344L, 1895L, 288L, 341L, 2377L, 542L, 453L, 291L, 
645L, 494L, 471L, 612L, 1294L, 713L, 1291L, 467L, 734L, 300L, 
1432L, 320L, 753L, 609L, 1051L, 231L, 875L, 704L, 438L, 742L, 
504L, 1334L, 738L, 342L, 435L, 1133L, 1229L, 436L, 310L, 494L, 
273L, 1228L, 626L, 470L, 235L, 1264L, 465L, 450L, 350L, 647L, 
541L, 256L, 231L, 435L, 485L, 224L, 555L, 395L, 300L, 969L, 237L
)

test_len


Solution

  • I've counted the len values first and then used geom_col instead of geom_bar just to simplify the code a bit.

    # Bar plot of length counts with a count-scaled density curve and a label
    # on the most frequent length.
    # dplyr supplies %>% and count(), which this snippet relies on.
    library(dplyr)

    Len <- as.data.frame(len)

    # One row per distinct length (sorted by len) with its frequency in `n`,
    # so the palette lines up row-for-row with the bars.
    len_counts <- Len %>%
      count(len)
    cols <- scico(nrow(len_counts), palette = "acton")

    # Row(s) holding the highest count; ties are all kept, and the same mask
    # picks the matching bar colour(s) for the label.
    is_top <- len_counts$n == max(len_counts$n)

    ggplot(len_counts, aes(x = len)) +
      geom_col(aes(y = n), color = cols) +
      # The density must be estimated from the raw observations (not the
      # pre-counted table), and after_stat() only works inside aes().
      geom_density(data = Len, aes(y = after_stat(count))) +
      # Label the tallest bar with its length on the x-axis.
      geom_label(
        data = len_counts[is_top, ],
        aes(y = n, label = len),
        color = cols[is_top]
      ) +
      scale_x_continuous(limits = c(100, 1600)) + # Adjust for the full data
      scale_y_continuous(breaks = 0:max(len_counts$n)) +
      theme_bw()
    

    enter image description here