r ggplot2

How to get geom_text to always show above geom_bar


I'm working with the Insurance data set. I created a summary of it in which the rows are sorted by charges from highest to lowest and labeled as "Highest_five_percent" or "Lowest_five_percent":

summary <- dput(structure(list(age = c(54, 45, 52, 31, 33, 60, 28, 64, 59, 44, 
63, 57, 60, 54, 61, 60, 64, 59, 58, 51, 61, 63, 64, 52, 63, 64, 
62, 53, 61, 51, 44, 46, 60, 37, 43, 62, 48, 60, 51, 29, 22, 55, 
51, 54, 47, 34, 56, 42, 56, 36, 57, 53, 44, 47, 50, 45, 43, 55, 
47, 43, 25, 46, 54, 38, 50, 47, 46, 19, 19, 19, 19, 19, 19, 19, 
18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 22, 22, 22, 
19, 19, 19, 19, 18, 18, 18, 19, 19, 18, 18, 18, 19, 19, 19, 18, 
18, 19, 18, 18, 21, 21, 21, 21, 20, 19, 19, 19, 19, 19, 19, 19, 
19, 19, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18), sex = c("female", 
"male", "male", "female", "female", "male", "male", "male", "male", 
"female", "female", "male", "male", "male", "female", "male", 
"female", "female", "male", "male", "male", "female", "female", 
"male", "male", "male", "male", "female", "male", "female", "female", 
"male", "male", "female", "female", "male", "male", "female", 
"female", "male", "male", "female", "female", "male", "male", 
"female", "male", "female", "male", "male", "female", "male", 
"female", "female", "male", "male", "male", "male", "male", "male", 
"male", "female", "male", "male", "male", "male", "female", "female", 
"female", "female", "female", "female", "female", "female", "male", 
"male", "male", "male", "male", "male", "male", "male", "male", 
"male", "male", "male", "male", "male", "male", "male", "male", 
"male", "male", "male", "female", "female", "female", "male", 
"male", "female", "female", "female", "male", "male", "male", 
"female", "female", "male", "female", "female", "male", "male", 
"male", "male", "male", "male", "male", "male", "male", "male", 
"male", "male", "male", "male", "male", "male", "male", "male", 
"male", "male", "male", "male", "male", "male"), bmi = c(47, 
30, 34, 38, 36, 33, 36, 37, 41, 38, 38, 42, 41, 41, 36, 40, 34, 
37, 37, 43, 36, 32, 31, 42, 35, 34, 31, 37, 36, 37, 44, 42, 31, 
48, 46, 32, 41, 32, 35, 36, 53, 35, 38, 34, 39, 30, 34, 40, 32, 
42, 31, 34, 39, 37, 34, 36, 38, 31, 36, 36, 46, 36, 31, 38, 32, 
36, 35, 33, 30, 29, 25, 21, 19, 18, 35, 34, 30, 29, 28, 23, 26, 
25, 24, 23, 23, 21, 16, 40, 34, 27, 36, 31, 31, 28, 40, 40, 39, 
26, 25, 38, 38, 37, 23, 22, 20, 31, 31, 17, 27, 21, 37, 36, 31, 
23, 33, 35, 34, 34, 30, 29, 28, 21, 20, 20, 53, 43, 41, 37, 34, 
34, 34, 33, 30, 23), children = c(0, 0, 3, 1, 0, 0, 1, 2, 1, 
0, 0, 1, 0, 3, 1, 0, 1, 1, 2, 2, 1, 2, 2, 2, 0, 0, 3, 3, 0, 3, 
2, 3, 3, 2, 0, 0, 2, 0, 2, 2, 1, 0, 0, 2, 2, 1, 0, 2, 2, 3, 0, 
0, 0, 1, 2, 2, 2, 0, 1, 3, 2, 0, 1, 3, 1, 0, 1, 0, 0, 0, 0, 0, 
0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), 
    smoker = c("yes", "yes", "yes", "yes", "yes", "yes", "yes", 
    "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", 
    "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", 
    "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", 
    "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", 
    "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", 
    "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", 
    "yes", "yes", "yes", "yes", "yes", "yes", "no", "no", "no", 
    "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
    "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
    "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
    "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
    "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
    "no", "no", "no", "no", "no", "no", "no", "no", "no", "no", 
    "no", "no", "no", "no"), region = c("southeast", "southeast", 
    "northwest", "northeast", "northwest", "southwest", "southwest", 
    "southeast", "southeast", "southeast", "southwest", "southeast", 
    "southeast", "northeast", "northeast", "southwest", "southwest", 
    "northeast", "northwest", "southeast", "southwest", "southwest", 
    "southwest", "southeast", "southeast", "southeast", "northwest", 
    "northwest", "southeast", "northeast", "southeast", "southeast", 
    "northwest", "southwest", "southeast", "northeast", "northwest", 
    "southeast", "northeast", "southwest", "southeast", "southeast", 
    "southeast", "southeast", "southeast", "northwest", "northwest", 
    "southeast", "southeast", "northeast", "northwest", "northeast", 
    "northwest", "southeast", "southwest", "northwest", "southeast", 
    "northeast", "southeast", "southeast", "southeast", "northeast", 
    "southeast", "southeast", "northeast", "southeast", "southwest", 
    "southwest", "southwest", "southwest", "southwest", "southwest", 
    "southwest", "southwest", "southeast", "southeast", "southeast", 
    "southeast", "northeast", "southeast", "northeast", "northeast", 
    "northeast", "northeast", "northeast", "northeast", "northeast", 
    "southwest", "southeast", "southeast", "northwest", "northwest", 
    "northwest", "northwest", "southeast", "southeast", "southeast", 
    "northwest", "northwest", "southeast", "southeast", "southeast", 
    "northwest", "northwest", "northwest", "southeast", "southeast", 
    "northwest", "southeast", "southeast", "southeast", "southeast", 
    "southwest", "southeast", "southeast", "southwest", "southwest", 
    "southwest", "southwest", "southwest", "southwest", "southwest", 
    "southwest", "southwest", "southeast", "southeast", "southeast", 
    "southeast", "southeast", "southeast", "southeast", "southeast", 
    "southeast", "southeast"), charges = c(63770, 62593, 60021, 
    58571, 55135, 52591, 51195, 49578, 48970, 48885, 48824, 48676, 
    48674, 48549, 48518, 48173, 47928, 47897, 47496, 47463, 47404, 
    47305, 47291, 47270, 47056, 46889, 46718, 46661, 46599, 46255, 
    46201, 46151, 46131, 46114, 45863, 45710, 45702, 45009, 44641, 
    44585, 44501, 44424, 44400, 44261, 44203, 43944, 43921, 43896, 
    43814, 43753, 43579, 43254, 42983, 42970, 42857, 42761, 42560, 
    42304, 42211, 42125, 42112, 42112, 42000, 41949, 41919, 41676, 
    41662, 1749, 1744, 1743, 1737, 1732, 1729, 1728, 1728, 1726, 
    1720, 1719, 1712, 1711, 1709, 1708, 1706, 1705, 1705, 1702, 
    1695, 1683, 1675, 1665, 1646, 1640, 1640, 1636, 1635, 1634, 
    1633, 1633, 1632, 1632, 1632, 1630, 1628, 1627, 1625, 1622, 
    1622, 1621, 1616, 1608, 1534, 1532, 1526, 1515, 1392, 1263, 
    1262, 1261, 1256, 1254, 1252, 1243, 1242, 1242, 1163, 1149, 
    1147, 1141, 1137, 1137, 1136, 1136, 1132, 1122), group = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), levels = c("Highest_five_percent", 
    "Lowest_five_percent"), class = "factor")), row.names = c(1L, 
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 
16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 
42L, 43L, 44L, 45L, 46L, 47L, 48L, 49L, 50L, 51L, 52L, 53L, 54L, 
55L, 56L, 57L, 58L, 59L, 60L, 61L, 62L, 63L, 64L, 65L, 66L, 67L, 
1272L, 1273L, 1274L, 1275L, 1276L, 1277L, 1278L, 1279L, 1280L, 
1281L, 1282L, 1283L, 1284L, 1285L, 1286L, 1287L, 1288L, 1289L, 
1290L, 1291L, 1292L, 1293L, 1294L, 1295L, 1296L, 1297L, 1298L, 
1299L, 1300L, 1301L, 1302L, 1303L, 1304L, 1305L, 1306L, 1307L, 
1308L, 1309L, 1310L, 1311L, 1312L, 1313L, 1314L, 1315L, 1316L, 
1317L, 1318L, 1319L, 1320L, 1321L, 1322L, 1323L, 1324L, 1325L, 
1326L, 1327L, 1328L, 1329L, 1330L, 1331L, 1332L, 1333L, 1334L, 
1335L, 1336L, 1337L, 1338L), class = "data.frame"))

Great so far. Next I want to create bar charts to show the results for each feature, separated by group (top 5% or bottom 5%). That works great, until I try to label each bar in the chart.

The larger values (charges) are easy to read because of the distance from the top of the bar, but the smaller values overlap with the bars, and are difficult (if not impossible in some cases) to read.

# Plot total charges per feature value for each of the first six columns,
# faceted by group. Every ggplot2 function is namespace-qualified so the loop
# runs without library(ggplot2) being attached.
value_col <- ncol(summary) - 1  # charges: the column just before `group`
for (i in 1:6) {
  # Sum the value column within each (feature value, group) combination;
  # aggregate() names the results Group.1 (feature), Group.2 (group), x (sum).
  df1 <- aggregate(
    summary[, value_col],
    by = list(summary[, i], summary$group),
    FUN = sum
  )
  # print() is needed because ggplot objects are not auto-printed in a loop.
  print(
    ggplot2::ggplot(df1, ggplot2::aes(x = Group.1, y = x)) +
      ggplot2::geom_col() +
      ggplot2::labs(
        title = paste0(
          colnames(summary)[value_col], " by ", colnames(summary)[i]
        )
      ) +
      # Labels are anchored at 105% of the bar height and rotated by 90 degrees.
      ggplot2::geom_text(
        mapping = ggplot2::aes(
          y = 1.05 * x,
          label = scales::comma(x),
          group = Group.1
        ),
        angle = 90
      ) +
      ggplot2::facet_grid(~Group.2)
  )
}

Here is an example of the output that demonstrates the problem:

enter image description here

How can the text always show above the bar so it's always easy to read?


Solution

  • From a technical point of view - and as already suggested in the comments - align the labels to the right of the data point using hjust=0. Secondly, rather than manually adjusting the label positions to add padding, switch to geom_label, which allows you to set the padding amount (label.padding) in absolute units, such as 'pt' or 'mm'. By default, it is set to unit(0.25, 'lines'). Even after these adjustments, however, you will still need to make room for the labels, e.g. by increasing the expansion of the x scale at the upper end. To avoid this, I would suggest putting the labels for larger values inside the bars.

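    Finally, from a data viz point of view I would suggest binning age and bmi into intervals of width 10 and drawing horizontal bars, so that the labels can be read left to right: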

    library(ggplot2)
    
    # Replace the numeric age and bmi columns with factors of 10-wide intervals
    summary[c("age", "bmi")] <- lapply(
      summary[c("age", "bmi")],
      cut_width,
      width = 10
    )
    # One horizontal bar chart of summed values per feature column (1 to 6),
    # faceted by group.
    plot_list <- lapply(seq_len(6), function(i) {
      value_col <- ncol(summary) - 1
      totals <- aggregate(
        summary[, value_col],
        by = list(summary[, i], summary$group),
        FUN = sum
      )
      # Bars longer than half of the longest bar get a white, right-aligned
      # label (inside the bar); the others get a black, left-aligned label.
      half_max <- .5 * max(totals$x)
      chart_title <- paste0(colnames(summary)[7], " by ", colnames(summary)[i])

      ggplot(totals, aes(x = x, y = factor(Group.1))) +
        geom_col() +
        geom_label(
          aes(
            label = scales::comma(x),
            hjust = ifelse(x > half_max, 1, 0),
            color = I(ifelse(x > half_max, "white", "black"))
          ),
          size = 8 / .pt,
          fill = NA, border.color = NA
        ) +
        labs(y = NULL, title = chart_title) +
        facet_grid(~Group.2)
    })
    
    # For illustration purposes
    library(patchwork)
    
    # Combine the plots into one figure, save it as a temporary PNG
    # (16 x 10 cm, scaled by 2) and show the resulting file.
    local({
      png_path <- tempfile(fileext = ".png")
      ggsave(
        filename = png_path,
        plot = wrap_plots(plot_list),
        width = 16, height = 10, units = "cm", scale = 2
      )
      fs::file_show(png_path)
    })
    

    enter image description here