I have age-standardized mortality rates with a 95%CI graph in a ggplot, is there any way I can add a number of deaths by year in this graph? I thought of showing this as a bar chart by adding a second axis to the graph. Any help would be much appreciated.
library(tidyverse)
dummmortalitydata %>%
pivot_longer(-(1:4)) %>%
separate(name, into = c('measure', 'year'), sep = "(?=\\d{4})") %>%
rename(ReferencePopulation = ReferecePopulation) %>%
pivot_wider(names_from = measure, values_from = value) %>%
mutate(NoOfDeaths = ifelse(NoOfDeaths > LocalPopulation,
LocalPopulation, NoOfDeaths)) %>%
rowwise() %>%
mutate(mortality = NoOfDeaths / LocalPopulation,
lower = prop.test(NoOfDeaths, LocalPopulation)$conf[1],
upper = prop.test(NoOfDeaths, LocalPopulation)$conf[2]) %>%
group_by(year) %>%
summarize(mortality = sum(mortality * ReferencePopulation/sum(ReferencePopulation)),
lower = sum(lower * ReferencePopulation/sum(ReferencePopulation)),
upper = sum(upper * ReferencePopulation/sum(ReferencePopulation))) %>%
ggplot(aes(year, mortality)) +
geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2,
position = position_dodge(0.4)) +
geom_point(position = position_dodge(0.4)) +
scale_y_continuous(labels = scales::percent) +
theme_classic(base_size = 16)
here is my dummy data dput(dummmortalitydata)
structure(list(AgeGroup = c("0-4", "5-9", "10-14", "15-19", "20-24",
"25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "55-69",
"60-64", "65-69", "70-74", "75-79", "80-84", "85-89", "90+",
"0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39",
"40-44", "45-49", "50-54", "55-69", "60-64", "65-69", "70-74",
"75-79", "80-84", "85-89", "90+"), AgeGroup1 = c(1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2), Sex = c("M", "M", "M",
"M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M",
"M", "M", "M", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F"), ReferecePopulation = c(5000,
5500, 5500, 5500, 6000, 6000, 6500, 7000, 7000, 7000, 7000, 6500,
6000, 5500, 5000, 4000, 2500, 1500, 1000, 5000, 5500, 5500, 5500,
6000, 6000, 6500, 7000, 7000, 7000, 7000, 6500, 6000, 5500, 5000,
4000, 2500, 1500, 1000), NoOfDeaths2011 = c(52, 88, 45, 120,
22, 132, 143, 30, 36, 127, 127, 27, 135, 114, 78, 109, 96, 69,
125, 31, 137, 33, 69, 127, 55, 137, 77, 49, 136, 21, 40, 128,
99, 129, 146, 78, 36, 51), LocalPopulation2011 = c(384, 288,
783, 464, 798, 767, 119, 442, 383, 547, 854, 634, 172, 182, 372,
648, 947, 357, 605, 438, 759, 632, 715, 120, 811, 471, 780, 812,
349, 875, 873, 169, 232, 432, 577, 670, 579, 307), NoOfDeaths2012 = c(41,
74, 32, 23, 74, 97, 62, 53, 35, 64, 105, 104, 60, 64, 95, 35,
135, 88, 112, 47, 80, 117, 70, 145, 48, 139, 135, 101, 122, 73,
77, 38, 48, 60, 113, 85, 65, 57), LocalPopulation2012 = c(508,
502, 793, 586, 404, 187, 888, 301, 357, 711, 503, 693, 930, 359,
980, 281, 155, 650, 888, 126, 377, 286, 527, 241, 545, 473, 197,
596, 383, 625, 552, 409, 507, 700, 369, 414, 634, 148), NoOfDeaths2013 = c(82,
114, 44, 43, 106, 81, 84, 129, 40, 108, 68, 36, 124, 134, 79,
66, 96, 80, 135, 128, 60, 117, 53, 64, 88, 25, 120, 118, 24,
112, 69, 120, 96, 25, 121, 32, 119, 49), LocalPopulation2013 = c(773,
663, 461, 811, 882, 276, 284, 255, 145, 637, 311, 957, 947, 260,
275, 395, 716, 725, 794, 610, 751, 691, 595, 471, 760, 788, 660,
952, 792, 121, 420, 583, 477, 316, 566, 715, 557, 467), NoOfDeaths2014 = c(119,
38, 147, 106, 131, 131, 107, 41, 73, 112, 121, 74, 50, 20, 47,
141, 94, 118, 107, 134, 99, 79, 47, 70, 56, 137, 90, 63, 65,
121, 116, 83, 69, 119, 123, 95, 145, 142), LocalPopulation2014 = c(447,
248, 222, 940, 147, 935, 725, 413, 203, 602, 404, 706, 151, 869,
761, 889, 967, 453, 486, 815, 209, 967, 703, 199, 710, 731, 994,
370, 670, 299, 429, 275, 339, 290, 743, 625, 185, 261), NoOfDeaths2015 = c(91,
20, 35, 113, 106, 118, 29, 88, 83, 90, 119, 26, 120, 79, 76,
140, 71, 98, 23, 110, 93, 41, 20, 52, 118, 98, 117, 94, 36, 67,
23, 66, 83, 35, 64, 72, 98, 146), LocalPopulation2015 = c(603,
449, 808, 406, 719, 479, 229, 257, 298, 805, 366, 900, 322, 951,
552, 563, 170, 585, 611, 241, 427, 692, 685, 615, 851, 848, 871,
907, 845, 150, 712, 503, 279, 958, 995, 140, 398, 936), NoOfDeaths2016 = c(115,
120, 22, 27, 68, 149, 128, 115, 99, 86, 33, 75, 86, 130, 88,
105, 57, 76, 92, 56, 57, 88, 97, 91, 76, 71, 128, 109, 95, 73,
62, 125, 131, 101, 135, 131, 62, 45), LocalPopulation2016 = c(714,
846, 361, 556, 396, 118, 273, 365, 191, 489, 655, 971, 741, 431,
606, 249, 481, 617, 253, 451, 816, 676, 647, 417, 994, 597, 446,
581, 195, 160, 576, 234, 984, 298, 396, 967, 392, 961), NoOfDeaths2017 = c(25,
21, 77, 143, 38, 92, 75, 33, 49, 32, 127, 33, 95, 115, 102, 47,
79, 29, 51, 38, 114, 85, 51, 68, 133, 21, 29, 36, 30, 38, 104,
135, 50, 130, 117, 79, 25, 145), LocalPopulation2017 = c(815,
113, 380, 887, 323, 514, 477, 642, 532, 332, 508, 143, 723, 543,
939, 104, 860, 777, 391, 932, 118, 424, 288, 754, 296, 782, 742,
497, 343, 786, 524, 306, 794, 393, 686, 187, 558, 386)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -38L))
You would need to deliver the number of deaths per year to the ggplot
call with something like:
dum_out <- # all the data operations used previously
#data %>% group_by(year) %>%
# summarise( ....,
NumD =sum(mortality * ReferencePopulation)
) #as the fourth calculation in summarise
dum_out
# A tibble: 7 × 5
year mortality lower upper NumD
<chr> <dbl> <dbl> <dbl> <dbl>
1 2011 0.256 0.224 0.288 51212.
2 2012 0.210 0.175 0.249 41942.
3 2013 0.201 0.169 0.237 40183.
4 2014 0.238 0.202 0.279 47671.
5 2015 0.176 0.145 0.212 35288.
6 2016 0.246 0.209 0.284 49129.
7 2017 0.182 0.150 0.219 36483.
Now putting in a bar for NumD would not be adding useful information to that plot because the percent of total deaths that you have calculated with mortality = sum(mortality * ReferencePopulation/sum(ReferencePopulation))
is exactly proportional to the NumD value. You can see this if you run:
dum_out %>%
ggplot(aes(year, mortality)) +
geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2,
position = position_dodge(0.4)) +
geom_point(position = position_dodge(0.4)) +
# adding bars for yearly deaths on same scale as the original plot
geom_col(data=dum_out, aes(y=NumD/sum(NumD)) ) +
scale_y_continuous(labels = scales::percent, limits=c(0, 0.3)) +
theme_classic(base_size = 16)
Instead you should consider using a text label
to put numeric values under each bar. (You should also include the zero value in the y-axis.)
dum_out %>%
ggplot( aes(year, mortality, label=paste("Deaths=\n", round(NumD)))) +
geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2,
position = position_dodge(0.4)) +
geom_point(position = position_dodge(0.4)) + geom_text(vjust=4 ) +
scale_y_continuous(labels = scales::percent, limits=c(0, 0.3)) +
theme_classic(base_size = 16)