I have the following data table dt and want to create a density plot of nrOrders for each unique delivYear:
# Example data as dput() output: 168 rows with the columns delivYear and
# acquiYear (character), month (factor with levels Jan-Dec) and nrOrders
# (numeric), classed as data.table / data.frame.
# The value is not assigned to a name here; the surrounding text calls it dt.
structure(list(delivYear = c("2018", "2018", "2018", "2018",
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018",
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018",
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018",
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018",
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018",
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018",
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018",
"2019", "2019", "2019", "2019", "2019", "2019", "2019", "2019",
"2019", "2019", "2019", "2019", "2019", "2019", "2019", "2019",
"2019", "2019", "2019", "2019", "2019", "2019", "2019", "2019",
"2019", "2019", "2019", "2019", "2019", "2019", "2019", "2019",
"2019", "2019", "2019", "2019", "2019", "2019", "2019", "2019",
"2019", "2019", "2019", "2019", "2019", "2019", "2019", "2019",
"2020", "2020", "2020", "2020", "2020", "2020", "2020", "2020",
"2020", "2020", "2020", "2020", "2020", "2020", "2020", "2020",
"2020", "2020", "2020", "2020", "2020", "2020", "2020", "2020",
"2020", "2020", "2020", "2020", "2020", "2020", "2020", "2020",
"2020", "2020", "2020", "2020", "2020", "2020", "2020", "2020",
"2020", "2020", "2020", "2020", "2020", "2020", "2020", "2020",
"2020", "2020", "2020", "2020", "2020", "2020", "2020", "2020",
"2020", "2020", "2020", "2020"), acquiYear = c("2014", "2014",
"2014", "2014", "2014", "2014", "2014", "2014", "2014", "2014",
"2014", "2014", "2015", "2015", "2015", "2015", "2015", "2015",
"2015", "2015", "2015", "2015", "2015", "2015", "2016", "2016",
"2016", "2016", "2016", "2016", "2016", "2016", "2016", "2016",
"2016", "2016", "2017", "2017", "2017", "2017", "2017", "2017",
"2017", "2017", "2017", "2017", "2017", "2017", "2018", "2018",
"2018", "2018", "2018", "2018", "2018", "2018", "2018", "2018",
"2018", "2018", "2016", "2016", "2016", "2016", "2016", "2016",
"2016", "2016", "2016", "2016", "2016", "2016", "2017", "2017",
"2017", "2017", "2017", "2017", "2017", "2017", "2017", "2017",
"2017", "2017", "2018", "2018", "2018", "2018", "2018", "2018",
"2018", "2018", "2018", "2018", "2018", "2018", "2019", "2019",
"2019", "2019", "2019", "2019", "2019", "2019", "2019", "2019",
"2019", "2019", "2016", "2016", "2016", "2016", "2016", "2016",
"2016", "2016", "2016", "2016", "2016", "2016", "2017", "2017",
"2017", "2017", "2017", "2017", "2017", "2017", "2017", "2017",
"2017", "2017", "2018", "2018", "2018", "2018", "2018", "2018",
"2018", "2018", "2018", "2018", "2018", "2018", "2019", "2019",
"2019", "2019", "2019", "2019", "2019", "2019", "2019", "2019",
"2019", "2019", "2020", "2020", "2020", "2020", "2020", "2020",
"2020", "2020", "2020", "2020", "2020", "2020"), month = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L), .Label = c("Jan", "Feb", "Mar", "Apr", "May",
"Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"), class = "factor"),
nrOrders = c(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 0, 2, 4, 5,
3, 7, 3, 5, 4, 3, 7, 8, 7, 2, 24, 16, 33, 9, 27, 16, 10,
27, 9, 31, 35, 11, 11, 25, 15, 18, 19, 19, 8, 27, 34, 43,
51, 0, 11, 2, 0, 0, 0, 0, 0, 4, 5, 1, 0, 8, 1, 18, 19, 10,
31, 7, 5, 19, 3, 18, 12, 2, 9, 24, 11, 12, 13, 10, 14, 17,
24, 20, 14, 13, 4, 0, 27, 6, 5, 13, 14, 13, 20, 17, 64, 3,
6, 4, 8, 1, 5, 3, 2, 2, 3, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0,
1, 0, 1, 0, 7, 1, 15, 8, 1, 16, 7, 3, 5, 14, 9, 5, 12, 16,
0, 13, 5, 0, 11, 7, 12, 12, 5, 35, 4, 6, 11, 11, 6, 19, 6,
22, 19, 52, 61, 44, 4, 6, 9, 1, 6, 2, 2, 1, 1, 0, 0, 0)), row.names = c(NA,
-168L), class = c("data.table", "data.frame"))
The question for the distribution/density plot is as follows: How is the number of orders (nrOrders) for each delivery year (delivYear) distributed over the months?
I am not sure how to do that because there are no continuous variables.
How can I plot a distribution/density plot for this problem?
As your month variable is categorical, you may want to consider an area chart with geom_area() (the code below uses position = "identity", so the yearly areas overlap rather than stack). Also, if I understood correctly, you should first summarise the number of orders for each month and delivery year before plotting:
# Total the orders per month and delivery year, then draw one semi-transparent
# area per delivery year so the overlapping yearly profiles stay visible.
# Uses the dplyr verbs and ggplot2 layers that the snippet already relied on.
dt %>%
  group_by(month, delivYear) %>%
  # .groups = "drop" returns an ungrouped result and silences the
  # "summarise() has grouped output" message.
  summarise(sumOrders = sum(nrOrders), .groups = "drop") %>%
  ggplot() +
  geom_area(
    # group = delivYear is required because month is a factor: without it,
    # each month level would form its own one-point group.
    aes(x = month, y = sumOrders, group = delivYear, fill = delivYear),
    # alpha is a constant, so it is set outside aes(); inside aes() it would
    # be treated as a data value, run through the alpha scale and add a
    # spurious "alpha" legend entry.
    alpha = 0.7,
    # Overlay the yearly areas instead of stacking them on top of each other.
    position = "identity"
  ) +
  theme_classic()
Output: