Given a dataframe as follows:
# Example data: 36 rows, one per month-end date from 2010/1/31 to 2012/12/31.
# `date` is a factor of "YYYY/M/D" labels; `pct` and `values` are numeric.
df <- local({
  # Factor levels, listed explicitly so their order does not depend on the
  # locale's sort order.
  date_levels <- c(
    "2010/1/31", "2010/10/31", "2010/11/30", "2010/12/31",
    "2010/2/28", "2010/3/31", "2010/4/30", "2010/5/31",
    "2010/6/30", "2010/7/31", "2010/8/31", "2010/9/30",
    "2011/1/31", "2011/10/31", "2011/11/30", "2011/12/31",
    "2011/2/28", "2011/3/31", "2011/4/30", "2011/5/31",
    "2011/6/30", "2011/7/31", "2011/8/31", "2011/9/30",
    "2012/1/31", "2012/10/31", "2012/11/30", "2012/12/31",
    "2012/2/29", "2012/3/31", "2012/4/30", "2012/5/31",
    "2012/6/30", "2012/7/31", "2012/8/31", "2012/9/30"
  )

  # For each row, the position in `date_levels` of that row's date.
  level_codes <- c(
    1L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 2L, 3L, 4L,
    13L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 14L, 15L, 16L,
    25L, 29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 26L, 27L, 28L
  )

  data.frame(
    date = factor(date_levels[level_codes], levels = date_levels),
    pct = c(
      14, 17.9, 17.9, 18.1, 18.2, 18.2, 18.2, 18.2, 18.3, 18.3, 18.4, 18.8,
      19.9, 15.8, 16.34, 16.5, 16.6, 16.8, 16.8, 16.9, 17, 17, 17, 18.5,
      13.1, 14.7, 14.8, 14.7, 14.5, 14.4, 14.2, 14.1, 14.1, 14.1, 14.2, 14.5
    ),
    values = c(
      12718.1, 25052.3, 36374, 47884.4, 60339.5, 72669.4,
      84922.2, 97492, 111028.5, 125313.3, 139224.2, 154553.7,
      15249, 29018.1, 42921.8, 56570.8, 71267.6, 85832.7,
      100240.7, 114945.7, 130810.8, 147357.2, 163486.1, 181225.8,
      17222.1, 33668.6, 49318.8, 64921.9, 81636.7, 98221.6,
      114536.5, 131195.4, 149422, 168355.8, 186832.5, 207166.7
    )
  )
})
I have plotted it with the following code:
# Packages this snippet relies on: melt() from reshape2, %>% / filter() /
# mutate() from dplyr, the plotting functions from ggplot2, and
# date_breaks() from scales.
library(reshape2)
library(dplyr)
library(ggplot2)
library(scales)

# Parse the "YYYY/M/D" labels into Date objects.
df$date <- as.Date(df$date, format = "%Y/%m/%d")

# Long format: one row per (date, variable) pair, with the number in `value`.
df_m <- melt(df, id.vars = "date")

# `pct` is multiplied by `scale_factor` so the points can share the primary
# axis with `values`; `coeff` undoes that scaling for the secondary axis.
scale_factor <- 10000
coeff <- 1 / scale_factor

# Rows for the bars.
df_m_x <- df_m %>%
  filter(variable %in% c("values"))

# Rows for the points, rescaled onto the primary axis.
df_m_ratio_x <- df_m %>%
  filter(variable %in% c("pct")) %>%
  mutate(value = value * scale_factor)

ggplot() +
  geom_bar(
    data = df_m_x,
    aes(x = date, y = value, fill = variable, group = 1),
    alpha = 0.5,
    stat = "identity"
  ) +
  geom_point(
    data = df_m_ratio_x,
    aes(x = date, y = value, col = variable),
    size = 3
  ) +
  scale_y_continuous(name = "$", sec.axis = sec_axis(~ . * coeff, name = "%")) +
  # Restrict the axis to the observed date range, with a break every 6 months.
  scale_x_date(
    limits = c(min(df$date), max(df$date)),
    breaks = date_breaks("6 months"),
    date_labels = "%Y-%m"
  ) +
  # NOTE(review): this layer is given no data or aesthetics, and ggplot() above
  # supplies none either, so it presumably has nothing to fit - confirm.
  geom_smooth(method = "lm")
Out:
But as you may notice, the dates on the x-axis are misaligned by one month in the figure.
How could I solve this problem? Thanks.
The issue appears to be a difference in how binning occurs between `geom_bar` and `geom_point` when you set the limits manually in `scale_x_date`. Perhaps omitting the manual limits would be acceptable:
library(ggplot2)
library(scales)

# Multiplier that maps the primary ("$") axis onto the secondary ("%") axis.
coeff <- 1 / 10000

# Bars show `values`; points show `pct` rescaled onto the primary axis.
# The date column is converted to Date inside the shared x aesthetic.
ggplot(df, aes(x = as.Date(date, format = "%Y/%m/%d"))) +
  geom_bar(
    mapping = aes(y = values),
    stat = "identity",
    fill = "#F8766D",
    alpha = 0.5
  ) +
  geom_point(
    mapping = aes(y = pct / coeff),
    colour = "#F8766D",
    size = 3
  ) +
  scale_y_continuous(
    name = "$",
    sec.axis = sec_axis(~ . * coeff, name = "%")
  ) +
  # No manual limits: only the break spacing and the label format are set.
  scale_x_date(
    name = "date",
    date_labels = "%Y-%m",
    date_breaks = "6 months"
  )
The bars appear to be "off" because they are actually plotted slightly before the breaks: each observation is dated on the last day of its month, whereas the axis breaks fall on the first day of a month. Here is a blown-up version:
An alternative might be to use the `yearmon` format from the `zoo` package:
library(zoo)

# Multiplier that maps the primary ("$") axis onto the secondary ("%") axis.
coeff <- 1 / 10000

# Same bar-and-point chart, with the x aesthetic converted to zoo's yearmon
# class and the matching yearmon scale used for the axis.
ggplot(df, aes(x = as.yearmon(date, format = "%Y/%m/%d"))) +
  geom_bar(
    mapping = aes(y = values),
    stat = "identity",
    fill = "#F8766D",
    alpha = 0.5
  ) +
  geom_point(
    mapping = aes(y = pct / coeff),
    colour = "#F8766D",
    size = 3
  ) +
  scale_y_continuous(
    name = "$",
    sec.axis = sec_axis(~ . * coeff, name = "%")
  ) +
  scale_x_yearmon(name = "date", format = "%Y-%m")