rmapsoverlayggmap

Overlay map with legend using qmplot


I have the following code that is creating a correctly colored map overlay. The only problem is I can't get it to include a correct legend. What I want to do is add 2 legends, one for each scale/candidate - so there would be one legend for the blues that show the possible intervals, and one legend for the reds that show all possible intervals. The intervals are defined by the breaks where each integer is the bottom of an interval and the top of the next (intervals: 40-50, 50-60, 60-70, 70-80, 80-90). I have code that seems to get close but it cuts off the top interval of the reds and it seems to be inverting the colors.

Here is code that is producing the correct map coloring without a legend:

library(httr)
library(XML)
library(ggplot2)
library(maps)
library(RColorBrewer)
library(maptools)
library(ggmap)

# Map data for Alabama counties (polygons)
ala <- map("county", regions = "alabama", plot = FALSE, fill = TRUE)
sapply(ala, head)

IDs <- sub("^alabama,", "", ala$names)
head(IDs)

ala_sp <- map2SpatialPolygons(ala, IDs, CRS("+proj=longlat"))
names(ala_sp)
slotNames(ala_sp)
bbox(ala_sp)

plot(ala_sp, axes = TRUE)


# ggmap version of overlay
ala_data <- fortify(ala_sp)
dim(ala_data)
ala_data[1:60, ]
subset(ala_data, id == "st clair") # need to match saint clair
ala_data$id <- gsub("st clair", "saint clair", ala_data$id)
subset(ala_data, id == "saint clair") # check matches
head(ala_data)

# Define the breaks
breaks <- c(40, 50, 60, 70, 80, 90)

# Define the colors
colors <- brewer.pal(10, "RdBu")
jones_colors <- colors[6:10]
moore_colors <- colors[5:1]

# Assign colors
jones_cuts <- cut(etable$JonesPct, breaks)
moore_cuts <- cut(etable$MoorePct, breaks)
jones <- jones_colors[jones_cuts]
moore <- moore_colors[moore_cuts]
map_colors <- ifelse(is.na(etable$JonesPct), moore, jones)

# Create matching data and merge
vote_data <- cbind(etable, map_colors)
vote_data$County <- tolower(vote_data$County)
vote_data$County <- gsub("st. clair", "saint clair", vote_data$County)
vote_data$County <- gsub("dekalb", "de kalb", vote_data$County)
vote_data <- vote_data[order(vote_data$County), ]
head(vote_data)

ala_color_data <- merge(ala_data, vote_data[c("County", "map_colors")], 
                        by.x = "id", by.y = "County", all.x = TRUE)
ala_color_data <- ala_color_data[order(ala_color_data$id), ]

#### Colored map without legend, correct colors ####
qmplot(long, lat, data = ala_data, force = TRUE, legend = "bottomright", alpha = I(0)) +
  geom_polygon(aes(x = long, y = lat, group = group, fill = map_colors),
               data = ala_color_data,
               colour = "white", alpha = 0.4, size = 0.3
  ) +
  scale_fill_identity() +
  labs(title = "Alabama Election Results 2017 - U.S. Senate",
       subtitle = "\n Moore vs. Jones") +
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, color = "gray40")) 

this uses the following data:

vote_data <- data.frame(
  County = c(
    "autauga", "baldwin", "barbour", "bibb", "blount", "bullock",
    "butler", "calhoun", "chambers", "cherokee", "chilton", "choctaw",
    "clarke", "clay", "cleburne", "coffee", "colbert", "conecuh",
    "coosa", "covington", "crenshaw", "cullman", "dale", "dallas",
    "de kalb", "elmore", "escambia", "etowah", "fayette", "franklin",
    "geneva", "greene", "hale", "henry", "houston", "jackson", "jefferson",
    "lamar", "lauderdale", "lawrence", "lee", "limestone", "lowndes",
    "macon", "madison", "marengo", "marion", "marshall", "mobile",
    "monroe", "montgomery", "morgan", "perry", "pickens", "pike",
    "randolph", "russell", "saint clair", "shelby", "sumter", "talladega",
    "tallapoosa", "tuscaloosa", "walker", "washington", "wilcox",
    "winston"
  ),
  Jones = c(
    5606, 22131, 3680, 1567, 2405, 2712, 2914, 11705, 4247, 1525, 2298, 2273,
    4346, 985, 594, 3715, 6865, 2259, 1414, 2102, 1320, 4156, 3842, 10492, 3559,
    7711, 3640, 10518, 1142, 1770, 1289, 3340, 3894, 1896, 9107, 3328, 149522,
    779, 9908, 3028, 19810, 9606, 3779, 5780, 65664, 4495, 1311, 5134, 62253,
    3244, 48186, 10901, 3138, 3057, 3989, 1692, 6761, 6203, 27251, 3527, 9967,
    4590, 30858, 4317, 1799, 3344, 911
  ),
  Moore = c(
    8752, 38445, 2699, 3599, 11621, 656, 2756, 14567, 3308, 3996, 7555, 1949,
    3984, 2586, 2465, 8052, 7762, 1815, 1867, 6825, 2347, 16602, 6988, 3485,
    10097, 14411, 4985, 15693, 3491, 4214, 5431, 462, 1691, 3014, 14796, 7316,
    66309, 2847, 12775, 5314, 14017, 14298, 988, 758, 46313, 2804, 5268, 13828,
    46725, 3276, 17705, 19187, 821, 2961, 4154, 3229, 3622, 15876, 36424, 814,
    9698, 7171, 22064, 11909, 3320, 999, 4680
  ),
  WriteIns = c(
    253, 1699, 41, 66, 180, 7, 41, 399, 75, 110, 132, 17, 43, 44, 30, 202, 171,
    18, 30, 88, 56, 324, 136, 60, 234, 338, 87, 617, 50, 48, 93, 9, 32, 38, 285,
    154, 3710, 29, 382, 61, 672, 515, 13, 20, 3446, 62, 68, 449, 1539, 40, 743,
    668, 11, 46, 97, 23, 55, 459, 1718, 18, 223, 148, 1007, 259, 48, 16, 67
  ),
  Rpt = rep("100%", 67L),
  Total = c(
    14611, 62275, 6420, 5232, 14206, 3375, 5711, 26671, 7630, 5631, 9985, 4239,
    8373, 3615, 3089, 11969, 14798, 4092, 3311, 9015, 3723, 21082, 10966, 14037,
    13890, 22460, 8712, 26828, 4683, 6032, 6813, 3811, 5617, 4948, 24188, 10798,
    219541, 3655, 23065, 8403, 34499, 24419, 4780, 6558, 115423, 7361, 6647,
    19411, 110517, 6560, 66634, 30756, 3970, 6064, 8240, 4944, 10438, 22538,
    65393, 4359, 19888, 11909, 53929, 16485, 5167, 4359, 5658
  ),
  JonesPct = c(
    NA, NA, 57.3208722741433, NA, NA, 80.3555555555556, 51.0243389949221, NA,
    55.6618610747051, NA, NA, 53.6211370606275, 51.9049325211991, NA, NA, NA, NA,
    55.2052785923754, NA, NA, NA, NA, NA, 74.7453159507017, NA, NA, NA, NA, NA,
    NA, NA, 87.6410390973498, 69.3252625956917, NA, NA, NA, 68.1066406730406, NA,
    NA, NA, 57.4219542595438, NA, 79.0585774058577, 88.1366270204331,
    56.8898746350381, 61.0650726803423, NA, NA, 56.3288905779201, NA,
    72.3144340726956, NA, 79.0428211586902, 50.4122691292876, NA, NA,
    64.7729450086223, NA, NA, 80.9130534526268, 50.1156476267096, NA,
    57.2196777244154, NA, NA, 76.7148428538656, NA
  ),
  MoorePct = c(
    59.9000752857436, 61.7342432757929, NA, 68.7882262996942, 81.8034633253555,
    NA, NA, 54.6173746766151, NA, 70.964304741609, 75.6634952428643, NA, NA,
    71.5352697095436, 79.799287795403, 67.2737906257833, 52.45303419381, NA,
    56.3877982482634, 75.7071547420965, 63.0405586892291, 78.7496442462764,
    63.7242385555353, NA, 72.6925845932325, 64.1629563668744, 57.2199265381084,
    58.4948561204711, 74.5462310484732, 69.8607427055703, 79.7152502568619, NA,
    NA, 60.9135004042037, 61.170828510005, 67.7532876458604, NA, 77.8932968536252,
    55.3869499241275, 63.2393192907295, NA, 58.5527662885458, NA, NA, NA, NA,
    79.2537987061832, 71.237957858946, NA, 49.9390243902439, NA, 62.384575367408,
    NA, NA, 50.4126213592233, 65.3114886731392, NA, 70.4410329221759,
    55.7001513923509, NA, NA, 60.2149634730036, NA, 72.2414316044889,
    64.2539191019934, NA, 82.7147401908802
  ),
  map_colors = c(
    "#F4A582", "#D6604D", "#92C5DE", "#D6604D", "#67001F", "#053061",
    "#92C5DE", "#F4A582", "#92C5DE", "#B2182B", "#B2182B", "#92C5DE",
    "#92C5DE", "#B2182B", "#B2182B", "#D6604D", "#F4A582", "#92C5DE",
    "#F4A582", "#B2182B", "#D6604D", "#B2182B", "#D6604D", "#2166AC",
    "#B2182B", "#D6604D", "#F4A582", "#F4A582", "#B2182B", "#D6604D",
    "#B2182B", "#053061", "#4393C3", "#D6604D", "#D6604D", "#D6604D",
    "#4393C3", "#B2182B", "#F4A582", "#D6604D", "#92C5DE", "#F4A582",
    "#2166AC", "#053061", "#92C5DE", "#4393C3", "#B2182B", "#B2182B",
    "#92C5DE", "#FDDBC7", "#2166AC", "#D6604D", "#2166AC", "#92C5DE",
    "#F4A582", "#D6604D", "#4393C3", "#B2182B", "#F4A582", "#053061",
    "#92C5DE", "#D6604D", "#92C5DE", "#B2182B", "#D6604D", "#2166AC",
    "#67001F"
  )
)

and

etable <- data.frame(
  County = c(
    "Jefferson", "Madison", "Mobile", "Montgomery", "Shelby", "Baldwin",
    "Tuscaloosa", "Lee", "Morgan", "Etowah", "Calhoun", "Limestone",
    "Houston", "Lauderdale", "St. Clair", "Elmore", "Cullman", "Talladega",
    "Marshall", "Walker", "Colbert", "Autauga", "Blount", "Dallas",
    "DeKalb", "Coffee", "Tallapoosa", "Dale", "Jackson", "Russell",
    "Chilton", "Covington", "Escambia", "Lawrence", "Clarke", "Pike",
    "Chambers", "Marengo", "Geneva", "Marion", "Monroe", "Macon",
    "Barbour", "Pickens", "Franklin", "Butler", "Winston", "Cherokee",
    "Hale", "Bibb", "Washington", "Henry", "Randolph", "Lowndes",
    "Fayette", "Sumter", "Wilcox", "Choctaw", "Conecuh", "Perry",
    "Greene", "Crenshaw", "Lamar", "Clay", "Bullock", "Coosa", "Cleburne"
  ),
  Jones = c(
    149522, 65664, 62253, 48186, 27251, 22131, 30858, 19810, 10901, 10518, 11705,
    9606, 9107, 9908, 6203, 7711, 4156, 9967, 5134, 4317, 6865, 5606, 2405, 10492,
    3559, 3715, 4590, 3842, 3328, 6761, 2298, 2102, 3640, 3028, 4346, 3989, 4247,
    4495, 1289, 1311, 3244, 5780, 3680, 3057, 1770, 2914, 911, 1525, 3894, 1567,
    1799, 1896, 1692, 3779, 1142, 3527, 3344, 2273, 2259, 3138, 3340, 1320, 779,
    985, 2712, 1414, 594
  ),
  Moore = c(
    66309, 46313, 46725, 17705, 36424, 38445, 22064, 14017, 19187, 15693, 14567,
    14298, 14796, 12775, 15876, 14411, 16602, 9698, 13828, 11909, 7762, 8752,
    11621, 3485, 10097, 8052, 7171, 6988, 7316, 3622, 7555, 6825, 4985, 5314,
    3984, 4154, 3308, 2804, 5431, 5268, 3276, 758, 2699, 2961, 4214, 2756, 4680,
    3996, 1691, 3599, 3320, 3014, 3229, 988, 3491, 814, 999, 1949, 1815, 821, 462,
    2347, 2847, 2586, 656, 1867, 2465
  ),
  WriteIns = c(
    3710, 3446, 1539, 743, 1718, 1699, 1007, 672, 668, 617, 399, 515, 285, 382,
    459, 338, 324, 223, 449, 259, 171, 253, 180, 60, 234, 202, 148, 136, 154, 55,
    132, 88, 87, 61, 43, 97, 75, 62, 93, 68, 40, 20, 41, 46, 48, 41, 67, 110, 32,
    66, 48, 38, 23, 13, 50, 18, 16, 17, 18, 11, 9, 56, 29, 44, 7, 30, 30
  ),
  Rpt = rep("100%", 67L),
  Total = c(
    219541, 115423, 110517, 66634, 65393, 62275, 53929, 34499, 30756, 26828,
    26671, 24419, 24188, 23065, 22538, 22460, 21082, 19888, 19411, 16485, 14798,
    14611, 14206, 14037, 13890, 11969, 11909, 10966, 10798, 10438, 9985, 9015,
    8712, 8403, 8373, 8240, 7630, 7361, 6813, 6647, 6560, 6558, 6420, 6064, 6032,
    5711, 5658, 5631, 5617, 5232, 5167, 4948, 4944, 4780, 4683, 4359, 4359, 4239,
    4092, 3970, 3811, 3723, 3655, 3615, 3375, 3311, 3089
  ),
  JonesPct = c(
    68.1066406730406, 56.8898746350381, 56.3288905779201, 72.3144340726956, NA,
    NA, 57.2196777244154, 57.4219542595438, NA, NA, NA, NA, NA, NA, NA, NA, NA,
    50.1156476267096, NA, NA, NA, NA, NA, 74.7453159507017, NA, NA, NA, NA, NA,
    64.7729450086223, NA, NA, NA, NA, 51.9049325211991, NA, 55.6618610747051,
    61.0650726803423, NA, NA, NA, 88.1366270204331, 57.3208722741433,
    50.4122691292876, NA, 51.0243389949221, NA, NA, 69.3252625956917, NA, NA, NA,
    NA, 79.0585774058577, NA, 80.9130534526268, 76.7148428538656,
    53.6211370606275, 55.2052785923754, 79.0428211586902, 87.6410390973498, NA,
    NA, NA, 80.3555555555556, NA, NA
  ),
  MoorePct = c(
    NA, NA, NA, NA, 55.7001513923509, 61.7342432757929, NA, NA, 62.384575367408,
    58.4948561204711, 54.6173746766151, 58.5527662885458, 61.170828510005,
    55.3869499241275, 70.4410329221759, 64.1629563668744, 78.7496442462764, NA,
    71.237957858946, 72.2414316044889, 52.45303419381, 59.9000752857436,
    81.8034633253555, NA, 72.6925845932325, 67.2737906257833, 60.2149634730036,
    63.7242385555353, 67.7532876458604, NA, 75.6634952428643, 75.7071547420965,
    57.2199265381084, 63.2393192907295, NA, 50.4126213592233, NA, NA,
    79.7152502568619, 79.2537987061832, 49.9390243902439, NA, NA, NA,
    69.8607427055703, NA, 82.7147401908802, 70.964304741609, NA, 68.7882262996942,
    64.2539191019934, 60.9135004042037, 65.3114886731392, NA, 74.5462310484732,
    NA, NA, NA, NA, NA, NA, 63.0405586892291, 77.8932968536252, 71.5352697095436,
    NA, 56.3877982482634, 79.799287795403
  )
)

it outputs this which has all the right coloring: enter image description here

I have tried adding legends this way:

#### colored map with legend, colors are off ####

# # Create a new data frame for the legend
intervals <- c("dem 40-50", "dem 50-60", "dem 60-70", "dem 70-80", "dem 80-90",
               "rep 40-50", "rep 50-60", "rep 60-70", "rep 70-80", "rep 80-90")

# Combine the colors for both candidates
colors <- c(jones_colors, moore_colors)

# Colored map with legend
qmplot(long, lat, data = ala_data, force = TRUE, alpha = I(0)) +
  geom_polygon(aes(x = long, y = lat, group = group, fill = map_colors),
               data = ala_color_data,
               colour = "white", alpha = 0.4, size = 0.3
  ) +
  scale_fill_manual(values = colors,
                    labels = c(intervals),
                    name = "Percentages") +
  labs(title = "Alabama Election Results 2017 - U.S. Senate",
       subtitle = "\n Moore vs. Jones") +
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5, color = "gray40"))

but it inverts colors and leaves the final red interval off completely.


Solution

  • I think the easiest way to get the desired output is using directly the classified values (obtained using cut) to set the fill color as factor and then manually changing the colors with scale_fill_manual. This is the usual approach to set aesthetics in ggplot2 and makes it easier to see the unique values observed in your data.

    Set map_colors using the cuts output.

    jones_cuts <- paste0("dem_", jones_cuts)
    moore_cuts <- paste0("rep_", moore_cuts)
    map_colors <- ifelse(is.na(etable$JonesPct), moore_cuts, jones_cuts)
    

    Then take a look at the unique values of map_colors and order them in the desired order. Remove one color in the jones_colors vector, since there is no value from dem 40-50.

    # See unique values
    unique(map_colors)
    # Order them in desired order
    intervals <- c("dem 50-60", "dem 60-70", "dem 70-80", "dem 80-90",
                   "rep 40-50", "rep 50-60","rep 60-70", "rep 70-80", "rep 80-90")
    
    # Combine the colors for both candidates
    colors <- c(jones_colors[c(2:5)], moore_colors)
    

    Make the map. Indicate the levels order for map_colors as factor.

    #Colored map with legend
    qmplot(long, lat, data = ala_data, force = TRUE, alpha = I(0)) +
      geom_polygon(aes(x = long, 
                       y = lat, group = group, 
                       fill = factor(map_colors, 
                                     levels = c("dem_(50,60]", "dem_(60,70]","dem_(70,80]",
                                                "dem_(80,90]","rep_(40,50]", "rep_(50,60]", 
                                                "rep_(60,70]","rep_(70,80]", "rep_(80,90]"))),
                   data = ala_color_data,
                   colour = "white", 
                   alpha = 0.4, 
                   size = 0.3
      ) +
      scale_fill_manual(values = colors,
                        labels = intervals,
                        name = "Percentages") +
      labs(title = "Alabama Election Results 2017 - U.S. Senate",
           subtitle = "\n Moore vs. Jones") +
      theme(plot.title = element_text(hjust = 0.5),
            plot.subtitle = element_text(hjust = 0.5, color = "gray40"))
    

    map votes