rrworldmap

I'm trying to produce a world map in R Studio with country colour fill relative to a numeric variable


Problematic World Map

I am trying to create in R Studio a world map where each country which I have data for (not every country) is filled in with a colour on a gradual scale relative to that country's data - in this case the number of covid tests administered - or 'Total_tests'. The data I'm using is covid data from an example data set I was given. Trouble is the map comes out as above - with those diagonal lines all over the place.

The R code I have so far is:

# Install (once) and attach the 'maps' package.
# NOTE(review): map_data() and ggplot() used below come from ggplot2, which
# this snippet never loads - presumably it was attached earlier in the session.
install.packages("maps")
library(maps)

# Keep only the columns of interest from the covid data set, then drop every
# row that has an NA in any of those columns.
map_data_subset <- subset(covid, select = c("Country", "Continent","Total_tests", "Total_Deaths", "Total_Deaths_Per_Million"))
country_tests_deaths <- na.omit(map_data_subset)
# World outline as a plain data frame; the plot below reads its long, lat,
# group and region columns.
world_map <- map_data("world")
# Left join: keep every row of the map data and attach the covid columns where
# region == Country (all.x = TRUE leaves NA for unmatched regions).
# NOTE(review): merge() sorts its result by the key column by default, so the
# rows no longer follow world_map's original vertex order - likely the source
# of the stray diagonal lines described in this post.
merged_data <- merge(world_map, country_tests_deaths, by.x = "region", by.y = "Country", all.x = TRUE)


# Plotting map
# One polygon per 'group', filled on a continuous gradient by Total_tests.
ggplot(data = merged_data, aes(x = long, y = lat, group = group, fill = Total_tests)) +
  geom_polygon(color = "white") +
  scale_fill_gradient(low = "skyblue", high = "navyblue", name = "Your Legend") +
  theme_minimal()

My trouble is my world map comes out looking like the above - can anyone tell me why that is and how I can fix it? I had a hunch it might be to do with the world map data having circa 100,000 rows and my testing data only having ~100, but not sure if that's true nor how to fix it.

EDIT - below is the post 'na.omit' dataset I'd like to overlay onto the world map;

'''
> print(map_data_subset, n=104)
# A tibble: 104 × 2
    Country                          Total_tests
    <chr>                                  <dbl>
  1 Andorra                               215733
  2 Argentina                           24156096
  3 Armenia                              1770914
  4 Australia                           39943525
  5 Austria                             90377770
  6 Azerbaijan                           4952737
  7 Bahamas                               147633
  8 Bahrain                              6612314
  9 Bangladesh                           9907321
 10 Belgium                             20514883
 11 Belize                                278894
 12 Bhutan                               1160858
 13 Bolivia                              2456130
 14 Bosnia and Herzegovina               1253492
 15 Bulgaria                             4990220
 16 Canada                              44186056
 17 Chile                               22194480
 18 Colombia                            25954762
 19 Costa Rica                           1927328
 20 Cote d'Ivoire                        1029903
 21 Croatia                              2913176
 22 Cyprus                              13835984
 23 Denmark                             41501675
 24 Ecuador                              1761061
 25 Equatorial Guinea                     230491
 26 Estonia                              1997046
 27 Ethiopia                             3557710
 28 Finland                              7224917
 29 Gabon                                1188832
 30 Georgia                              8900000
 31 Germany                             75158696
 32 Greece                              21991126
 33 Guatemala                            2228737
 34 Hong Kong                           25906063
 35 Hungary                              6620866
 36 Iceland                               650933
 37 India                              583631490
 38 Iran                                33350660
 39 Iraq                                15338457
 40 Ireland                              7668724
 41 Israel                              28327710
 42 Italy                               95331171
 43 Jamaica                               617665
 44 Japan                               24344819
 45 Jordan                              10237614
 46 Kosovo                               1217376
 47 Kuwait                               4389988
 48 Laos                                  612410
 49 Liechtenstein                          69730
 50 Lithuania                            5381950
 51 Luxembourg                           3543784
 52 Maldives                             1543706
 53 Malta                                1306982
 54 Mexico                              10389202
 55 Moldova                              1822979
 56 Mongolia                             4074039
 57 Morocco                              8853903
 58 Mozambique                            901901
 59 Myanmar                              4376017
 60 Namibia                               713463
 61 Nepal                                4256803
 62 Netherlands                         13409992
 63 New Zealand                          3602589
 64 Nigeria                              3142971
 65 Norway                               7864561
 66 Pakistan                            19911021
 67 Panama                               3906709
 68 Paraguay                             1846953
 69 Philippines                         20507811
 70 Poland                              20707050
 71 Portugal                            19023656
 72 Qatar                                2723342
 73 Romania                             13294804
 74 Russia                             195638185
 75 Rwanda                               2938657
 76 Saint Kitts and Nevis                  46380
 77 Saint Vincent and the Grenadines       77176
 78 Saudi Arabia                        29430910
 79 Senegal                               828629
 80 Serbia                               5874429
 81 Singapore                           20436387
 82 Slovakia                            42271380
 83 Slovenia                             1626242
 84 South Africa                        17864698
 85 South Korea                         13721674
 86 Spain                               59314343
 87 Sri Lanka                            5355028
 88 Switzerland                         10760244
 89 Taiwan                               3619373
 90 Thailand                            14201188
 91 Timor                                 209126
 92 Togo                                  518580
 93 Trinidad and Tobago                   353373
 94 Tunisia                              2994047
 95 Turkey                              90162700
 96 Uganda                               1705808
 97 Ukraine                             13277259
 98 United Arab Emirates                87246490
 99 United Kingdom                     283376305
100 United States                      592381867
101 Uruguay                              3635691
102 Vietnam                             24871501
103 Zambia                               2509600
104 Zimbabwe                             1288436 
'''

Thanks in advance, Freddy


Solution

  • Updated per OP's comments

    Here is a solution that uses the sf package for the spatial data. The issue was caused by how the world data was joined: map_data("world") returns a data frame with one row per polygon vertex, and the row order (recorded in its `order` column) determines how the points are joined up. merge() does not retain that order - by default it sorts the result by the join column - so geom_polygon() connected the vertices in the wrong sequence, which produces the stray diagonal lines.

    With that in mind, using the sf package this approach converts your world_map object to a polygon sf object. You will still need to double-check that values in the join fields correspond to each other. That's because in some cases, a country may have more than one name e.g. alternative spellings. In this example, there are some non-matched records because of this (see code below). This is a common issue with joins that rely on text fields. You will need to manually edit these names to ensure all of your data are joined to world_map. The example map has not accounted for the non-matched countries:

    library(maps)
    library(ggplot2)
    library(sf)
    library(dplyr) # rename() is a dplyr verb; without this the pipeline below fails
    options(scipen = 999) # Non-scientific number notation in the legend
    
    # World outlines as filled polygons (plot = FALSE returns the data instead of
    # drawing it), converted to an sf object. The identifier column "ID" is
    # renamed to "Country" so it matches the join column in your data
    world_map <- map("world", exact = FALSE, plot = FALSE, fill = TRUE) %>%
      st_as_sf() %>%
      rename(Country = "ID")
    
    # Your updated data, I created a .csv then imported it to R
    map_data_subset <- read.csv("C:/test/covid.csv", stringsAsFactors = FALSE)
    
    # Strip leading/trailing white spaces from Country column so names can match
    map_data_subset$Country <- trimws(map_data_subset$Country)
    
    # Join data: all.x = TRUE keeps every map polygon, with NA for countries
    # that have no row in your data
    merged_data <- merge(world_map,
                         map_data_subset,
                         by = "Country",
                         all.x = TRUE)
    
    # As per my comment above, some country names have different spellings so you
    # will need to manually sort these out (oh, the joys of joins using text
    # fields). These are the rows of your data with no matching map polygon:
    map_data_subset[!map_data_subset$Country %in% world_map$Country, ]
    
    #                              Country Total_tests
    # 20                     Cote d'Ivoire     1029903
    # 34                         Hong Kong    25906063
    # 76             Saint Kitts and Nevis       46380
    # 77  Saint Vincent and the Grenadines       77176
    # 91                             Timor      209126
    # 93               Trinidad and Tobago      353373
    # 99                    United Kingdom   283376305
    # 100                    United States   592381867
    
    ggplot() +
      geom_sf(data = merged_data, # Note you can pass sf object directly to ggplot()
              aes(fill = Total_tests),
              colour = "white") +
      scale_fill_gradient(low = "skyblue",
                          high = "navyblue",
                          name = "Your Legend") +
      theme_minimal()
    

    result

    Update 2: Reducing whitespace around plot

    This is as close to a 'snug' fit as I've managed to achieve for plotting an EPSG:4326 world map; something about the 4326 CRS and plotting at -90 produces a gap at the bottom. This issue persists even when creating a ratio variable with get_asp_ratio() from the tmaptools package. I end up just manually cropping the bottom gap.

    # For non-scientific number notation in your legend. Set before the plot is
    # drawn so the on-screen plot and the saved file are labelled the same way
    options(scipen = 999)
    
    # Build the map once and keep it, so exactly this plot is drawn and saved.
    # Uses merged_data from the previous step
    covid_map <- ggplot() +
      geom_sf(data = merged_data, # Note you can pass sf object directly to ggplot()
              aes(fill = Total_tests),
              colour = "white") +
      scale_fill_gradient(low = "skyblue",
                          high = "navyblue",
                          name = "Your Legend") +
      scale_x_continuous(expand = c(0, 0)) + # Set x buffer around plot to 0
      scale_y_continuous(expand = c(0, 0.5)) + # Set y buffer
      theme(panel.background = element_blank(),
            legend.position = c(0.1, 0.25), # Set legend position to inside plot
            axis.title = element_blank(),
            axis.text = element_blank(),
            axis.ticks = element_blank(),
            panel.grid = element_blank(),
            plot.margin = grid::unit(c(0, 0, 0, 0), "mm"))
    
    covid_map # Draw the map in the current graphics device
    
    ggsave("your save location/covid_map.jpg",
           plot = covid_map, # Explicit, rather than relying on last_plot()
           # Roughly the correct ratio
           width = 11,
           height = 5,
           dpi = 300)
    

    result

    This is the generalised answer that was posted originally:

    library(ggplot2)
    library(rnaturalearth) # For world map data
    library(sf) # For geospatial data
    library(dplyr)
    library(lubridate) # For date data
    
    # World map as an sf object
    countries <- ne_countries(returnclass = "sf") %>%
      rename(country = "name_long") # Rename to make join easier
    
    # Covid data
    covid <- read.csv("https://covid.ourworldindata.org/data/owid-covid-data.csv",
                      stringsAsFactors = FALSE)
    
    # Subset columns and get most recent data per country. Depending on your data,
    # some of this may not be relevant.
    # na.omit() is applied only after select(), so a row is dropped only when one
    # of the columns we actually use is missing
    map_data_subset <- covid %>%
      select(location, continent, date, total_tests, total_deaths, total_deaths_per_million) %>%
      na.omit() %>%
      rename(country = "location") %>% # As before, rename to make join easier
      mutate(date = ymd(date)) %>% # Parse to Date so sorting is chronological
      arrange(date) %>% # Oldest first, so each country's last row is its newest
      group_by(country) %>%
      slice(n()) %>% # Return only the most recent data per country
      ungroup()
    
    # Join data to world map; countries without covid data keep NA values
    merged_data <- countries %>%
      left_join(map_data_subset, by = "country")
    
    ggplot() +
      geom_sf(data = merged_data, # Note you can pass sf object directly to ggplot()
              aes(fill = total_tests),
              colour = "white") +
      scale_fill_gradient(low = "skyblue",
                          high = "navyblue",
                          name = "Your Legend") +
      theme_minimal()
    

    result