I have this script to download observational data of a species from GBIF.org in R. Afterwards, the data is plotted on a map of Europe so make a map of the species distribution.
# PACKAGES
library(sf)
library(rgbif)
library(dismo)
library(tidyverse)
library(eurostat)
library(leaflet)
library(sf)
library(scales)
library(cowplot)
library(ggthemes)
library(ggplot2)
# GBIF EXAMPLE
# Get data from GBIF
data <- gbif("Psilocybe", "semilanceata", geo = TRUE)
# Filter out rows with missing coordinates
data_filtered <- data[complete.cases(data$lon, data$lat), ]
# Create a spatial dataframe
data_sf <- st_as_sf(data_filtered, coords = c("lon", "lat"), crs = 4326)
# MAP
# Get map data
get_eurostat_geospatial(resolution = 10,
nuts_level = 0,
year = 2016)
SHP_0 <- get_eurostat_geospatial(resolution = 10,
nuts_level = 0,
year = 2016)
# PLOT OBSERVATIONS + MAP
# Simple
species_plot <-ggplot() +
geom_sf() +
geom_sf(data = SHP_0, color = 'black', fill = 'grey90',linewidth = 0.20) +
scale_x_continuous(limits = c(-10, 30)) +
scale_y_continuous(limits = c(35, 70)) +
geom_sf(data = data_sf, color = "cornflowerblue", size = 1.2, alpha = 0.25) +
theme_void()
# Save the ggplot as a .jpg file with the species name
species_name <- "Psilocybe_semilanceata"
ggsave(paste0(species_name, ".jpg"), plot = species_plot, width = 10, height = 8, units = "in", dpi = 300)
I have a complete dataset with a few hundred species which I want to make a distribution map for. So, I loaded the dataset with the Latin species name:
> str(bird_names)
chr [1:280] "Prunella modularis" "Myiopsitta monachus" ...
> head(bird_names)
[1] "Prunella modularis" "Myiopsitta monachus" "Pyrrhura perlata"
[4] "Tyto alba" "Panurus biarmicus" "Merops apiaster"
Therefore I rewrote the first script with a for-loop to download all the species data from GBIF.org and plot it:
library(sf)
library(rgbif)
library(dismo)
library(tidyverse)
library(eurostat)
library(leaflet)
library(sf)
library(scales)
library(cowplot)
library(ggthemes)
library(ggplot2)
# Create a directory for storing bird maps
dir.create("BIRD_MAPS", showWarnings = FALSE)
# Your existing packages and GBIF example...
# Loop through each bird species
for (species in bird_names) {
# Split the species name into genus and species
species_parts <- strsplit(species, " ")[[1]]
genus <- species_parts[1]
species_name <- species_parts[2]
# Get data from GBIF for the current species
data <- gbif(genus, species_name, geo = TRUE)
# Filter out rows with missing coordinates
data_filtered <- data[complete.cases(data$lon, data$lat), ]
# Create a spatial dataframe
data_sf <- st_as_sf(data_filtered, coords = c("lon", "lat"), crs = 4326)
# Get map data
SHP_0 <- get_eurostat_geospatial(resolution = 10, nuts_level = 0, year = 2016)
# Plot observations + map
species_plot <- ggplot() +
geom_sf() +
geom_sf(data = SHP_0, color = 'black', fill = 'grey90', linewidth = 0.20) +
scale_x_continuous(limits = c(-10, 30)) +
scale_y_continuous(limits = c(35, 70)) +
geom_sf(data = data_sf, color = "cornflowerblue", size = 1.2, alpha = 0.25) +
theme_void()
# Save the ggplot as a .jpg file with the species name in BIRD_MAPS directory
ggsave(paste0("BIRD_MAPS/", gsub(" ", "_", species), ".jpg"), plot = species_plot, width = 10, height = 8, units = "in", dpi = 300)
}
However, I got this error:
2790578 records found
Error in gbif(genus, species_name, geo = TRUE) :
The number of records is larger than the maximum for download via this service (200,000)
Is it possible to overcome this? Do I get the error because the script wants to download all observations at once instead of all the observations of one species? I only want the observations of Europe so that could be a way to filter out some excess observations.
As L Tyrone
mentioned, you can use a rgbif
package. Below my standard workflow example to get the GBIF occurencies for species (including synonyms). Feel free to use it as guide:
occ_count
for them:spc <- "Cynodon dactylon (L.) Pers."
output_file <- "cynodon_dactylon.csv"
if(!file.exists(output_file)) {
a <- rgbif::name_backbone(name = spc) |>
subset(select = c("usageKey", "species", "scientificName", "status")) |>
dplyr::mutate(acceptedKey = usageKey)
b <- rgbif::name_usage(key = a$usageKey, data = "synonyms")$data |>
subset(select = c("key", "species", "scientificName", "taxonomicStatus", "acceptedKey")) |>
dplyr::rename(c(usageKey = key, status = taxonomicStatus))
a <- a |>
rbind(b)
for (i in 1:nrow(a)) {
key <- as.integer(a[i,"usageKey"])
occCount <- rgbif::occ_count(taxonKey = key)
a[a$usageKey == key, "occCount"] <- occCount
message(i, " ", a[i, "scientificName"], ", count = ", occCount)
}
readr::write_csv(a, file = output_file)
} else {
a <- readr::read_csv(file = output_file, show_col_types = FALSE)
}
rgbif
uses GBIF API, therefore you have to create an account on GBIF.if(file.exists(output_file)) {
a <- readr::read_csv(file = output_file, show_col_types = FALSE)
b <- a |>
subset(occCount >= 1L)
for (i in 1:nrow(b)) {
if(!("occDownloadKey" %in% names(b)) || is.na(b[i, "occDownloadKey"])) {
message(i, " ", b[i, "scientificName"])
calKey <- as.integer(b[i, "usageKey"])
res <- rgbif::occ_download(
rgbif::pred('taxonKey', calKey),
rgbif::pred('hasCoordinate', TRUE),
user = "your_user_name",
pwd = "your_password",
email = "your_email"
)
repeat {
ala <- rgbif::occ_download_meta(res)
message(paste(calKey, Sys.time(), ala$status, sep = " - "))
if (ala$status == "SUCCEEDED") {
Sys.sleep(3)
data <- rgbif::occ_download_get(key = ala$key, path = "data/gbif", overwrite = FALSE)
a[a$usageKey == calKey, "occDownloadKey"] <- ala$key
break
}
Sys.sleep(60)
}
} else {
message(i, " ", b[i, "scientificName"], " ", b[i, "occDownloadKey"])
}
}
readr::write_csv(a, file = output_file)
}
CoordinateCleaner
package to clean up the data, like:a <- readr::read_csv(file = output_file, show_col_types = FALSE)
b <- a |>
subset(!is.na(occDownloadKey))
d <- data.frame()
for (i in 1:nrow(b)) {
message(i, " of ", nrow(b), " - ", b[i, "scientificName"])
dd <- rgbif::occ_download_import(rgbif::as.download(paste0("data/gbif/",b[i,"occDownloadKey"],".zip"))) |>
subset(select = c(gbifID, species, decimalLongitude, decimalLatitude, countryCode, individualCount,
family, taxonRank, coordinateUncertaintyInMeters, year,
basisOfRecord, institutionCode, datasetName)) |>
subset(!is.na(decimalLongitude) & !is.na(decimalLatitude))
dd$gbifID <- as.character(dd$gbifID)
d <- dd |>
rbind(d)
}
d$countryCode <- countrycode::countrycode(d$countryCode, origin = 'iso2c', destination = 'iso3c')
d <- data.frame(d)
rf <- CoordinateCleaner::clean_coordinates(d,
lon = "decimalLongitude",
lat = "decimalLatitude",
countries = "countryCode",
species = "species",
tests = c("capitals", "centroids",
"equal", "gbif",
"institutions",
"outliers", "seas",
"zeros", "countries")
)
rf <- rf |>
sf::st_as_sf(coords = c("decimalLongitude", "decimalLatitude"), crs = "EPSG:4326") |>
subset(.summary == TRUE)