I want to apply the `reverse_geo` function to a row when its address is empty and its latitude and longitude are not empty. The CSV file:
latitude | longitude | address |
---|---|---|
38.89770 | -77.03655 | Pennsylvania Avenue, Washington, District of columbia, 20045, United States |
37.79520 | -122.40279 | |
41.87535 |
The code I run is:
library(tidygeocoder)
library(dplyr)
# Load the sample CSV. paste(..., sep = "") joins the two pieces with no
# separator, so `path` has to end with a path separator itself.
path <- "mypath"
data <- read.csv (paste (path, "sample2.csv", sep = ""))
# Print the loaded data for inspection.
data
# Attempt to fill in empty addresses by reverse geocoding the coordinates.
# NOTE(review): reverse_geo() appears to return a data frame (the address text
# is in its `address` column), not a character vector, so using the result
# directly as the case_when() value is the likely cause of the error below.
# NOTE(review): there is no fallback branch in case_when(), so rows that do not
# match the condition would presumably end up as NA rather than keep their
# existing address -- confirm against the dplyr documentation.
# NOTE(review): if read.csv() parsed latitude/longitude as numeric, blank cells
# are NA and the `!= ""` comparisons yield NA for them; is.na() is probably the
# intended test -- confirm.
data<-data %>%
mutate(address = case_when(address=="" & latitude!="" & longitude!="" ~ reverse_geo(lat = latitude, long = longitude, method = "osm")))
But I got
Error in `mutate()`:
! Problem while computing `address = case_when(...)`.
Caused by error:
! Can't use NA as column index with `[` at positions 1, 2, and 3.
I can't figure out why this error occurs.
You could do this:
# Fill in missing addresses: a row receives the reverse-geocoded address when
# its own address is NA or blank and both coordinates are present; every other
# row keeps the address it already has.
# The complete latitude/longitude columns are handed to reverse_geo(), not only
# the rows that need a lookup.
data$address <- with(
  data,
  ifelse(
    (address == "" | is.na(address)) & !is.na(latitude) & !is.na(longitude),
    reverse_geo(lat = latitude, long = longitude, method = "osm")$address,
    address
  )
)
Or, better, this:
library(data.table)
setDT(data)

# Rows that still need an address: the address is NA or blank and both
# coordinates are present. The mask never contains NA because every term is
# either an is.na() test or is OR-ed with one.
needs_lookup <- (is.na(data$address) | data$address == "") &
  !is.na(data$latitude) & !is.na(data$longitude)

# Skip the geocoding service entirely when there is nothing to look up.
if (any(needs_lookup)) {
  # A column that was blank in every row may have been read as logical; make it
  # character so it can hold the looked-up text.
  data[, address := as.character(address)]

  # Update the selected rows in place, by row position. Merging the results
  # back on the floating-point latitude/longitude columns would also overwrite
  # the address of any other row sharing those coordinates, would multiply rows
  # when a coordinate pair occurs more than once, and would re-sort the table.
  data[needs_lookup, address := {
    found <- as.character(
      reverse_geo(lat = latitude, long = longitude, method = "osm")$address
    )
    # Keep the previous value when the service returned nothing for a row.
    ifelse(is.na(found), address, found)
  }]
}
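The second version is better because the first suggestion passes every row's coordinates to the geocoding service, including rows that already have an address, which loads the server with useless queries.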