Issue
I have a data frame with 388 columns and two of these columns are labelled 'Longitude_E' and 'Latitude_N', containing GPS coordinates in degrees, minutes, and seconds (DMS).
I aim to create two empty columns, and label them as 'Decimal_Degrees_Longitude_E' and 'Degrees_Decimal_Latitude_N' because I want to insert GPS coordinates in decimal degrees (DD) into them, which have been converted from the DMS columns.
This data has been collected during fieldwork, and on some survey sheets, the GPS coordinates are missing, so some of the cells contain the factor 'Missed'
For Example:
[1] 33.53.270 33.85.093 33.52.7711 33.52.790 33.52.790 33.51.00 Missed Missed
[9] 33.85.093 Missed 33.50.141 33.85.0925 33.50.20 33.80.14991 33.50.471 Missed
[17] 33.49.49.89 33.57.9389 33.57.939 Missed Missed 33.49.755 33.48.452 33.85.093
[25] Missed 33.57.9389 33.48.7932 33.53.27 Missed 33.52.79 33.56.3312 33.55.2628
[33] 33.49.2552 33.49.2560 33.48.830 33.56.198 33.49.3460 Missed 33.5279 Missed
[41] 33.48.452 33.85.0925 33.51.199 33.51.199 33.52.479 Missed 33.49.2552 33.52.452
[49] Missed 33.50.106 33.50.106 Missed 33.85.0925 33.50.20? 33.50.6010 33.50.111
[57] 33.40.4042 Missed 33.52.479 33.51.103 33.52.284 38.52.144 33.50.549 33.48.8310
[65] 33.52,79 33.52.284 33.52.682 33.85.0925 33.51.493 33.51.007 33.47.078 33.85.0925
[73] Missed Missed 33.850.925 33.53.27 33.850.925 33.850.925 33
I Created two Empty Columns to Insert the DD Values into
library(dplyr, warn = FALSE)
New_Df<- myDf |>
mutate(
Decimal_Degrees_Longitude_E = NA,
Decimal_Degrees_Latitude_N = NA,
.after = 14
)
New_Df
I can't figure out how to modify the functions (below) so the factor 'Missed' contained within the cells of the columns doesn't interfere with the conversion of DMS to DD, and insert the DD values into the two empty-named columns.
A Section of my Data Frame:
New_Df<-structure(c("33°53'27.0", "33°85'09.3", "33°52'77.1", "33°52'79.0",
"33°52'79.0", "33°51'00.", "Missed", "Missed", "33°85'09.3",
"Missed", "33°50'14.1", "33°85'09.2", "33°50'20.", "33°80'14.9",
"33°50'47.1", "Missed", "33°49'0.0", "33°57'93.8", "33°57'93.9",
"Missed", "Missed", "33°49'75.5", "33°48'45.2", "33°85'09.3",
"Missed", "33°57'93.8", "33°48'79.3", "33°53'27.", "Missed",
"33°52'79.", "27°16'04.0", "27°40'93.2", "27°28'44.", "27°15'85.0",
"27°15'85.0", "27°21'40.", "Missed", "Missed", "27°40'93.2",
"Missed", "27°13'20.0", "27°40'93.1", "27°20'45.", "27°31'21.4",
"27°13'94.9", "Missed", "27°11'44.6", "27°10'85.2", "23°10'85.2",
"Missed", "Missed", "27°21'68.0", "27°18'19.0", "27°40'93.2",
"Missed", "27°10'85.2", "27°17'79.2", "27°16'04.", "Missed",
"27°15'85.", NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
), dim = c(30L, 4L), dimnames = list(NULL, c("Longitude_E_Corrected",
"Latitude_N_Corrected", "Decimal_Degrees_Longitude_E", "Decimal_Degrees_Latitude_N"
)))
First replacing "Missing" with NA
(using apply
since class(mat)
is [1] "matrix" "array"
), then giving the data to mutate
/ across
. Finally map_vec
the strsplit
values per variable to get the results.
library(dplyr)
as_tibble(apply(mat, 2, \(x) replace(x, x == "Missed", NA))) %>%
mutate(across(matches("^Latitude_N|^Longitude_E"), \(cols)
purrr::map_vec(strsplit(cols, "°|'"), ~
as.numeric(.x[1]) + as.numeric(.x[2]) / 60 + as.numeric(.x[3]) / 60 ^ 2),
.names="Decimal_Degrees_{col}"))
output
# A tibble: 30 × 4
Longitude_E_Corrected Latitude_N_Corrected Decimal_Degrees_Longitude_E_Corr…¹
<chr> <chr> <dbl>
1 33°53'27.0 27°16'04.0 33.9
2 33°85'09.3 27°40'93.2 34.4
3 33°52'77.1 27°28'44. 33.9
4 33°52'79.0 27°15'85.0 33.9
5 33°52'79.0 27°15'85.0 33.9
6 33°51'00. 27°21'40. 33.8
7 NA NA NA
8 NA NA NA
9 33°85'09.3 27°40'93.2 34.4
10 NA NA NA
# ℹ 20 more rows
# ℹ abbreviated name: ¹Decimal_Degrees_Longitude_E_Corrected
# ℹ 1 more variable: Decimal_Degrees_Latitude_N_Corrected <dbl>
# ℹ Use `print(n = ...)` to see more rows
mat <- structure(c("33°53'27.0", "33°85'09.3", "33°52'77.1", "33°52'79.0",
"33°52'79.0", "33°51'00.", "Missed", "Missed", "33°85'09.3",
"Missed", "33°50'14.1", "33°85'09.2", "33°50'20.", "33°80'14.9",
"33°50'47.1", "Missed", "33°49'0.0", "33°57'93.8", "33°57'93.9",
"Missed", "Missed", "33°49'75.5", "33°48'45.2", "33°85'09.3",
"Missed", "33°57'93.8", "33°48'79.3", "33°53'27.", "Missed",
"33°52'79.", "27°16'04.0", "27°40'93.2", "27°28'44.", "27°15'85.0",
"27°15'85.0", "27°21'40.", "Missed", "Missed", "27°40'93.2",
"Missed", "27°13'20.0", "27°40'93.1", "27°20'45.", "27°31'21.4",
"27°13'94.9", "Missed", "27°11'44.6", "27°10'85.2", "23°10'85.2",
"Missed", "Missed", "27°21'68.0", "27°18'19.0", "27°40'93.2",
"Missed", "27°10'85.2", "27°17'79.2", "27°16'04.", "Missed",
"27°15'85."), dim = c(30L, 2L), dimnames = list(NULL, c("Longitude_E_Corrected",
"Latitude_N_Corrected")))