Currently I have a dataframe of bear detections that I want to convert into a binary detection history (14 columns of day1, day2, day3, etc. where:
actual_date_out = the date the camera was deployed, binary detection history should start on this date
start_date = every date there's a detection of a bear, this is used to determine which of the 14 days should get a 1 for the detection history (e.g. actual_date_out = 12/1, start_date values for 12/3, 12/5, would yield 00101...)
What I'm trying to do is start a 14 day detection history (e.g. 001010001...) for each plot, starting with "actual_date_out" and ending 14 days after that. The problem is that sometimes the first detection (start_time) is e.g. 3 days after the deployment date and some dates don't have data, so I would have to fill in a 0 for some plots starting at the first deployment date (actual_date_out) and then for every preceding date that doesn't have a "start_time" column value (only dates that have start time row/presence have a 1). Some plots have 1 detection the whole time, others multiple for each day. The idea is each day just gets 0 or 1. Some plots may have multiple start_time values but each plot will only have one actual_date_out (each camera is only deployed once.)
Currently I have code that kind of works except it's putting a 1 for the first column and only 0s for all the preceding instead of searching for a start_time and assigning a 1. Only dates that also have a start_date get a value of 1.
library(tidyverse)
d <- read_csv(file.choose())
#Change actual_date_out to date format
d$actual_date_out <- as.Date(d$actual_date_out, format = "%m/%d/%Y")
d$retrieval_date <- as.Date(d$retrieval_date, format = "%m/%d/%Y")
d$start_time <- as.Date(d$start_time, format = "%m/%d/%Y")
#For each plot, find the earliest date in actual_date_out and then add the next 13 days. For each plot, each of the remaining 13 days should be added with a pres_abs = 0
d1 <- d |>
group_by(plot) |>
mutate(start_time = min(actual_date_out)) |>
complete(actual_date_out = seq.Date(first(start_time), first(start_time) + 20, by = "day")) |>
mutate(pres_abs = if_else(is.na(pres_abs), 0, pres_abs)) |>
ungroup()
# Removes duplicate entries by taking max of pres_abs
d2 <- d1 |>
group_by(plot, actual_date_out) |>
slice_max(pres_abs, with_ties = FALSE) |>
ungroup()
# Create news days variable as a sequence
d2 <- d2 |>
group_by(plot) |>
arrange(actual_date_out) |>
mutate(day = paste0("day_", row_number())) |>
ungroup()
# For each plot, take the actual_lat and actual_long from the first entry and propogate it to all the other entries for that plot
# For each plot, record the very first day in the 14 day period as actual_date_out
d2 <- d2 |>
group_by(plot) |>
mutate(actual_lat = first(actual_lat),
actual_long = first(actual_long),
first_day = first(actual_date_out)) |>
ungroup()
# Pivots to wider so each of the 14 days is a binarized variable
d3 <- d2 |>
select(-c(retrieval_date,start_time, actual_date_out)) |>
pivot_wider(id_cols = c(plot, actual_lat, actual_long, first_day),
names_from = day,
values_from = pres_abs)
> dput(d)
structure(list(plot = c("d22142", "d22142", "d22489", "d22489",
"d23081", "d23081", "d23081", "d23302", "d23544", "d23544", "d23544",
"d23544", "d23544", "d23544", "d23569", "d23569", "d23579", "d23647"
), actual_date_out = structure(c(17158, 17158, 17229, 17229,
17273, 17273, 17273, 17272, 17326, 17326, 17326, 17326, 17326,
17326, 17303, 17303, 17303, 17309), class = "Date"), retrieval_date = structure(c(17178,
17178, 17250, 17250, 17293, 17293, 17293, 17291, 17349, 17349,
17349, 17349, 17349, 17349, 17324, 17324, 17327, 17324), class = "Date"),
actual_lat = c(35.5767, 35.5767, 35.5901, 35.5901, 35.2851,
35.2851, 35.2851, 35.3086, 35.9439, 35.9439, 35.9439, 35.9439,
35.9439, 35.9439, 35.0581, 35.0581, 35.1264, 35.3453), actual_long = c(-82.4956,
-82.4956, -82.5901, -82.5901, -83.1089, -83.1089, -83.1089,
-82.5258, -82.6275, -82.6275, -82.6275, -82.6275, -82.6275,
-82.6275, -83.4274, -83.4274, -83.0983, -82.781), start_time = structure(c(17161,
17161, 17248, 17248, 17281, 17283, 17281, 17273, 17336, 17336,
17347, 17349, 17336, 17336, 17309, 17315, 17316, 17311), class = "Date"),
pres_abs = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1)), row.names = c(NA, -18L), spec = structure(list(
cols = list(plot = structure(list(), class = c("collector_character",
"collector")), actual_date_out = structure(list(), class = c("collector_character",
"collector")), retrieval_date = structure(list(), class = c("collector_character",
"collector")), actual_lat = structure(list(), class = c("collector_double",
"collector")), actual_long = structure(list(), class = c("collector_double",
"collector")), start_time = structure(list(), class = c("collector_character",
"collector")), pres_abs = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), delim = ","), class = "col_spec"), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"))
We can simply do a summarize()
and find the dates that have data in the 14-day window after actual_date_out
.
library(tidyverse)
d_binary <- d %>%
summarize(
actual_date_out = first(actual_date_out),
actual_lat = first(actual_lat),
actual_long = first(actual_long),
detection_history = paste0(as.numeric((actual_date_out + 0:13)
%in% start_time), collapse = ""),
.by = plot
)
#> # A tibble: 8 × 5
#> plot actual_date_out actual_lat actual_long detection_history
#> <chr> <date> <dbl> <dbl> <chr>
#> 1 d22142 2016-12-23 35.6 -82.5 00010000000000
#> 2 d22489 2017-03-04 35.6 -82.6 00000000000000
#> 3 d23081 2017-04-17 35.3 -83.1 00000000101000
#> 4 d23302 2017-04-16 35.3 -82.5 01000000000000
#> 5 d23544 2017-06-09 35.9 -82.6 00000000001000
#> 6 d23569 2017-05-17 35.1 -83.4 00000010000010
#> 7 d23579 2017-05-17 35.1 -83.1 00000000000001
#> 8 d23647 2017-05-23 35.3 -82.8 00100000000000
Here, I have done a simple visualization of the results;
d_binary %>%
rowwise() %>%
mutate(
days = list(1:14),
detections = list(as.numeric(strsplit(detection_history, "")[[1]])),
dates = list(actual_date_out + 0:13)
) %>%
unnest(c(days, detections, dates)) %>%
ungroup() %>%
mutate(
date_label = if_else(
detections == 1,
str_glue("{month(dates)}/{day(dates)}/{year(dates)}"), "")
) %>%
ggplot(., aes(x = days, y = reorder(plot, desc(plot)),
fill = factor(detections))) +
geom_tile(color = "white", linewidth = 0.8, width = 0.9, height = 0.8) +
geom_text(aes(label = date_label),
size = 2.5, color = "white", fontface = "bold") +
scale_fill_manual(
values = c("0" = "#d73027", "1" = "#1a9850"),
name = "Detection Status",
labels = c("0" = "Not Detected", "1" = "Detected")
) +
scale_x_continuous(
breaks = 1:14,
expand = c(0, 0)
) +
scale_y_discrete(expand = c(0, 0)) +
labs(
title = "Bear Detection History",
x = "Study Day",
y = "Plot ID"
) +
theme_minimal() +
theme(
axis.text.x = element_text(hjust = 1, size = 9),
axis.text.y = element_text(size = 9),
panel.grid = element_blank(),
panel.border = element_rect(color = "black", fill = NA, linewidth = 0.5),
plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
legend.position = "bottom",
legend.title = element_text(face = "bold")
)
Created on 2025-08-12 with reprex v2.1.1