I have data of fish stomach contents (prey items).
In my original df, each fish (with a unique FID) had multiple rows(observations) - one row per unique prey taxon found. For example, if fish #10 had both daphnia and goby in its stomach, there were two rows for that same fish (one row with # of daphnia in that fish's stomach and one row for # of goby in that same stomach); if the fish only had daphnia in their stomach then they had one row; and so on.
I have converted my data from long to wide format to have one observation per row (one unique fish per row).
I am trying to calculate the proportion of empty stomachs by month (when totalnumPrey == 0).
Reproducible data (shortened; complete data has 488 observations):
structure(list(id = c("1001_28", "1001_29", "1001_30", "1001_31",
"1001_32", "1001_33", "1001_34", "1001_35", "1023_3", "614_1",
"614_3", "616_1", "616_3", "616_4", "616_5", "616_6", "824_23",
"824_24", "824_25", "824_26", "824_28", "824_29", "824_30", "824_31",
"824_32", "824_33", "824_35"), CRN = c(1001L, 1001L, 1001L, 1001L,
1001L, 1001L, 1001L, 1001L, 1023L, 614L, 614L, 616L, 616L, 616L,
616L, 616L, 824L, 824L, 824L, 824L, 824L, 824L, 824L, 824L, 824L,
824L, 824L), FID = c(28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L,
3L, 1L, 3L, 1L, 3L, 4L, 5L, 6L, 23L, 24L, 25L, 26L, 28L, 29L,
30L, 31L, 32L, 33L, 35L), ac = c(2L, 2L, 1L, 1L, 1L, 1L, 0L,
0L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L), mm = c(200L, 159L, 117L, 120L, 108L, 103L, 92L,
97L, 104L, 301L, 163L, 85L, 271L, 290L, 330L, 294L, 270L, 260L,
266L, 197L, 195L, 185L, 160L, 157L, 178L, 166L, 149L), gr = c(95,
44, 15.1, 16.1, 11, 10, 6.9, 7.9, 10.9, 418, 62, 6.8, 311, 453,
593, 395, 283, 275, 261, 96, 90, 90, 56, 50, 57, 62, 45.5), catch = c(2L,
2L, 4L, 4L, 4L, 4L, 2L, 2L, 1L, 3L, 3L, 1L, 5L, 5L, 5L, 5L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 14L), Daphnia = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Byths = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
18L, 79L, 71L, 8L, 73L, 0L, 38L, 39L), Chiro.Pupae = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 255L, 7L, 0L, 576L, 590L, 536L, 576L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Empty = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Chiro.Larvae = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 5L, 38L, 0L, 9L, 0L, 0L, 0L), Amphipod = c(0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 6L, 0L, 0L, 0L, 4L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Isopod = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 5L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L), Chironomidae = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L), Hemimysis = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), Copepoda = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), Sphaeriidae = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), Chiro.Adult = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 74L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), Trichopteran = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L), UID.Fish = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), Chydoridae = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
200L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L), Cyclopoid = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), Fish.Eggs = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L), EggMass = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), Dreissena = c(0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L
), Goby = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Eurycercidae = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Hirudinea = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), totalnumPrey = c(0, 0, 0,
0, 1, 0, 0, 0, 200, 262, 81, 0, 576, 595, 536, 582, 0, 0, 0,
19, 84, 110, 9, 82, 0, 38, 40), MONTH = c(11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L), DAY = c(4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 6L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 18L, 18L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L), empty = c("Empty",
"Empty", "Empty", "Empty", "Not_empty", "Empty", "Empty", "Empty",
"Not_empty", "Not_empty", "Not_empty", "Empty", "Not_empty",
"Not_empty", "Not_empty", "Not_empty", "Empty", "Empty", "Empty",
"Not_empty", "Not_empty", "Not_empty", "Not_empty", "Not_empty",
"Empty", "Not_empty", "Not_empty")), row.names = c(NA, -27L), class = c("data.table",
"data.frame"))
I haven't been able to figure out a way to calculate proportion using counts instead of actual values (since I need to count the 0 values by group and not use the actual 0 value to calculate the proportion).
I have tried the following:
example %>%
group_by(empty, MONTH) %>%
summarise(totalnumPrey = n()) %>%
mutate(prop = n / sum(n))
This gives the following error:
Error in `mutate()`:
! Problem while computing `prop = n/sum(n)`.
ℹ The error occurred in group 1: empty = "Empty".
Caused by error in `sum()`:
! invalid 'type' (closure) of argument
I also tried this:
transform(example,
perc = ave(totalnumPrey,
empty,
FUN = prop.table))
but this doesn't give me what I need...
Also this:
example %>%
group_by(MONTH) %>%
summarise(n = n()) %>%
mutate(freq = n / sum(n))
which gives me proportion by month, not what I need (i.e. for June it's doing 127/362 = 0.35)...
I have tried many other ways from examples I found in other SO posts but still can't get what I need.
Is there a way I can calculate the proportion of empty vs non-empty stomachs by month?
I also need to do this for each prey type/taxon. For example, proportion of individual fish that contain "Isopod" and so on for each unique taxon in my data. Presence/absence type of proportions. I mainly want to do this by month first, but will eventually use other groupings.
When I had the data in long format, I was able to calculate proportion of each prey item within one fish stomach by using:
transform(a,
perc = ave(number,
id,
FUN = prop.table))
data not included here.. but 'number' here being the total count of each unique prey taxa/group per stomach/fish & 'id' unique identifier I created to distinguish between different fish (since there were multiple rows for same fish).
I am happy to clarify anything that is not clear or add additional data if needed. I have searched online and in SO for a few days but still can't figure this out. Thank you in advance.
I think this is what you need.
What we need to do is to count the number of times the column empty
is equal to "Empty"
per each group - so we can do this using sum(empty=="Empty")
and then divide by the number of rows in that group n()
.
library(dplyr)
dat %>%
group_by(MONTH) %>%
summarise(
prop_empty = sum(empty=="Empty")/n(),
prop_not_empy = sum(empty != "Empty")/n()
)
# A tibble: 3 × 3
MONTH prop_empty prop_not_empy
<int> <dbl> <dbl>
1 6 0.143 0.857
2 8 0.364 0.636
3 11 0.778 0.222