I have this dataframe:
structure(list(Batch = c("0L", "0L", "0L", "0L", "0L", "0L",
"0L", "0L", "0L", "0L", "100L", "100L", "100L", "100L", "100L",
"100L", "100L", "100L", "100L", "100L", "100L", "100L", "25L",
"25L", "25L", "25L", "25L", "25L", "25L", "25L", "25L", "25L",
"25L", "25L", "50L", "50L", "50L", "50L", "50L", "50L", "50L",
"50L", "50L", "50L", "50L", "50L", "75L", "75L", "75L", "75L",
"75L", "75L", "75L", "75L", "75L", "75L", "75L", "75L"), Time = c("T0",
"T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0",
"T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0",
"T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0",
"T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0",
"T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0",
"T0", "T0"), Treatment = c("U", "U", "T", "T", "T", "T", "T",
"T", "T", "T", "U", "U", "T", "T", "T", "T", "T", "T", "T", "T",
"T", "T", "U", "U", "T", "T", "T", "T", "T", "T", "T", "T", "T",
"T", "U", "U", "T", "T", "T", "T", "T", "T", "T", "T", "T", "T",
"U", "U", "T", "T", "T", "T", "T", "T", "T", "T", "T", "T"),
Replicate = c(1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 3, 2, 3, 1,
2, 1, 2, 2, 3, 1, 3, 1, 2, 1, 3, 2, 3, 2, 3, 2, 3, 2, 3,
1, 2, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, 2, 3, 2, 3, 1, 3, 2,
3, 1, 2, 2, 3), Read_Count = c(1014, 3040, 29, 76, 8, 86,
20, 0, 1, 8, 5412, 6286, 8272, 12322, 751, 2667, 891, 2428,
2581, 4246, 575, 703, 516, 2469, 630, 945, 3135, 1888, 2086,
2289, 2802, 3736, 1120, 1756, 1001, 2087, 4858, 3248, 253,
715, 154, 775, 1333, 1242, 1211, 1579, 6168, 3952, 1916,
1636, 276, 337, 919, 1194, 109, 610, 605, 982), Organism = c("OrgA",
"OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA",
"OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA",
"OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA",
"OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA",
"OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA",
"OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA",
"OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA",
"OrgA"), Concentration = c(0, 0, 6.25, 6.25, 12.5, 12.5,
25, 49.75, 62.5, 62.5, 0, 0, 6.25, 6.25, 12.5, 12.5, 25,
25, 49.75, 49.75, 62.5, 62.5, 0, 0, 6.25, 6.25, 12.5, 12.5,
25, 25, 49.75, 49.75, 62.5, 62.5, 0, 0, 6.25, 6.25, 12.5,
12.5, 25, 25, 49.75, 49.75, 62.5, 62.5, 0, 0, 6.25, 6.25,
12.5, 12.5, 25, 25, 49.75, 49.75, 62.5, 62.5)), class = "data.frame", row.names = c(NA,
58L))
The first 20 rows look like this:
Batch Time Treatment Replicate Read_Count Organism Concentration
1 0L T0 U 1 1014 OrgA 0.00
2 0L T0 U 2 3040 OrgA 0.00
3 0L T0 T 1 29 OrgA 6.25
4 0L T0 T 2 76 OrgA 6.25
5 0L T0 T 1 8 OrgA 12.50
6 0L T0 T 2 86 OrgA 12.50
7 0L T0 T 2 20 OrgA 25.00
8 0L T0 T 1 0 OrgA 49.75
9 0L T0 T 1 1 OrgA 62.50
10 0L T0 T 2 8 OrgA 62.50
11 100L T0 U 2 5412 OrgA 0.00
12 100L T0 U 3 6286 OrgA 0.00
13 100L T0 T 2 8272 OrgA 6.25
14 100L T0 T 3 12322 OrgA 6.25
15 100L T0 T 1 751 OrgA 12.50
16 100L T0 T 2 2667 OrgA 12.50
17 100L T0 T 1 891 OrgA 25.00
18 100L T0 T 2 2428 OrgA 25.00
19 100L T0 T 2 2581 OrgA 49.75
20 100L T0 T 3 4246 OrgA 49.75
I want to perform a calculation on the data where I group by Batch, Time, Treatment, Organism, and Concentration. Subtracting the average of the T0 Read_Count where Treatment == U from the average of the T0 Read_Count where Treatment == T all divided by the average of the T0 Read_Count where Treatment == U all subtracted from 1.
For rows 11-14 in my example, I should have a final value of 1.760471876. Where the calculation is something like:
1-((average(c(5412, 6286))-average(c(8272, 12322))/average(c(5412, 6286)))
I tried this but it doesn't give me the correct final answer of 1.760471876
bisDF <- bis %>% group_by(Batch, Time, Treatment, Organism, Concentration) %>%
mutate(Via_Average = (1-(mean(Read_Count)-mean(Read_Count[Treatment == "T"])/mean(Read_Count))))
Note that my dataframe would be much longer with multiple Organisms
, not just the single organism I included here. Time
could have values in addition to T0
although this dataset only has T0
as the Time
. All TX values would be subtracted from the T0 value.
I believe you are trying to do something like this:
library(dplyr)
# Function to estimate the ratio of the mean in T to the U-mean
f <- \(u, rc) mean(rc, na.rm=T)/mean(u)
d |>
mutate(u_conc = mean(Read_Count[Treatment=="U"], na.rm=T), .by = c(Batch, Time, Organism)) |>
filter(Treatment == "T") |>
summarize(ratio_T_to_U = f(u_conc, Read_Count),.by = c(Batch, Time, Organism, Concentration))
Output:
Batch Time Organism Concentration ratio_T_to_U
1 0L T0 OrgA 6.25 0.025900345
2 0L T0 OrgA 12.50 0.023186976
3 0L T0 OrgA 25.00 0.009866798
4 0L T0 OrgA 49.75 0.000000000
5 0L T0 OrgA 62.50 0.002220030
6 100L T0 OrgA 6.25 1.760471876
7 100L T0 OrgA 12.50 0.292186699
8 100L T0 OrgA 25.00 0.283723713
9 100L T0 OrgA 49.75 0.583604035
10 100L T0 OrgA 62.50 0.109249444
11 25L T0 OrgA 6.25 0.527638191
12 25L T0 OrgA 12.50 1.682747069
13 25L T0 OrgA 25.00 1.465661642
14 25L T0 OrgA 49.75 2.190284757
15 25L T0 OrgA 62.50 0.963484087
16 50L T0 OrgA 6.25 2.625000000
17 50L T0 OrgA 12.50 0.313471503
18 50L T0 OrgA 25.00 0.300841969
19 50L T0 OrgA 49.75 0.833873057
20 50L T0 OrgA 62.50 0.903497409
21 75L T0 OrgA 6.25 0.350988142
22 75L T0 OrgA 12.50 0.060573123
23 75L T0 OrgA 25.00 0.208794466
24 75L T0 OrgA 49.75 0.071047431
25 75L T0 OrgA 62.50 0.156818182