rgroup-by

How to calculate difference with multiple group_by and mean to time 0 value


I have this dataframe:

    structure(list(Batch = c("0L", "0L", "0L", "0L", "0L", "0L", 
"0L", "0L", "0L", "0L", "100L", "100L", "100L", "100L", "100L", 
"100L", "100L", "100L", "100L", "100L", "100L", "100L", "25L", 
"25L", "25L", "25L", "25L", "25L", "25L", "25L", "25L", "25L", 
"25L", "25L", "50L", "50L", "50L", "50L", "50L", "50L", "50L", 
"50L", "50L", "50L", "50L", "50L", "75L", "75L", "75L", "75L", 
"75L", "75L", "75L", "75L", "75L", "75L", "75L", "75L"), Time = c("T0", 
"T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", 
"T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", 
"T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", 
"T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", 
"T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", "T0", 
"T0", "T0"), Treatment = c("U", "U", "T", "T", "T", "T", "T", 
"T", "T", "T", "U", "U", "T", "T", "T", "T", "T", "T", "T", "T", 
"T", "T", "U", "U", "T", "T", "T", "T", "T", "T", "T", "T", "T", 
"T", "U", "U", "T", "T", "T", "T", "T", "T", "T", "T", "T", "T", 
"U", "U", "T", "T", "T", "T", "T", "T", "T", "T", "T", "T"), 
    Replicate = c(1, 2, 1, 2, 1, 2, 2, 1, 1, 2, 2, 3, 2, 3, 1, 
    2, 1, 2, 2, 3, 1, 3, 1, 2, 1, 3, 2, 3, 2, 3, 2, 3, 2, 3, 
    1, 2, 2, 3, 1, 2, 1, 2, 2, 3, 2, 3, 2, 3, 2, 3, 1, 3, 2, 
    3, 1, 2, 2, 3), Read_Count = c(1014, 3040, 29, 76, 8, 86, 
    20, 0, 1, 8, 5412, 6286, 8272, 12322, 751, 2667, 891, 2428, 
    2581, 4246, 575, 703, 516, 2469, 630, 945, 3135, 1888, 2086, 
    2289, 2802, 3736, 1120, 1756, 1001, 2087, 4858, 3248, 253, 
    715, 154, 775, 1333, 1242, 1211, 1579, 6168, 3952, 1916, 
    1636, 276, 337, 919, 1194, 109, 610, 605, 982), Organism = c("OrgA", 
    "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", 
    "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", 
    "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", 
    "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", 
    "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", 
    "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", 
    "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", "OrgA", 
    "OrgA"), Concentration = c(0, 0, 6.25, 6.25, 12.5, 12.5, 
    25, 49.75, 62.5, 62.5, 0, 0, 6.25, 6.25, 12.5, 12.5, 25, 
    25, 49.75, 49.75, 62.5, 62.5, 0, 0, 6.25, 6.25, 12.5, 12.5, 
    25, 25, 49.75, 49.75, 62.5, 62.5, 0, 0, 6.25, 6.25, 12.5, 
    12.5, 25, 25, 49.75, 49.75, 62.5, 62.5, 0, 0, 6.25, 6.25, 
    12.5, 12.5, 25, 25, 49.75, 49.75, 62.5, 62.5)), class = "data.frame", row.names = c(NA, 
58L))

The first 20 rows look like this:

Batch Time Treatment Replicate Read_Count Organism Concentration
1     0L   T0         U         1       1014     OrgA          0.00
2     0L   T0         U         2       3040     OrgA          0.00
3     0L   T0         T         1         29     OrgA          6.25
4     0L   T0         T         2         76     OrgA          6.25
5     0L   T0         T         1          8     OrgA         12.50
6     0L   T0         T         2         86     OrgA         12.50
7     0L   T0         T         2         20     OrgA         25.00
8     0L   T0         T         1          0     OrgA         49.75
9     0L   T0         T         1          1     OrgA         62.50
10    0L   T0         T         2          8     OrgA         62.50
11  100L   T0         U         2       5412     OrgA          0.00
12  100L   T0         U         3       6286     OrgA          0.00
13  100L   T0         T         2       8272     OrgA          6.25
14  100L   T0         T         3      12322     OrgA          6.25
15  100L   T0         T         1        751     OrgA         12.50
16  100L   T0         T         2       2667     OrgA         12.50
17  100L   T0         T         1        891     OrgA         25.00
18  100L   T0         T         2       2428     OrgA         25.00
19  100L   T0         T         2       2581     OrgA         49.75
20  100L   T0         T         3       4246     OrgA         49.75

I want to perform a calculation on the data where I group by Batch, Time, Treatment, Organism, and Concentration. Subtracting the average of the T0 Read_Count where Treatment == U from the average of the T0 Read_Count where Treatment == T all divided by the average of the T0 Read_Count where Treatment == U all subtracted from 1.

For rows 11-14 in my example, I should have a final value of 1.760471876. Where the calculation is something like:

1-((average(c(5412, 6286))-average(c(8272, 12322))/average(c(5412, 6286)))

I tried this but it doesn't give me the correct final answer of 1.760471876

bisDF <- bis %>% group_by(Batch, Time, Treatment, Organism, Concentration) %>%
  mutate(Via_Average = (1-(mean(Read_Count)-mean(Read_Count[Treatment == "T"])/mean(Read_Count))))

Note that my dataframe would be much longer with multiple Organisms, not just the single organism I included here. Time could have values in addition to T0 although this dataset only has T0 as the Time. All TX values would be subtracted from the T0 value.


Solution

  • I believe you are trying to do something like this:

    library(dplyr)
    
    # Function to estimate the ratio of the mean in T to the U-mean
    f <- \(u, rc) mean(rc, na.rm=T)/mean(u)
    
    d |>
      mutate(u_conc = mean(Read_Count[Treatment=="U"], na.rm=T), .by = c(Batch, Time, Organism)) |> 
      filter(Treatment == "T") |> 
      summarize(ratio_T_to_U = f(u_conc, Read_Count),.by = c(Batch, Time, Organism, Concentration))
    

    Output:

       Batch Time Organism Concentration ratio_T_to_U
    1     0L   T0     OrgA          6.25  0.025900345
    2     0L   T0     OrgA         12.50  0.023186976
    3     0L   T0     OrgA         25.00  0.009866798
    4     0L   T0     OrgA         49.75  0.000000000
    5     0L   T0     OrgA         62.50  0.002220030
    6   100L   T0     OrgA          6.25  1.760471876
    7   100L   T0     OrgA         12.50  0.292186699
    8   100L   T0     OrgA         25.00  0.283723713
    9   100L   T0     OrgA         49.75  0.583604035
    10  100L   T0     OrgA         62.50  0.109249444
    11   25L   T0     OrgA          6.25  0.527638191
    12   25L   T0     OrgA         12.50  1.682747069
    13   25L   T0     OrgA         25.00  1.465661642
    14   25L   T0     OrgA         49.75  2.190284757
    15   25L   T0     OrgA         62.50  0.963484087
    16   50L   T0     OrgA          6.25  2.625000000
    17   50L   T0     OrgA         12.50  0.313471503
    18   50L   T0     OrgA         25.00  0.300841969
    19   50L   T0     OrgA         49.75  0.833873057
    20   50L   T0     OrgA         62.50  0.903497409
    21   75L   T0     OrgA          6.25  0.350988142
    22   75L   T0     OrgA         12.50  0.060573123
    23   75L   T0     OrgA         25.00  0.208794466
    24   75L   T0     OrgA         49.75  0.071047431
    25   75L   T0     OrgA         62.50  0.156818182