rtime-seriesdiscrete

Analysing the pattern in the growth of organisms using discrete data


I have data of the size of 200 individuals of a particular plant species. But the size was measured in an indirect way, counting the number of leaves (discrete data), monthly during a total of 14 months. The germination, growth and death of the plants are very irregular, with some plants having a long life span, other dying quickly, and also the germination in time is irregular: new plants kept germinating during all the study period and being incorporated into the study. Here is an example of my data (numbers inside cells refer to the number of leaves):

Month.1 Month.2 Month.3 Month.4 Month.5 Month.6 Month.7
plant.1 3 21 15 - - - -
plant.2 - 7 14 - - - -
plant.3 - 8 12 10 - - -
plant.4 - - 1 3 5 - -
plant.5 - 3 6 18 13 4 -
..... ... ... ... ... ... ... ...

Following Shibaprasadb's suggestion, here is a subset of my data, already converted to long format:

df <- dput(df)
structure(list(plant = c(1L, 1L, 1L, 1L, 1L, 2L, 3L, 3L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 28L, 29L, 29L, 29L, 30L, 31L, 32L, 32L, 
32L, 32L, 32L, 62L, 62L, 63L, 64L, 65L, 65L, 65L, 65L, 66L, 67L, 
67L, 67L), month = c(4L, 5L, 6L, 7L, 8L, 4L, 7L, 8L, 2L, 3L, 
4L, 5L, 6L, 7L, 8L, 7L, 6L, 7L, 8L, 6L, 7L, 4L, 5L, 6L, 7L, 8L, 
5L, 6L, 6L, 6L, 5L, 6L, 7L, 8L, 8L, 2L, 3L, 4L), time = c(0L, 
1L, 2L, 3L, 4L, 0L, 0L, 1L, 0L, 1L, 2L, 3L, 4L, 5L, 6L, 0L, 0L, 
1L, 2L, 0L, 0L, 0L, 1L, 2L, 3L, 4L, 0L, 1L, 0L, 0L, 0L, 1L, 2L, 
3L, 0L, 0L, 1L, 2L), leaves = c(6L, 18L, 9L, 24L, 6L, 12L, 6L, 
6L, 63L, 66L, 15L, 9L, 15L, 21L, 12L, 12L, 3L, 42L, 12L, 9L, 
3L, 15L, 21L, 18L, 27L, 15L, 21L, 36L, 24L, 21L, 3L, 12L, 18L, 
6L, 3L, 15L, 3L, 3L), tray = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), soil_nutrient = c(62.4, 
71, 13.6, 37.6, 13.8, 62.4, 37.6, 13.8, 47.6, 51.8, 62.4, 71, 
13.6, 37.6, 13.8, 25.4, 14.2, 25.4, 17, 14.2, 25.4, 51.6, 72.6, 
14.2, 25.4, 17, 148.2, 29.4, 29.4, 29.4, 148.2, 29.4, 103.6, 
122.4, 122.4, 116.2, 141, 117.6), watering = c(81.6, 89, 10.6, 
57.8, 1.2, 81.6, 57.8, 1.2, 46, 48.6, 81.6, 89, 10.6, 57.8, 1.2, 
57.8, 10.6, 57.8, 1.2, 10.6, 57.8, 81.6, 89, 10.6, 57.8, 1.2, 
89, 10.6, 10.6, 10.6, 89, 10.6, 57.8, 1.2, 1.2, 46, 48.6, 81.6
), UV_treatment = c("n", "n", "n", "n", "n", "n", "n", "n", "n", 
"n", "n", "n", "n", "n", "n", "n", "n", "n", "n", "n", "n", "n", 
"n", "n", "n", "n", "n", "y", "y", "y", "n", "y", "n", "n", "n", 
"n", "n", "n")), class = "data.frame", row.names = c(NA, -38L
))

df
   plant month time leaves tray soil_nutrient watering UV_treatment
1      1     4    0      6    1          62.4     81.6            n
2      1     5    1     18    1          71.0     89.0            n
3      1     6    2      9    1          13.6     10.6            n
4      1     7    3     24    1          37.6     57.8            n
5      1     8    4      6    1          13.8      1.2            n
6      2     4    0     12    1          62.4     81.6            n
7      3     7    0      6    1          37.6     57.8            n
8      3     8    1      6    1          13.8      1.2            n
9      4     2    0     63    1          47.6     46.0            n
10     4     3    1     66    1          51.8     48.6            n
11     4     4    2     15    1          62.4     81.6            n
12     4     5    3      9    1          71.0     89.0            n
13     4     6    4     15    1          13.6     10.6            n
14     4     7    5     21    1          37.6     57.8            n
15     4     8    6     12    1          13.8      1.2            n
16    28     7    0     12    3          25.4     57.8            n
17    29     6    0      3    3          14.2     10.6            n
18    29     7    1     42    3          25.4     57.8            n
19    29     8    2     12    3          17.0      1.2            n
20    30     6    0      9    3          14.2     10.6            n
21    31     7    0      3    3          25.4     57.8            n
22    32     4    0     15    3          51.6     81.6            n
23    32     5    1     21    3          72.6     89.0            n
24    32     6    2     18    3          14.2     10.6            n
25    32     7    3     27    3          25.4     57.8            n
26    32     8    4     15    3          17.0      1.2            n
27    62     5    0     21    7         148.2     89.0            n
28    62     6    1     36    7          29.4     10.6            y
29    63     6    0     24    7          29.4     10.6            y
30    64     6    0     21    7          29.4     10.6            y
31    65     5    0      3    7         148.2     89.0            n
32    65     6    1     12    7          29.4     10.6            y
33    65     7    2     18    7         103.6     57.8            n
34    65     8    3      6    7         122.4      1.2            n
35    66     8    0      3    7         122.4      1.2            n
36    67     2    0     15    7         116.2     46.0            n
37    67     3    1      3    7         141.0     48.6            n
38    67     4    2      3    7         117.6     81.6            n

I'm interested in checking if there is some pattern in their growth, I mean if they first show an increase in the number of leaves, if they then get stable, and if they suddenly die or gradually decrease the number of leaves before dying.

I've been looking for phenology analysis in R, but what I've found so far is mainly related to climate change, global parameters, etc. On the other hand, growth analysis considering time series is focused on a long sequence of data of a particular variable, not several individuals.


Solution

  • library(tidyverse)
    library(ggpubr)
    library(lmerTest)
    #> Loading required package: lme4
    #> Loading required package: Matrix
    #> 
    #> Attaching package: 'Matrix'
    #> The following objects are masked from 'package:tidyr':
    #> 
    #>     expand, pack, unpack
    #> 
    #> Attaching package: 'lmerTest'
    #> The following object is masked from 'package:lme4':
    #> 
    #>     lmer
    #> The following object is masked from 'package:stats':
    #> 
    #>     step
    
    data <- tibble::tribble(
      ~Plant, ~Month.1, ~Moßnth.2, ~Month.3, ~Month.4, ~Month.5, ~Month.6, ~Month.7,
      "plant.1",      "3",     "21",      "15",      "-",      "-",      "-",      "-",
      "plant.2",      "-",      "7",      "14",      "-",      "-",      "-",      "-",
      "plant.3",      "-",      "8",      "12",     "10",      "-",      "-",      "-",
      "plant.4",      "-",      "-",       "1",      "3",      "5",      "-",      "-",
      "plant.5",      "-",      "3",       "6",     "18",     "13",      "4",      "-"
    )
    
    data <-
      data %>% 
      pivot_longer(-Plant, names_to = "Month", values_to = "n_leafs") %>%
      mutate(
        n_leafs = n_leafs %>% as.numeric(),
        Month = Month %>% str_extract("[0-9]+$") %>% as.numeric(),
        Plant = Plant %>% str_extract("[0-9]+$") %>% as.numeric()
      ) %>%
      # Normalization: Just count the number of months since seeding for each Plant
      group_by(Plant) %>%
      mutate(Month = Month - min(Month)) %>%
      ungroup()
    #> Warning in n_leafs %>% as.numeric(): NAs introduced by coercion
    
    #
    # The overall trend is that the number of leaves are getting lesser over time.
    # However, this is not significant.
    #
    
    data %>%
      ggplot(aes(Month, n_leafs, color = Plant)) +
      geom_point() +
      stat_smooth(method = "lm") +
      stat_cor()
    #> `geom_smooth()` using formula 'y ~ x'
    #> Warning: Removed 19 rows containing non-finite values (stat_smooth).
    #> Warning: Removed 19 rows containing non-finite values (stat_cor).
    #> Warning: Removed 19 rows containing missing values (geom_point).
    

    #
    # The number of leaves are not significantly different between the months
    #
    
    data %>%
      ggplot(aes(factor(Month), n_leafs)) +
      geom_boxplot() +
      stat_compare_means()
    #> Warning: Removed 19 rows containing non-finite values (stat_boxplot).
    #> Warning: Removed 19 rows containing non-finite values (stat_compare_means).
    

    
    #
    # There is still no significance after controlling for the different plants
    #
    
    lmer(n_leafs ~ Month + (1|Plant), data = data) %>%
      summary()
    #> Linear mixed model fit by REML. t-tests use Satterthwaite's method [
    #> lmerModLmerTest]
    #> Formula: n_leafs ~ Month + (1 | Plant)
    #>    Data: data
    #> 
    #> REML criterion at convergence: 96.8
    #> 
    #> Scaled residuals: 
    #>     Min      1Q  Median      3Q     Max 
    #> -1.1084 -0.7883 -0.2424  0.6603  1.8827 
    #> 
    #> Random effects:
    #>  Groups   Name        Variance Std.Dev.
    #>  Plant    (Intercept)  4.789   2.188   
    #>  Residual             34.943   5.911   
    #> Number of obs: 16, groups:  Plant, 5
    #> 
    #> Fixed effects:
    #>             Estimate Std. Error      df t value Pr(>|t|)  
    #> (Intercept)   8.2657     3.2163  9.1434   2.570   0.0298 *
    #> Month         0.3186     1.2149 13.4746   0.262   0.7971  
    #> ---
    #> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    #> 
    #> Correlation of Fixed Effects:
    #>       (Intr)
    #> Month -0.831
    

    Created on 2021-09-26 by the reprex package (v2.0.1)