rmatrix

Recording the First Time an Event Happens


I have this 4 state Markov Chain in R:

enter image description here

I wrote the following (100 iteration) simulation that simulates 100 hospital patients (all patients start in state 1) and tracks which state they are in during each iteration of the simulation. At the start of each iteration, we see which state each patient is in as of the last iteration... and then move the patient according to the transition matrix probabilities (do this for each patient). E.g.

I then repeated this entire process 1000 times and plotted the results:

   library(reshape2)
library(ggplot2)
library(dplyr)
library(tidyr)

# transition matrix
P <- matrix(c(1/3, 1/3, 1/3, 0,
              1/2, 1/2, 0,   0,
              0,   0,   1/2, 1/2,
              0,   0,   0,   1),
            nrow = 4, ncol = 4, byrow = TRUE)

n_iterations <- 100
n_people <- 100
n_simulations <- 100

run_simulation <- function(sim_number) {
    state_matrix <- matrix(0, nrow = n_iterations, ncol = n_people)
    state_matrix[1, ] <- 1  # All start in state 1
    
    for (i in 2:n_iterations) {
        for (person in 1:n_people) {
            current_state <- state_matrix[i-1, person]
            state_matrix[i, person] <- sample(1:4, 1, prob = P[current_state, ])
        }
    }
    
    state_counts <- apply(state_matrix, 1, function(x) table(factor(x, levels = 1:4)))
    percentages <- as.data.frame(t(state_counts) / n_people * 100)  # Convert to percentages (0-100)
    
    # Create patient summary
    patient_summary <- data.frame(
        patient = 1:n_people,
        simulation = sim_number,
        state1 = rowSums(state_matrix == 1),
        state2 = rowSums(state_matrix == 2),
        state3 = rowSums(state_matrix == 3),
        state4 = rowSums(state_matrix == 4)
    )
    
    list(percentages = percentages, patient_summary = patient_summary)
}

# Run simulations
set.seed(123)  # For reproducibility
all_simulations <- vector("list", n_simulations)
all_patient_summaries <- vector("list", n_simulations)

for (sim in 1:n_simulations) {
    simulation_result <- run_simulation(sim)
    all_simulations[[sim]] <- simulation_result$percentages
    all_patient_summaries[[sim]] <- simulation_result$patient_summary
    if (sim %% 10 == 0) {  # Print progress every 10 simulations
        cat(sprintf("Completed simulation %d of %d\n", sim, n_simulations))
    }
}

# Combine all patient summaries into one data frame
final_patient_summary <- do.call(rbind, all_patient_summaries)

# Reorder columns to put patient and simulation first
final_patient_summary <- final_patient_summary[, c("patient", "simulation", "state1", "state2", "state3", "state4")]

# calculate the average percentages across all simulations
average_percentages <- Reduce(`+`, all_simulations) / n_simulations
average_percentages$iteration <- 1:n_iterations

# function to calculate confidence intervals
calculate_ci <- function(x) {
    quantile(x, probs = c(0.05, 0.95))
}

# confidence intervals
ci_data <- lapply(1:n_iterations, function(i) {
    data.frame(
        iteration = i,
        state = as.character(1:4),
        lower = sapply(1:4, function(j) calculate_ci(sapply(all_simulations, function(sim) sim[i, j]))[1]),
        upper = sapply(1:4, function(j) calculate_ci(sapply(all_simulations, function(sim) sim[i, j]))[2])
    )
})
ci_data <- do.call(rbind, ci_data)

# reshape 
average_percentages_long <- average_percentages %>%
    pivot_longer(cols = as.character(1:4),
                 names_to = "state",
                 values_to = "percentage")

plot_data <- merge(average_percentages_long, ci_data, by = c("iteration", "state"))

ggplot(plot_data, aes(x = iteration, y = percentage, color = state, fill = state)) +
    geom_line() +
    geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2) +
    scale_color_manual(values = c("1" = "blue", "2" = "red", "3" = "green", "4" = "purple"),
                       labels = paste("State", 1:4)) +
    scale_fill_manual(values = c("1" = "blue", "2" = "red", "3" = "green", "4" = "purple"),
                      labels = paste("State", 1:4)) +
    labs(x = "Iteration", y = "Average Percentage of People", color = "State", fill = "State") +
    ggtitle("Average Percentage of People in Each State Over Time (1000 Simulations)") +
    theme_minimal() +
    scale_y_continuous(limits = c(0, 100))

print(head(final_patient_summary))

enter image description here

Objective: I am trying to modify this simulation to record at each iteration: how many patients are first entering state 4 vs how many patient are already in state 4. For example:

  simulation iteration patient state
          1        1      87     ...
          1        1      88     ...
          1        1      89     ...
          ..      ...      ...   ...
          1        2      87     ...
          1        2      88     ...
          1        2      89     ...
          ..      ...      ...   ...
          1        3      87     ...
          1        3      88     ...
          1        3      89     ...
          ...      ...      ...   ...
          2        1      87     ...
          2        1      88     ...
          2        1      89     ...
          ...     ...      ...   ...
          2        2      87     ...
          2        2      88     ...
          2        2      89     ...
          ...      ...     ...   ...

Here, the state column will have values: state1, state2, state3, state_4_new, state_4_existing

R code for visualizing the Markov Chain

grViz("
  digraph {
    rankdir=LR;
    node [shape = circle]
    1 [label = '1', style = filled, fillcolor = green]
    2 [label = '2']
    3 [label = '3']
    4 [label = '4', style = filled, fillcolor = red]
    
    1 -> 1 [label = '1/3']
    1 -> 2 [label = '1/3']
    1 -> 3 [label = '1/3']
    2 -> 1 [label = '1/2']
    2 -> 2 [label = '1/2']
    3 -> 3 [label = '1/2']
    3 -> 4 [label = '1/2']
    4 -> 4 [label = '1']
  }
")

Solution

  • Simply add a new state (e.g. 5). Any patient in state 4, will be "discharged" with a probability of 1. So, they will be in state 4 for one iteration but in the next iterations they will be in state 5. Your transition matrix will be like:

    # transition matrix
    P <- matrix(c(1/3, 1/3, 1/3, 0  , 0, 
                  1/2, 1/2, 0,   0  , 0, 
                  0,   0,   1/2, 1/2, 0, 
                  0,   0,   0,   0  , 1,
                  0,   0,   0,   0  , 1), 
                nrow = 5, ncol = 5, byrow = TRUE)
    

    Here is a diagram to visualize that:

    DiagrammeR::grViz("
      digraph {
        rankdir=LR;
        node [shape = circle]
        0 [label = 'New', style = filled, fillcolor = grey]
        1 [label = '1', style = filled, fillcolor = green]
        2 [label = '2']
        3 [label = '3']
        4 [label = '4', style = filled, fillcolor = yellow]
        5 [label = '5', style = filled, fillcolor = red]
        
        0 -> 1 [label = 'Admitted']
        1 -> 1 [label = '1/3']
        1 -> 2 [label = '1/3']
        1 -> 3 [label = '1/3']
        2 -> 1 [label = '1/2']
        2 -> 2 [label = '1/2']
        3 -> 3 [label = '1/2']
        3 -> 4 [label = '1/2']
        4 -> 5 [label = '1 (discharged)']
        5 -> 5 [label = '1']
      }
    ")