I have this 4 state Markov Chain in R:
I wrote the following (100 iteration) simulation that simulates 100 hospital patients (all patients start in state 1) and tracks which state they are in during each iteration of the simulation. At the start of each iteration, we see which state each patient is in as of the last iteration... and then move the patient according to the transition matrix probabilities (do this for each patient). E.g.
I then repeated this entire process 1000 times and plotted the results:
library(reshape2)
library(ggplot2)
library(dplyr)
library(tidyr)
# transition matrix
P <- matrix(c(1/3, 1/3, 1/3, 0,
1/2, 1/2, 0, 0,
0, 0, 1/2, 1/2,
0, 0, 0, 1),
nrow = 4, ncol = 4, byrow = TRUE)
n_iterations <- 100
n_people <- 100
n_simulations <- 100
run_simulation <- function(sim_number) {
state_matrix <- matrix(0, nrow = n_iterations, ncol = n_people)
state_matrix[1, ] <- 1 # All start in state 1
for (i in 2:n_iterations) {
for (person in 1:n_people) {
current_state <- state_matrix[i-1, person]
state_matrix[i, person] <- sample(1:4, 1, prob = P[current_state, ])
}
}
state_counts <- apply(state_matrix, 1, function(x) table(factor(x, levels = 1:4)))
percentages <- as.data.frame(t(state_counts) / n_people * 100) # Convert to percentages (0-100)
# Create patient summary
patient_summary <- data.frame(
patient = 1:n_people,
simulation = sim_number,
state1 = rowSums(state_matrix == 1),
state2 = rowSums(state_matrix == 2),
state3 = rowSums(state_matrix == 3),
state4 = rowSums(state_matrix == 4)
)
list(percentages = percentages, patient_summary = patient_summary)
}
# Run simulations
set.seed(123) # For reproducibility
all_simulations <- vector("list", n_simulations)
all_patient_summaries <- vector("list", n_simulations)
for (sim in 1:n_simulations) {
simulation_result <- run_simulation(sim)
all_simulations[[sim]] <- simulation_result$percentages
all_patient_summaries[[sim]] <- simulation_result$patient_summary
if (sim %% 10 == 0) { # Print progress every 10 simulations
cat(sprintf("Completed simulation %d of %d\n", sim, n_simulations))
}
}
# Combine all patient summaries into one data frame
final_patient_summary <- do.call(rbind, all_patient_summaries)
# Reorder columns to put patient and simulation first
final_patient_summary <- final_patient_summary[, c("patient", "simulation", "state1", "state2", "state3", "state4")]
# calculate the average percentages across all simulations
average_percentages <- Reduce(`+`, all_simulations) / n_simulations
average_percentages$iteration <- 1:n_iterations
# function to calculate confidence intervals
calculate_ci <- function(x) {
quantile(x, probs = c(0.05, 0.95))
}
# confidence intervals
ci_data <- lapply(1:n_iterations, function(i) {
data.frame(
iteration = i,
state = as.character(1:4),
lower = sapply(1:4, function(j) calculate_ci(sapply(all_simulations, function(sim) sim[i, j]))[1]),
upper = sapply(1:4, function(j) calculate_ci(sapply(all_simulations, function(sim) sim[i, j]))[2])
)
})
ci_data <- do.call(rbind, ci_data)
# reshape
average_percentages_long <- average_percentages %>%
pivot_longer(cols = as.character(1:4),
names_to = "state",
values_to = "percentage")
plot_data <- merge(average_percentages_long, ci_data, by = c("iteration", "state"))
ggplot(plot_data, aes(x = iteration, y = percentage, color = state, fill = state)) +
geom_line() +
geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2) +
scale_color_manual(values = c("1" = "blue", "2" = "red", "3" = "green", "4" = "purple"),
labels = paste("State", 1:4)) +
scale_fill_manual(values = c("1" = "blue", "2" = "red", "3" = "green", "4" = "purple"),
labels = paste("State", 1:4)) +
labs(x = "Iteration", y = "Average Percentage of People", color = "State", fill = "State") +
ggtitle("Average Percentage of People in Each State Over Time (1000 Simulations)") +
theme_minimal() +
scale_y_continuous(limits = c(0, 100))
print(head(final_patient_summary))
Objective: I am trying to modify this simulation to record at each iteration: how many patients are first entering state 4 vs how many patient are already in state 4. For example:
simulation iteration patient state
1 1 87 ...
1 1 88 ...
1 1 89 ...
.. ... ... ...
1 2 87 ...
1 2 88 ...
1 2 89 ...
.. ... ... ...
1 3 87 ...
1 3 88 ...
1 3 89 ...
... ... ... ...
2 1 87 ...
2 1 88 ...
2 1 89 ...
... ... ... ...
2 2 87 ...
2 2 88 ...
2 2 89 ...
... ... ... ...
Here, the state column will have values: state1, state2, state3, state_4_new, state_4_existing
R code for visualizing the Markov Chain
grViz("
digraph {
rankdir=LR;
node [shape = circle]
1 [label = '1', style = filled, fillcolor = green]
2 [label = '2']
3 [label = '3']
4 [label = '4', style = filled, fillcolor = red]
1 -> 1 [label = '1/3']
1 -> 2 [label = '1/3']
1 -> 3 [label = '1/3']
2 -> 1 [label = '1/2']
2 -> 2 [label = '1/2']
3 -> 3 [label = '1/2']
3 -> 4 [label = '1/2']
4 -> 4 [label = '1']
}
")
Simply add a new state (e.g. 5). Any patient in state 4, will be "discharged" with a probability of 1. So, they will be in state 4 for one iteration but in the next iterations they will be in state 5. Your transition matrix will be like:
# transition matrix
P <- matrix(c(1/3, 1/3, 1/3, 0 , 0,
1/2, 1/2, 0, 0 , 0,
0, 0, 1/2, 1/2, 0,
0, 0, 0, 0 , 1,
0, 0, 0, 0 , 1),
nrow = 5, ncol = 5, byrow = TRUE)
Here is a diagram to visualize that:
DiagrammeR::grViz("
digraph {
rankdir=LR;
node [shape = circle]
0 [label = 'New', style = filled, fillcolor = grey]
1 [label = '1', style = filled, fillcolor = green]
2 [label = '2']
3 [label = '3']
4 [label = '4', style = filled, fillcolor = yellow]
5 [label = '5', style = filled, fillcolor = red]
0 -> 1 [label = 'Admitted']
1 -> 1 [label = '1/3']
1 -> 2 [label = '1/3']
1 -> 3 [label = '1/3']
2 -> 1 [label = '1/2']
2 -> 2 [label = '1/2']
3 -> 3 [label = '1/2']
3 -> 4 [label = '1/2']
4 -> 5 [label = '1 (discharged)']
5 -> 5 [label = '1']
}
")