I have this coin flip data:
library(dplyr)
library(knitr)
library(kableExtra)
set.seed(123)
n_flips <- 100
flips <- sample(c("H", "T"), n_flips, replace = TRUE)
I manually calculate all conditional probabilities and summaries the results in a table:
get_conditional_prob <- function(sequence, data) {
n <- nchar(sequence)
windows <- character(length(data) - n)
next_outcomes <- character(length(data) - n)
for(i in 1:(length(data) - n)) {
windows[i] <- paste(data[i:(i+n-1)], collapse="")
next_outcomes[i] <- data[i+n]
}
matches <- windows == sequence
count <- sum(matches)
if(count > 0) {
next_after_matches <- next_outcomes[matches]
prob_h <- mean(next_after_matches == "H")
prob_t <- mean(next_after_matches == "T")
} else {
prob_h <- NA
prob_t <- NA
}
return(c(prob_h, prob_t, count))
}
sequences_1 <- c("H", "T")
sequences_2 <- c("HH", "HT", "TH", "TT")
sequences_3 <- c("HHH", "HHT", "HTH", "HTT", "THH", "THT", "TTH", "TTT")
sequences_4 <- c("HHHH", "HHHT", "HHTH", "HHTT", "HTHH", "HTHT", "HTTH", "HTTT",
"THHH", "THHT", "THTH", "THTT", "TTHH", "TTHT", "TTTH", "TTTT")
sequences_5 <- c("HHHHH", "HHHHT", "HHHTH", "HHHTT", "HHTHH", "HHTHT", "HHTTH", "HHTTT",
"HTHHH", "HTHHT", "HTHTH", "HTHTT", "HTTHH", "HTTHT", "HTTTH", "HTTTT",
"THHHH", "THHHT", "THHTH", "THHTT", "THTHH", "THTHT", "THTTH", "THTTT",
"TTHHH", "TTHHT", "TTHTH", "TTHTT", "TTTHH", "TTTHT", "TTTTH", "TTTTT")
all_sequences <- c(sequences_1, sequences_2, sequences_3, sequences_4, sequences_5)
results <- data.frame(
Sequence = character(),
Next_H = numeric(),
Next_T = numeric(),
Count = numeric(),
stringsAsFactors = FALSE
)
for(seq in all_sequences) {
probs <- get_conditional_prob(seq, flips)
results <- rbind(results,
data.frame(
Sequence = seq,
Next_H = probs[1],
Next_T = probs[2],
Count = probs[3]
))
}
results_formatted <- results %>%
mutate(
Length = nchar(Sequence),
Next_H = round(Next_H, 3),
Next_T = round(Next_T, 3)
) %>%
arrange(Length, Sequence) %>%
select(
'Pattern' = Sequence,
'Length' = Length,
'P(H|Pattern)' = Next_H,
'P(T|Pattern)' = Next_T,
'Occurrences' = Count
)
kable(results_formatted,
format = "html",
caption = "Conditional Probabilities in Coin Flip Sequence (up to length 5)",
align = c('l', 'c', 'c', 'c', 'c')) %>%
kable_styling(
bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE,
position = "left",
font_size = 12
) %>%
add_header_above(c(" " = 2, "Conditional Probabilities" = 2, " " = 1)) %>%
row_spec(0, bold = TRUE) %>%
pack_rows("Single Flip", 1, 2) %>%
pack_rows("Two Flips", 3, 6) %>%
pack_rows("Three Flips", 7, 14) %>%
pack_rows("Four Flips", 15, 30) %>%
pack_rows("Five Flips", 31, 62)
Is there something I can do that automatically calculates all combinations without having to manually enumerate them? This will get quite lengthy to manually enumerate all combinations and then feed them into the code. Is there an easier way to do this?
flips <- c("H","T")
library(gtools)
lapply(1:5, \(r)
apply(permutations(2, r, flips, repeats=TRUE), 1, paste, collapse="")) |>
unlist()
[1] "H" "T" "HH" "HT" "TH" "TT" "HHH" "HHT"
[9] "HTH" "HTT" "THH" "THT" "TTH" "TTT" "HHHH" "HHHT"
[17] "HHTH" "HHTT" "HTHH" "HTHT" "HTTH" "HTTT" "THHH" "THHT"
[25] "THTH" "THTT" "TTHH" "TTHT" "TTTH" "TTTT" "HHHHH" "HHHHT"
[33] "HHHTH" "HHHTT" "HHTHH" "HHTHT" "HHTTH" "HHTTT" "HTHHH" "HTHHT"
[41] "HTHTH" "HTHTT" "HTTHH" "HTTHT" "HTTTH" "HTTTT" "THHHH" "THHHT"
[49] "THHTH" "THHTT" "THTHH" "THTHT" "THTTH" "THTTT" "TTHHH" "TTHHT"
[57] "TTHTH" "TTHTT" "TTTHH" "TTTHT" "TTTTH" "TTTTT"