I want to identify temporal patterns in a time series.
structure(list(ID = c("a", "b", "c", "d", "e", "f", "g", "h",
"i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u",
"v", "w", "x"), `2016/01` = c(1, NA, NA, 1, NA, NA, 1, NA, NA,
1, NA, 1, 1, 1, NA, 1, NA, NA, 1, NA, NA, 1, NA, NA), `2016/02` = c(NA,
1, NA, NA, 1, NA, NA, 1, NA, NA, 1, 1, 1, NA, 1, NA, 1, NA, NA,
1, NA, NA, 1, NA), `2016/03` = c(NA, NA, 1, NA, NA, 1, NA, NA,
1, 1, NA, 1, 1, 1, NA, NA, NA, 1, NA, NA, 1, NA, NA, 1), `2016/04` = c(NA,
NA, NA, 1, NA, NA, NA, NA, NA, NA, 1, 1, 1, NA, 1, NA, NA, NA,
1, NA, NA, NA, NA, NA), `2016/05` = c(NA, NA, NA, NA, 1, NA,
NA, NA, NA, 1, NA, 1, 1, 1, NA, NA, NA, NA, NA, 1, NA, NA, NA,
NA), `2016/06` = c(NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, 1,
1, 1, NA, 1, NA, NA, NA, NA, NA, 1, NA, NA, NA), `2016/07` = c(NA,
NA, NA, 1, NA, NA, 1, NA, NA, 1, NA, 1, 1, 1, NA, 1, NA, NA,
1, NA, NA, NA, NA, NA), `2016/08` = c(NA, NA, NA, NA, 1, NA,
NA, 1, NA, NA, 1, 1, 1, NA, 1, NA, 1, NA, NA, 1, NA, NA, NA,
NA), `2016/09` = c(NA, NA, NA, NA, NA, 1, NA, NA, 1, 1, NA, 1,
1, 1, NA, NA, NA, 1, NA, NA, 1, NA, NA, NA), `2016/10` = c(NA,
NA, NA, 1, NA, NA, NA, NA, NA, NA, 1, 1, 1, NA, 1, NA, NA, NA,
1, NA, NA, NA, NA, NA), `2016/11` = c(NA, NA, NA, NA, 1, NA,
NA, NA, NA, 1, NA, 1, 1, 1, NA, NA, NA, NA, NA, 1, NA, NA, NA,
NA), `2016/12` = c(NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, 1,
1, 1, NA, 1, NA, NA, NA, NA, NA, 1, NA, NA, NA), `2017/01` = c(1,
NA, NA, 1, NA, NA, 1, NA, NA, 1, NA, 1, 1, 1, NA, 1, NA, NA,
1, NA, NA, 1, NA, NA), `2017/02` = c(NA, 1, NA, NA, 1, NA, NA,
1, NA, NA, 1, 1, 1, NA, 1, NA, 1, NA, NA, 1, NA, NA, 1, NA),
`2017/03` = c(NA, NA, 1, NA, NA, 1, NA, NA, 1, 1, NA, 1,
1, 1, NA, NA, NA, 1, NA, NA, 1, NA, NA, 1), `2017/04` = c(NA,
NA, NA, 1, NA, NA, NA, NA, NA, NA, 1, 1, 1, NA, 1, NA, NA,
NA, 1, NA, NA, NA, NA, NA), `2017/05` = c(NA, NA, NA, NA,
1, NA, NA, NA, NA, 1, NA, 1, 1, 1, NA, NA, NA, NA, NA, 1,
NA, NA, NA, NA), `2017/06` = c(NA, NA, NA, NA, NA, 1, NA,
NA, NA, NA, 1, 1, 1, NA, 1, NA, NA, NA, NA, NA, 1, NA, NA,
NA), `2017/07` = c(NA, NA, NA, 1, NA, NA, 1, NA, NA, 1, NA,
1, 1, 1, NA, 1, NA, NA, 1, NA, NA, NA, NA, NA), `2017/08` = c(NA,
NA, NA, NA, 1, NA, NA, 1, NA, NA, 1, 1, 1, NA, 1, NA, 1,
NA, NA, 1, NA, NA, NA, NA), `2017/09` = c(NA, NA, NA, NA,
NA, 1, NA, NA, NA, 1, NA, 1, 1, 1, NA, NA, NA, NA, NA, NA,
1, NA, NA, NA), `2017/10` = c(NA, NA, NA, 1, NA, NA, NA,
NA, NA, NA, 1, 1, 1, NA, 1, NA, NA, NA, 1, NA, NA, NA, NA,
NA), `2017/11` = c(NA, NA, NA, NA, 1, NA, NA, NA, NA, 1,
NA, 1, 1, 1, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA), `2017/12` = c(1,
NA, NA, NA, NA, 1, NA, NA, NA, NA, 1, 1, 1, NA, 1, NA, NA,
NA, NA, NA, 1, 1, NA, NA), `2018/01` = c(NA, 1, NA, 1, NA,
NA, 1, NA, NA, 1, NA, 1, 1, 1, NA, 1, NA, NA, 1, NA, NA,
NA, 1, NA), `2018/02` = c(NA, NA, 1, NA, 1, NA, NA, 1, NA,
NA, 1, 1, 1, NA, 1, NA, 1, NA, NA, 1, NA, NA, NA, 1), `2018/03` = c(NA,
NA, NA, NA, NA, 1, NA, NA, 1, 1, NA, 1, 1, 1, NA, NA, NA,
1, NA, NA, 1, NA, NA, NA), `2018/04` = c(NA, NA, NA, 1, NA,
NA, NA, NA, NA, NA, 1, 1, 1, NA, 1, NA, NA, NA, 1, NA, NA,
NA, NA, NA), `2018/05` = c(NA, NA, NA, NA, 1, NA, NA, NA,
NA, 1, NA, 1, 1, 1, NA, NA, NA, NA, NA, 1, NA, NA, NA, NA
), `2018/06` = c(NA, NA, NA, NA, NA, 1, NA, NA, NA, NA, 1,
1, 1, NA, 1, NA, NA, NA, NA, NA, 1, NA, NA, NA), `2018/07` = c(NA,
NA, NA, 1, NA, NA, 1, NA, NA, 1, NA, 1, 1, 1, NA, 1, NA,
NA, 1, NA, NA, NA, NA, NA), `2018/08` = c(NA, NA, NA, NA,
1, NA, NA, 1, NA, NA, 1, 1, 1, NA, 1, NA, 1, NA, NA, 1, NA,
NA, NA, NA), `2018/09` = c(NA, NA, NA, NA, NA, 1, NA, NA,
1, 1, NA, 1, 1, 1, NA, NA, NA, 1, NA, NA, 1, NA, NA, NA),
`2018/10` = c(NA, NA, NA, 1, NA, NA, NA, NA, NA, NA, 1, 1,
1, NA, 1, NA, NA, NA, 1, NA, NA, NA, NA, NA), `2018/11` = c(NA,
NA, NA, NA, 1, NA, NA, NA, NA, 1, NA, 1, 1, 1, NA, NA, NA,
NA, NA, 1, NA, NA, NA, NA), `2018/12` = c(NA, NA, NA, NA,
NA, 1, NA, NA, NA, NA, 1, 1, 1, NA, 1, NA, NA, NA, NA, NA,
1, NA, NA, NA)), row.names = c(NA, -24L), class = c("tbl_df",
"tbl", "data.frame"))
In the upper data frame individual:
List item a has the same pattern as v b has the same pattern as w c has the same pattern as x
In the upper data frame individuals a, b, c, v, w and x have the same frequency - yearly.
The are some other cases as bimensal, quarterly and semestral.
My objective is to identify all this cases and classify all individuals with a time pattern.
I suppose that the package arulesSequences can be useful.
Can you help me please?
I think a good start would be a full hierarchical clustering:
library(gplots)
library(dendsort)
# data preparation
dm <- matrix( as.numeric(!is.na(dat[,-1])), nrow=nrow(dat[,-1]) )
rownames(dm) <- dat$ID
colnames(dm) <- colnames(dat[,-1])
heatmap.2( dm, trace="none", hclustfun=function(x){
dendsort(hclust(x, method="single"), type="average")
}, col=c("grey90","darkblue") )
Clearly visible are all time dependent connections through the columns. I included dendsort to bring similar clusters together to make ID related patterns more obvious.
Also, only plotting the row-cluster lets you visualize the temporal patterns better.
heatmap.2( dm, trace="none", Colv=NA, dendrogram="row",
hclustfun=function(x){ dendsort(hclust(x, method="single"),
type="average") }, col=c("grey90","darkblue") )
Adding a summary and k-means for comparison:
hierarchical cluster
dis <- dist(dm, method="euclidean")
hc <- hclust(dis, method="single")
# choose the height where to cut
# lower means more fine grained cluster, less member per cluster
cutree(hc, h=4)
a b c d e f g h i j k l m n o p q r s t u v w x
1 2 1 3 2 4 1 2 1 5 6 7 7 5 6 1 2 1 3 2 4 1 2 1
# higher h means larger clusters, i.e. more member per cluster
cutree(hc, h=5)
a b c d e f g h i j k l m n o p q r s t u v w x
1 2 1 1 2 1 1 2 1 1 2 3 3 1 2 1 2 1 1 2 1 1 2 1
k-means
# pre-defining k=6, has to be rerun to change k
km <- kmeans(dm, 6, algorithm="Hartigan-Wong")
km$cluster
a b c d e f g h i j k l m n o p q r s t u v w x
2 5 2 6 5 4 2 5 4 3 1 1 1 3 1 2 5 4 6 5 4 2 5 2