I have data stored as a list of objects like this:
set.seed(1)

# Simulate one series: take the sign of a 10-step cumulative sum of normal
# draws and run-length encode it with accelerometry::rle2(), asking for the
# start/stop indices of every run as well.
make_rle <- function() {
  rnorm(10) |>
    cumsum() |>
    sign() |>
    accelerometry::rle2(indices = TRUE) # TRUE, not T: T can be reassigned
}

# Ten simulated series, one run-length table per list element.
X <- lapply(seq_len(10), \(i) make_rle())
X
[[1]]
value start stop length
[1,] -1 1 2 2
[2,] 1 3 3 1
[3,] -1 4 6 3
[4,] 1 7 10 4
[[2]]
value start stop length
[1,] 1 1 3 3
[2,] -1 4 4 1
[3,] 1 5 5 1
[4,] -1 6 6 1
[5,] 1 7 10 4
[[3]]
value start stop length
[1,] 1 1 10 10
[[4]]
value start stop length
[1,] 1 1 6 6
[2,] -1 7 10 4
and so on
How can I correctly convert this data into tabular data in which every row has the same number of columns? I need tabular data for machine learning.
I can pad each matrix with rows of NA, for example:
# Pad a run-length matrix with all-NA rows until it has `n` rows.
#
# x: matrix with one row per run.
# n: target number of rows; must be at least nrow(x).
# Returns `x` with `n - nrow(x)` rows of NA appended. The padding takes its
# width from `x`, so matrices with any number of columns are accepted.
add_na <- function(x, n = 10) {
  missing_rows <- n - nrow(x)
  if (missing_rows < 0) {
    # Fail with a readable message instead of matrix()'s "invalid 'nrow'".
    stop("`x` has ", nrow(x), " rows, more than n = ", n, call. = FALSE)
  }
  pad <- matrix(NA, nrow = missing_rows, ncol = ncol(x))
  rbind(x, pad)
}
# Pad every run-length matrix in X to the default height of add_na().
Xna <- lapply(X, function(m) add_na(m))
Xna
[[1]]
value start stop length
[1,] -1 1 2 2
[2,] 1 3 3 1
[3,] -1 4 6 3
[4,] 1 7 10 4
[5,] NA NA NA NA
[6,] NA NA NA NA
[7,] NA NA NA NA
[8,] NA NA NA NA
[9,] NA NA NA NA
[10,] NA NA NA NA
[[2]]
value start stop length
[1,] 1 1 3 3
[2,] -1 4 4 1
[3,] 1 5 5 1
[4,] -1 6 6 1
[5,] 1 7 10 4
[6,] NA NA NA NA
[7,] NA NA NA NA
[8,] NA NA NA NA
[9,] NA NA NA NA
[10,] NA NA NA NA
and so on
Then I convert each matrix to a vector and thereby get tabular data:
# as.vector() flattens each padded matrix column by column, so every flattened
# vector holds one whole column after another; the vectors are then stacked as
# rows, giving one row per element of Xna.
to_tab <- t(sapply(Xna, as.vector))
My question is whether I'm doing this right, or whether some information will be lost in such a conversion. What is the correct solution in my case?
# Height of the tallest table; every other table is padded up to it.
(maxrows <- max(vapply(X, nrow, integer(1L))))
# [1] 5
# Append all-NA rows by indexing with NA row numbers. Row indexing with
# drop = FALSE works for both matrices and data frames, so the padding does
# not depend on which of the two each element of X is.
X2 <- lapply(X, function(z) {
  pad <- rep(NA_integer_, maxrows - nrow(z))
  z[c(seq_len(nrow(z)), pad), , drop = FALSE]
})
X2
# [[1]]
# value start stop length
# [1,] -1 1 2 2
# [2,] 1 3 3 1
# [3,] -1 4 6 3
# [4,] 1 7 10 4
# NA NA NA NA NA
# [[2]]
# value start stop length
# [1,] 1 1 3 3
# [2,] -1 4 4 1
# [3,] 1 5 5 1
# [4,] -1 6 6 1
# [5,] 1 7 10 4
# [[3]]
# value start stop length
# [1,] 1 1 10 10
# NA NA NA NA NA
# NA.1 NA NA NA NA
# NA.2 NA NA NA NA
# NA.3 NA NA NA NA
# [[4]]
# value start stop length
# [1,] 1 1 6 6
# [2,] -1 7 10 4
# NA NA NA NA NA
# NA.1 NA NA NA NA
# NA.2 NA NA NA NA
You can wrap the result in do.call(cbind, ...)
to get a single, wider matrix:
# Bind the padded tables side by side: cbind() receives every element of X2 as
# a separate argument, so each element contributes its own group of columns.
do.call(cbind, X2)
# value start stop length value start stop length value start stop length value start stop length
# [1,] -1 1 2 2 1 1 3 3 1 1 10 10 1 1 6 6
# [2,] 1 3 3 1 -1 4 4 1 NA NA NA NA -1 7 10 4
# [3,] -1 4 6 3 1 5 5 1 NA NA NA NA NA NA NA NA
# [4,] 1 7 10 4 -1 6 6 1 NA NA NA NA NA NA NA NA
# NA NA NA NA NA 1 7 10 4 NA NA NA NA NA NA NA NA
Data
# Sample input: four run-length tables (integer columns) of different heights.
X <- list(
  data.frame(
    value = c(-1L, 1L, -1L, 1L),
    start = c(1L, 3L, 4L, 7L),
    stop = c(2L, 3L, 6L, 10L),
    length = c(2L, 1L, 3L, 4L)
  ),
  data.frame(
    value = c(1L, -1L, 1L, -1L, 1L),
    start = c(1L, 4L, 5L, 6L, 7L),
    stop = c(3L, 4L, 5L, 6L, 10L),
    length = c(3L, 1L, 1L, 1L, 4L)
  ),
  data.frame(value = 1L, start = 1L, stop = 10L, length = 10L),
  data.frame(
    value = c(1L, -1L),
    start = c(1L, 7L),
    stop = c(6L, 10L),
    length = c(6L, 4L)
  )
)