r dataframe list fill factors

Inputting ordered values from a list of data frames with different row numbers into a list of data frames with equal row numbers


I am stuck with this problem. Please note that I have edited the original question because I have forgotten to provide crucial details for solving this problem.

My sincere apologies for this mistake go to the first kind responder to this question!

The MWE is provided below

# Input: three frequency tables keyed by an ordered `age` factor. Each table
# covers a different subset of ages, so row counts and factor levels differ.
mydf.1 <- data.frame(
  age = factor(c(0:4, 6:10), levels = c(0:4, 6:10), ordered = TRUE),
  Freq = c(1, 2, 3, 7, 8, 9, 10, 5, 10, 10)
)
mydf.2 <- data.frame(
  age = factor(6:10, levels = 6:10, ordered = TRUE),
  Freq = c(10, 5, 10, 10, 21)
)
mydf.3 <- data.frame(
  age = factor(0:3, levels = 0:3, ordered = TRUE),
  Freq = c(1, 5, 9, 4)
)
mydf.list <- list(mydf.1, mydf.2, mydf.3)
mydf.list
# Desired output: every frame carries all ages 0..10 (11 rows), with Freq set
# to 0 for ages that were absent from the corresponding input frame.
myout.1 <- data.frame(
  age = factor(0:10, levels = 0:10, ordered = TRUE),
  Freq = c(1, 2, 3, 7, 8, 0, 9, 10, 5, 10, 10)
)
myout.2 <- data.frame(
  age = factor(0:10, levels = 0:10, ordered = TRUE),
  Freq = c(0, 0, 0, 0, 0, 0, 10, 5, 10, 10, 21)
)
myout.3 <- data.frame(
  age = factor(0:10, levels = 0:10, ordered = TRUE),
  Freq = c(1, 5, 9, 4, 0, 0, 0, 0, 0, 0, 0)
)
myout.list <- list(myout.1, myout.2, myout.3)
myout.list
# My best unsuccessful attempt: build an all-zero template covering ages 0..10,
# then, for each input frame, collect the template row names that also occur
# among that frame's row names.
mydf.empty <- data.frame(
  age = factor(0:10, levels = 0:10, ordered = TRUE),
  Freq = rep(0, 11)
)
myrows <- list()
for (i in seq_len(3)) {
  myrows[[i]] <- rownames(mydf.empty)[
    rownames(mydf.empty) %in% rownames(mydf.list[[i]])
  ]
}
myrows

Any help would be greatly appreciated. Thank you!

Stef


Solution

  • The following only targets the 2nd revision (a list of frames with a factor column), which is quite different from the original question (a list of matrices).


    You could first unify the levels of the list of factors with forcats::fct_unify(). Once the original age values are replaced with the unified variants, tidyr::complete() can turn implicit missing values into explicit missing values, i.e. complete each frame with all age levels.

    purrr::map() is used to extract a column from each frame in the list; here it returns a list of age factors.
    purrr::map2() is used to iterate over pairs of items from mydf.list and the list of age factors.

    # fct_unify() defaults to the union of the levels that actually occur in the
    # list of factors. Age 5 appears in none of the input frames, so relying on
    # the default would leave it out and every result would have 10 rows instead
    # of the desired 11. Supplying the full set of levels explicitly makes
    # tidyr::complete() add a zero-filled row for every age 0..10.
    purrr::map2(
      mydf.list,
      purrr::map(mydf.list, "age") |>
        forcats::fct_unify(levels = as.character(0:10)),
      \(df, age_unif_lvl) {
        df |>
          # swap in the factor that carries the unified level set
          dplyr::mutate(age = age_unif_lvl) |>
          # one row per factor level; missing ages get Freq = 0
          tidyr::complete(age, fill = list(Freq = 0))
      }
    )
    #> [[1]]
    #> # A tibble: 11 × 2
    #>    age    Freq
    #>    <ord> <dbl>
    #>  1 0         1
    #>  2 1         2
    #>  3 2         3
    #>  4 3         7
    #>  5 4         8
    #>  6 5         0
    #>  7 6         9
    #>  8 7        10
    #>  9 8         5
    #> 10 9        10
    #> 11 10       10
    #> 
    #> [[2]]
    #> # A tibble: 11 × 2
    #>    age    Freq
    #>    <ord> <dbl>
    #>  1 0         0
    #>  2 1         0
    #>  3 2         0
    #>  4 3         0
    #>  5 4         0
    #>  6 5         0
    #>  7 6        10
    #>  8 7         5
    #>  9 8        10
    #> 10 9        10
    #> 11 10       21
    #> 
    #> [[3]]
    #> # A tibble: 11 × 2
    #>    age    Freq
    #>    <ord> <dbl>
    #>  1 0         1
    #>  2 1         5
    #>  3 2         9
    #>  4 3         4
    #>  5 4         0
    #>  6 5         0
    #>  7 6         0
    #>  8 7         0
    #>  9 8         0
    #> 10 9         0
    #> 11 10        0
    

    Example data:

    # Three frequency tables keyed by an ordered `age` factor; each one covers
    # a different subset of ages, so the factor levels differ between frames.
    mydf.1 <- data.frame(
      age = factor(c(0:4, 6:10), levels = c(0:4, 6:10), ordered = TRUE),
      Freq = c(1, 2, 3, 7, 8, 9, 10, 5, 10, 10)
    )
    mydf.2 <- data.frame(
      age = factor(6:10, levels = 6:10, ordered = TRUE),
      Freq = c(10, 5, 10, 10, 21)
    )
    mydf.3 <- data.frame(
      age = factor(0:3, levels = 0:3, ordered = TRUE),
      Freq = c(1, 5, 9, 4)
    )
    mydf.list <- list(mydf.1, mydf.2, mydf.3)
    str(mydf.list)
    #> List of 3
    #>  $ :'data.frame':    10 obs. of  2 variables:
    #>   ..$ age : Ord.factor w/ 10 levels "0"<"1"<"2"<"3"<..: 1 2 3 4 5 6 7 8 9 10
    #>   ..$ Freq: num [1:10] 1 2 3 7 8 9 10 5 10 10
    #>  $ :'data.frame':    5 obs. of  2 variables:
    #>   ..$ age : Ord.factor w/ 5 levels "6"<"7"<"8"<"9"<..: 1 2 3 4 5
    #>   ..$ Freq: num [1:5] 10 5 10 10 21
    #>  $ :'data.frame':    4 obs. of  2 variables:
    #>   ..$ age : Ord.factor w/ 4 levels "0"<"1"<"2"<"3": 1 2 3 4
    #>   ..$ Freq: num [1:4] 1 5 9 4
    
    # Desired output: each frame lists every age 0..10 (11 rows), with Freq = 0
    # wherever the age was missing from the corresponding input frame.
    myout.1 <- data.frame(
      age = factor(0:10, levels = 0:10, ordered = TRUE),
      Freq = c(1, 2, 3, 7, 8, 0, 9, 10, 5, 10, 10)
    )
    myout.2 <- data.frame(
      age = factor(0:10, levels = 0:10, ordered = TRUE),
      Freq = c(0, 0, 0, 0, 0, 0, 10, 5, 10, 10, 21)
    )
    myout.3 <- data.frame(
      age = factor(0:10, levels = 0:10, ordered = TRUE),
      Freq = c(1, 5, 9, 4, 0, 0, 0, 0, 0, 0, 0)
    )
    myout.list <- list(myout.1, myout.2, myout.3)
    str(myout.list)
    #> List of 3
    #>  $ :'data.frame':    11 obs. of  2 variables:
    #>   ..$ age : Ord.factor w/ 11 levels "0"<"1"<"2"<"3"<..: 1 2 3 4 5 6 7 8 9 10 ...
    #>   ..$ Freq: num [1:11] 1 2 3 7 8 0 9 10 5 10 ...
    #>  $ :'data.frame':    11 obs. of  2 variables:
    #>   ..$ age : Ord.factor w/ 11 levels "0"<"1"<"2"<"3"<..: 1 2 3 4 5 6 7 8 9 10 ...
    #>   ..$ Freq: num [1:11] 0 0 0 0 0 0 10 5 10 10 ...
    #>  $ :'data.frame':    11 obs. of  2 variables:
    #>   ..$ age : Ord.factor w/ 11 levels "0"<"1"<"2"<"3"<..: 1 2 3 4 5 6 7 8 9 10 ...
    #>   ..$ Freq: num [1:11] 1 5 9 4 0 0 0 0 0 0 ...
    
    

    Created on 2024-10-22 with reprex v2.1.1