rfor-loopif-statementconditional-statements

Making four new columns based on eight existing columns


I have eight columns called INT1:INT4 and INTX1:INTX4. For each row, there is a maximum of four non-NA values. I need to create four new variables called ING1:ING4 and assign values row-wise from the eight existing columns:

For example, I would expect the new columns in row 1 to look like:

... ING1 ING2 ING3 ING4
... 245005 276790 NA NA

My data:

DATA <- structure(list(ID = c("101", "101", "101", "101", "101", "101","101",
                              "101", "101", "101"),
                       IDA = c("1", "1", "2", "3", "4","5", "5", "1859",
                               "1860", "1861"),
                       DATE = structure(c(1300928400,1277946000, 1277946000,
                                          1278550800, 1278550800, 1453770000,
                                          1329958800,1506474000, 1485133200,
                                          1485133200), tzone = "UTC",
                                        class = c("POSIXct","POSIXt")),
                       NR = c("CH-0001", "CH-0001","CH-0002", "CH-0003",
                              "CH-0004", "CH-0005","CH-0005", "CH-1859",
                              "CH-1860", "CH-1861"),
                       PAT = c("101-1", "101-1", "101-2", "101-3", "101-4",
                               "101-5","101-5", "101-1859", "101-1860",
                               "101-1861"),
                       INT1 = c(245005,280040, 280040, 280040, 280040, 240040,
                                240040, NA, NA, NA),
                       INT2 = c(NA_real_, NA_real_, NA_real_, NA_real_,
                                NA_real_,NA_real_, NA_real_, NA_real_,
                                NA_real_, NA_real_),
                       INT3 = c(NA_real_,NA_real_, 280010, NA_real_, NA_real_,
                                NA_real_, NA_real_,NA_real_, 245035, NA_real_),
                       INT4 = c(NA_real_, NA_real_,NA_real_, NA_real_, NA_real_,
                                NA_real_, NA_real_, NA_real_,NA_real_, NA_real_),
                       INTX1 = c(NA_real_, 275040, NA_real_,NA_real_, NA_real_,
                                 NA_real_, 240080, NA_real_, NA_real_,NA_real_),
                       INTX2 = c(276790, NA_real_, 7612645, NA_real_,NA_real_,
                                 NA_real_, 5078219, NA_real_, NA_real_, NA_real_),
                       INTX173 = c(NA_real_, NA_real_, NA_real_, 3456878,NA_real_,
                                   NA_real_, 3289778, NA_real_, NA_real_, NA_real_),
                       INTX4 = c(NA_real_, NA_real_, 11198767, NA_real_,NA_real_,
                                 NA_real_, NA_real_, NA_real_, NA_real_, 7025676),
                       KAT = c(0, 0, 0, 0, 0, 0, 0, 1, 1, 1)),
                  row.names = c(NA,-10L),
                  class = c("tbl_df", "tbl", "data.frame"))

Solution

  • Try this:

    fun <- function(select, prefix = "ING", ncol = -1, data = cur_data()) {
      select <- substitute(select)
      out <- asplit(t(
        apply(subset(data, select = eval(select)), 1, function(z) z[order(is.na(z))])
        ), 2)
      names(out) <- paste0(prefix, seq_along(out))
      if (ncol > 0) out <- out[seq_len(ncol)]
      do.call(data.frame, out)
    }
    

    Parts:

    And its use:

    dplyr

    library(dplyr)
    DATA %>%
      mutate(fun(INT1:INTX4, ncol=4))
    # # A tibble: 10 × 18
    #    ID    IDA   DATE                    NR      PAT        INT1  INT2   INT3  INT4  INTX1   INTX2 INTX173    INTX4   KAT    ING1    ING2    ING3     ING4
    #    <chr> <chr> <dttm>                  <chr>   <chr>     <dbl> <dbl>  <dbl> <dbl>  <dbl>   <dbl>   <dbl>    <dbl> <dbl>   <dbl>   <dbl>   <dbl>    <dbl>
    #  1 101   1     2011-03-24 01:00:00.000 CH-0001 101-1    245005    NA     NA    NA     NA  276790      NA       NA     0  245005  276790      NA       NA
    #  2 101   1     2010-07-01 01:00:00.000 CH-0001 101-1    280040    NA     NA    NA 275040      NA      NA       NA     0  280040  275040      NA       NA
    #  3 101   2     2010-07-01 01:00:00.000 CH-0002 101-2    280040    NA 280010    NA     NA 7612645      NA 11198767     0  280040  280010 7612645 11198767
    #  4 101   3     2010-07-08 01:00:00.000 CH-0003 101-3    280040    NA     NA    NA     NA      NA 3456878       NA     0  280040 3456878      NA       NA
    #  5 101   4     2010-07-08 01:00:00.000 CH-0004 101-4    280040    NA     NA    NA     NA      NA      NA       NA     0  280040      NA      NA       NA
    #  6 101   5     2016-01-26 01:00:00.000 CH-0005 101-5    240040    NA     NA    NA     NA      NA      NA       NA     0  240040      NA      NA       NA
    #  7 101   5     2012-02-23 01:00:00.000 CH-0005 101-5    240040    NA     NA    NA 240080 5078219 3289778       NA     0  240040  240080 5078219  3289778
    #  8 101   1859  2017-09-27 01:00:00.000 CH-1859 101-1859     NA    NA     NA    NA     NA      NA      NA       NA     1      NA      NA      NA       NA
    #  9 101   1860  2017-01-23 01:00:00.000 CH-1860 101-1860     NA    NA 245035    NA     NA      NA      NA       NA     1  245035      NA      NA       NA
    # 10 101   1861  2017-01-23 01:00:00.000 CH-1861 101-1861     NA    NA     NA    NA     NA      NA      NA  7025676     1 7025676      NA      NA       NA
    

    base R

    cbind(DATA, fun(data = DATA, INT1:INTX4, ncol=4))
    #     ID  IDA                DATE      NR      PAT   INT1 INT2   INT3 INT4  INTX1   INTX2 INTX173    INTX4 KAT    ING1    ING2    ING3     ING4
    # 1  101    1 2011-03-24 01:00:00 CH-0001    101-1 245005   NA     NA   NA     NA  276790      NA       NA   0  245005  276790      NA       NA
    # 2  101    1 2010-07-01 01:00:00 CH-0001    101-1 280040   NA     NA   NA 275040      NA      NA       NA   0  280040  275040      NA       NA
    # 3  101    2 2010-07-01 01:00:00 CH-0002    101-2 280040   NA 280010   NA     NA 7612645      NA 11198767   0  280040  280010 7612645 11198767
    # 4  101    3 2010-07-08 01:00:00 CH-0003    101-3 280040   NA     NA   NA     NA      NA 3456878       NA   0  280040 3456878      NA       NA
    # 5  101    4 2010-07-08 01:00:00 CH-0004    101-4 280040   NA     NA   NA     NA      NA      NA       NA   0  280040      NA      NA       NA
    # 6  101    5 2016-01-26 01:00:00 CH-0005    101-5 240040   NA     NA   NA     NA      NA      NA       NA   0  240040      NA      NA       NA
    # 7  101    5 2012-02-23 01:00:00 CH-0005    101-5 240040   NA     NA   NA 240080 5078219 3289778       NA   0  240040  240080 5078219  3289778
    # 8  101 1859 2017-09-27 01:00:00 CH-1859 101-1859     NA   NA     NA   NA     NA      NA      NA       NA   1      NA      NA      NA       NA
    # 9  101 1860 2017-01-23 01:00:00 CH-1860 101-1860     NA   NA 245035   NA     NA      NA      NA       NA   1  245035      NA      NA       NA
    # 10 101 1861 2017-01-23 01:00:00 CH-1861 101-1861     NA   NA     NA   NA     NA      NA      NA  7025676   1 7025676      NA      NA       NA
    

    Edited to only sort on NA-ness, retaining the original order of non-null values.