rmultiple-columnsreshape2

How to combine multiple columns to single columns (by header name)?


I currently have an Excel document that looks like this in a simplified way:

   A1   A2   B1   B2    B3   C1   C2   ...   H12
   1    5    11   14    15   19   22   ...   ...
   2    6    12         16   20   23
   3    7    13         17   21   24
   4    8               18        25
        9                         26
        10                        27

And I have to combine the columns in two different ways for two different resulting documents. First of all, I have to combine the columns in a way that all the columns containing only the number 1 are combined, which would result in a document like this:

   Column 1   Column 2   Column 3   ...   Column 12 
   1          5          15         ...   ...
   2          6          16
   3          7          17
   4          8          18
   11         9
   12         10
   13         14
   19         22
   20         23
   21         24
              25
              26
              27

And for the second way, I would need the columns to be combined in a way that all the original columns starting with the same number are mereged into one column, such as:

   Column A   Column B   Column C   ...   Column H
   1          11         19         ...   ...
   2          12         20
   3          13         21
   4          14         22
   5          15         23
   6          16         24
   7          17         25
   8          18         26
   9                     27
   10

The headers of the file to transform are numbered from A1 up to H12. This means that the first resulting document should have 12 columns in the end and the second document 8 columns.

I have already tried to do so with rbind, but I didn't manage to do so.

Could anybody please help me with this problem?

Thank you very much already in advance!


Solution

  • There are definitely more elegant ways, but without knowing how your header or your data looks like in reality, this would be a relatively magic-free path:

    (Disregarding the Excel part of the post, which would probably be more suited in a separate question, if it's a question at all)

    Data

    > df <- structure(list(A1=c(1L,2L,3L,4L,NA,NA),A2=5:10,B1=c(11L,12L,13L,NA,NA,NA),B2=c(14L,NA,NA,NA,NA,NA),B3=c(15L,16L,17L,18L,NA,NA),C1=c(19L,20L,21L,NA,NA,NA),C2=22:27,H12=c(28:30,NA,NA,NA)),row.names=c(NA,-6L),class=c("data.frame"))
    

    Code

    Put data into long form

    library(tidyverse)
    # Put data.frame into long format
    df_long <- df %>%
        # Put data into long format
        gather(key, val) %>%  #you could also use pivot_longer()
        drop_na() %>%
        # Separate letters and numbers
        separate_wider_regex(cols = key, patterns = c(letter = "[^0-9]+", number = "[0-9]+")) %>%
        # Sort
        arrange(letter, number) 
    
    # Result
    > head(df_long)
      letter number val
    1      A      1   1
    2      A      1   2
    3      A      1   3
    4      A      1   4
    5      A      2   5
    6      A      2   6
    ... 
    

    Generate letters table

    # By letters
    df_letters <- df_long %>%
        # Drop number col
        select(-number) %>%
        # Add rolling ID
        group_by(letter) %>%
        mutate(id = row_number()) %>%
        ungroup() %>%
        # Spread into wide format
        pivot_wider(
            id_cols = id, names_from = letter, names_prefix = "Column_", values_from = val
        ) %>%
        select(-id)
    
    # Result
    
    > df_letters
    # A tibble: 10 × 4
       Column_A Column_B Column_C Column_H
          <int>    <int>    <int>    <int>
     1        1       11       19       28
     2        2       12       20       29
     3        3       13       21       30
     4        4       14       22       NA
     5        5       15       23       NA
     6        6       16       24       NA
     7        7       17       25       NA
     8        8       18       26       NA
     9        9       NA       27       NA
    10       10       NA       NA       NA
    

    Generate numbers table

    # By numbers
    df_numbers <- df_long %>%
        # Drop letter col
        select(-letter) %>%
        # Add rolling ID
        group_by(number) %>%
        mutate(id = row_number()) %>%
        ungroup() %>%
        # Spread into wide format
        pivot_wider(
            id_cols = id, names_from = number, names_prefix = "Column_", values_from = val
        ) %>%
        # Drop ID col
        select(-id)
    
    # Result
    > df_numbers
    # A tibble: 13 × 4
       Column_1 Column_2 Column_3 Column_12
          <int>    <int>    <int>     <int>
     1        1        5       15        28
     2        2        6       16        29
     3        3        7       17        30
     4        4        8       18        NA
     5       11        9       NA        NA
     6       12       10       NA        NA
     7       13       14       NA        NA
     8       19       22       NA        NA
     9       20       23       NA        NA
    10       21       24       NA        NA
    11       NA       25       NA        NA
    12       NA       26       NA        NA
    13       NA       27       NA        NA