rimportmergedataset

import and merge dataset all in one step via iterative function?


These are the datasets I'm working on.

 AA‑M‑LUM‑STD.xls
 AA‑M‑LUM‑ALT.xls
 AA‑M‑CON‑ALT.xls
 AA‑N‑LUM‑STD.xls
 AA‑N‑LUM‑ALT.xls
 AA‑N‑CON‑ALT.xls

Since the M-ones have the following structure

# A tibble: 25 x 5
   CODE  GP    BLOCK  TYPE                 `ACT(300-400).AXr`
   <chr> <chr> <chr>  <chr>                    <dbl>
 1 A01   GRP1  S1     LUM-STD                   5.82 
 2 A04   GRP1  S1     LUM-STD                  -0.15 
 3 A06   GRP1  S1     LUM-STD                   2.73 
 4 A07   GRP1  S1     LUM-STD                   3.19 
 5 A08   GRP1  S1     LUM-STD                  -1.92 
 6 A09   GRP1  S1     LUM-STD                   1.42 
 7 A10   GRP1  S1     LUM-STD                   0.76 
 8 A11   GRP1  S1     LUM-STD                  -2.31 
 9 A12   GRP1  S1     LUM-STD                   7.63 
10 A13   GRP1  S1     LUM-STD                   4.21

While the N-ones have this following ones, with a more extended number of coulmns:

# A tibble: 25 x 16
   CODE  GP   BLOCK TYPE     ACT(300-400).AXr ACT(300-400).BXr ACT(300-400).CXr RESPearly(400-600).AXr RESPearly(400-600).BXr RESPearly(400-600).CXr RESP1(600-900).AXr RESP1(600-900).BXr RESP1(600-900).CXr RESP2(900-1200).AXr RESP2(900-1200).BXr RESP2(900-1200).CXr
   <chr> <chr> <chr> <chr>             <dbl>             <dbl>             <dbl>                  <dbl>                  <dbl>                  <dbl>               <dbl>               <dbl>               <dbl>                 <dbl>                 <dbl>                 <dbl>
 1 A01   DS    G1    LUM-STD          -10.3              -3.89              9.25                -10.2                   -4.95                    7.91               -3.12                2.76                5.11                   2.44                  1.20                  4.50
 2 A04   DS    G1    LUM-STD           -0.51              2.88             14.2                 -5.12                   -2.33                    5.87               -0.77                3.94                6.25                   0.24                  0.72                  6.49
 3 A06   DS    G1    LUM-STD           -0.32             -1.04              2.97                 3.89                    3.01                    7.15                1.66                2.51                4.84                   1.05                  0.86                  4.02
 4 A07   DS    G1    LUM-STD           -2.59              1.96             12.7                 -0.94                    5.34                   11.3                 0.37                4.02               12.5                    1.93                  2.01                 12.6 
 5 A08   DS    G1    LUM-STD            3.98              1.72              6.79                 1.35                    1.01                    5.02                1.44                2.13                4.75                   0.93                  0.56                  4.92
 6 A09   DS    G1    LUM-STD            8.23             12.9              21.7                 11.9                    9.87                   17.8                 5.65                4.78                9.41                   5.04                  3.99                  9.94
 7 A10   DS    G1    LUM-STD           -5.67             -3.94              5.92                 0.12                   -0.44                    6.12                1.26                0.65                5.77                   0.93                  1.18                  5.93
 8 A11   DS    G1    LUM-STD            1.23              4.89             12.4                  2.33                    5.02                   10.9                 3.12                5.07               11.2                    3.61                  2.77                 11.6 
 9 A12   DS    G1    LUM-STD           -6.77             -1.94             14.3                 -7.12                   -2.87                   6.52               -1.88               -0.29                5.82                   0.01                  1.32                  6.04
10 A13   DS    G1    LUM-STD            2.42              3.89              8.67                 1.44                    3.21                    8.12                0.99                3.32                8.76                   1.87                  2.54                  9.10
# ... with 15 more rows

I've used the following procedure for importing and merging them together:

for (i in 1:length(files)) {
  assign(gsub("\\.xls", "", files[i]), readxl::read_xls(files[i]))
}

data <- `AA-M-LUM-STD` %>%
  add_row(`AA-M-LUM-ALT`) %>%
  add_row(`AA-M-CON-ALT`) %>%
  arrange(SAMPLE)

data1 <- `AA-POz-LUM-STD` %>%
  add_row(`AA-POz-LUM-ALT`) %>%
  add_row(`AA-POz-CON-ALT`) %>%
  arrange(SAMPLE)

final_data <- merge(data, data1, by = c('SAMPLE', 'GROUP', 'SESSION', 'CATEGORY'))

Since trying by this following code I've obtain a non-matched merged dataset:

filenames <- list.files(pattern = '^AA.*\\.xls$') 
data <- purrr::map_df(filenames, readxl::read_excel)

In the case I would ike to import already merged in a proper way, what am I suppsed to do/adjust?

Thanks in advance


Solution

  • Maybe we need map2

    library(dplyr)
    library(purrr)
    library(stringr)
    filesv <- list.files(pattern = 'RP-V-.*\\.xls', full.names = TRUE)
    filesp <- list.files(pattern = 'RP-P-.*\\.xls', full.names = TRUE)
    nm1 <- str_c(str_remove(basename(filesv), "\\.xls"), 
                 str_remove(basename(filesp), "\\.xls"), sep="_")
    out <- map2(filesv, filesp, ~ {
               vdat <- readxl::read_excel(.x)
               pdat <- readxl::read_excel(.y)
               inner_join(vdat, pdat, by = c('ID', 'GR', 'SES', 'COND'))
      }) %>%
        setNames(nm1) %>%
        bind_rows(.id = 'grp')
    

    Or if we don't need the 'grp' column

    out <- map2_dfr(filesv, filesp, ~ {
               vdat <- readxl::read_excel(.x)
               pdat <- readxl::read_excel(.y)
               inner_join(vdat, pdat, by = c('ID', 'GR', 'SES', 'COND'))
      })