rdata.tablegrepl

How can I reorder certain columns by a pattern within a data.table?


I have a data.table with the following columns names

> names(test)
 [1] "Week"                "PSRS_Argentina"      "PSRS_Australia"      "PSRS_Belgium"        "PSRS_Canada"         "PSRS_France"        
 [7] "PSRS_Germany"        "PSRS_Hungary"        "PSRS_Israel"         "PSRS_Italy"          "PSRS_Japan"          "PSRS_Korea, South"  
[13] "PSRS_Peru"           "PSRS_Poland"         "PSRS_Russia"         "PSRS_Spain"          "PSRS_Taiwan"         "PSRS_United Kingdom"
[19] "PSRS_United States"  "AR_Argentina"        "AR_Australia"        "AR_Belgium"          "AR_Canada"           "AR_France"          
[25] "AR_Germany"          "AR_Hungary"          "AR_Israel"           "AR_Italy"            "AR_Japan"            "AR_Korea, South"    
[31] "AR_Peru"             "AR_Poland"           "AR_Russia"           "AR_Spain"            "AR_Taiwan"           "AR_United Kingdom"  
[37] "AR_United States"    "totalPSRS"           "totalAR"  

I'm trying to reorder the column names of those with only country names. I realize I may have to do this in more than one step but how can I got about grouping the PSRS_ and AR_ so that I would get something like this:

[1] "Week"   "PSRS_Argentina"   "AR_Argentina"   "PSRS_Australia"   "AR_Australia" ... "totalPSRS" "totalAR"

I've seen some use of grepl or grep that would may be helpful here but not with data.tables. I've tried doing something like this:

test[order(test)][order(-(grepl('*_[A-Z]',test[order(test)]))+1L]

But no sorting of the columns occurred.


Solution

  • This should work.

    ## I use the stringr package
    ### I find it easier to understand
    require(stringr)
    #> Loading required package: stringr
    
    test <- data.frame(
      Week = c("week1", "week2", "week3"), 
      PSRS_Germany = c("some", "data", "idk"),
      PSRS_Israel = c("this", "is", "data"),
      AR_Germany = c("yes", "it's", "data"),
      AR_Israel = c("hmm", "look", "values"),
      totalPSRS = c("yes", "yeah", "hmm"),
      totalAR = c("foo", "bar", "BIFF!")
    )
    
    test
    #>    Week PSRS_Germany PSRS_Israel AR_Germany AR_Israel totalPSRS totalAR
    #> 1 week1         some        this        yes       hmm       yes     foo
    #> 2 week2         data          is       it's      look      yeah     bar
    #> 3 week3          idk        data       data    values       hmm   BIFF!
    
    ## Get just the names we want to rearrange
    testNames <- names(test)
    testNames <- testNames[str_detect(testNames, "_")]
    
    ## Rearragne those names
    orderedNames <- 
      testNames[
        order(str_extract(testNames, "_.+(?=$)"),   # order by what's between string start and '_'
              str_extract(testNames, "(?<=^).+_"))  # _then_ by what's between '_' and string end
      ]
    
    ## Names that we are not re-ordering:
    otherNames <- names(test)[!names(test) %in% c(orderedNames, "Week")]
    
    ## Index using newly rearranged names
    test[,c("Week", orderedNames, otherNames)]
    #>    Week AR_Germany PSRS_Germany AR_Israel PSRS_Israel totalPSRS totalAR
    #> 1 week1        yes         some       hmm        this       yes     foo
    #> 2 week2       it's         data      look          is      yeah     bar
    #> 3 week3       data          idk    values        data       hmm   BIFF!
    

    Created on 2021-09-13 by the reprex package (v2.0.1)