rdplyrdata-wranglinglong-format-data

How to pivot four columns into two by adding a grouping label using dplyr?


Dataset

My simulated data looks like this:

combo <- structure(list(x1 = c(-0.184530460239927, 2.58876410608302, -0.376411897125282, 
-0.918020987034264, 0.267794817079365, 0.510331712403224, -0.0405173200334374, 
-0.138325588779885, 0.238838548324343, -0.878919653298651, 1.07733581268113, 
0.626064563540553, -1.89103128062022, 0.753938255887397, 0.100128777819584, 
0.988056031450007, -1.20492449398259, -0.435112080263483, -0.458684951607226, 
-1.09119839510576, -0.727138654781203, 0.502188856207333, -1.20147250343467, 
1.17932480876155, -0.425692938732958, 1.02236820748, 0.182053835725136, 
0.380227031433806, -0.270517738476701, -0.16272617439478, 0.220593768669037, 
-0.879461950243353, -0.0679190290036845, 1.1465390380024, 0.985536359381242, 
1.30651252236453, 0.416544302978179, -1.38776959348133, 0.26987007672326, 
-0.460563840153545, 1.26860780466628, -0.0822834394483887, -0.857094965973551, 
1.55353442853147, -0.362899956015105, -0.411375625012731, -0.774646267609922, 
-1.785780738652, 0.138298869848299, -0.830007579895058), x2 = c(10.1649997431391, 
13.7753955031784, 9.14611285885852, 8.89500487630507, 11.0792634435256, 
9.40604769694862, 10.9377081086486, 10.3149083885551, 11.4433718412604, 
8.01451483884253, 10.99588355029, 11.43348692627, 8.46187682949792, 
10.2754622062573, 9.72557919495809, 12.5346844833676, 8.72464111735834, 
9.57577516193854, 8.75255475821647, 9.72989475512324, 8.84014340654749, 
11.3804494571042, 7.66077694737602, 11.5186005504865, 9.08940573637054, 
10.8650309543967, 9.61420727102614, 10.927982330081, 9.29822635541754, 
7.76242176319227, 10.7176609762039, 7.83309960333225, 11.375953933858, 
10.5394984580639, 11.188721640198, 12.2932137170417, 11.4266622202742, 
8.78385129373517, 9.63834216468245, 8.65398006594213, 9.73423323110773, 
9.90806217362202, 8.2404184732306, 12.120678223943, 9.32402321768776, 
10.0725587879694, 7.6897109556945, 6.74738232297318, 9.46663373526324, 
9.33169452437223), y1 = c(0.531152563992138, 0.757703597905396, 
-0.791655222372092, -0.302099679513038, 0.671307589662749, 2.12637384156016, 
-0.909796697236723, -1.12913854196627, 1.67269183809139, -0.434990260871762, 
-0.0700467088361849, 0.542668245540057, 0.374830971405129, -1.63773988575198, 
-2.14682334438542, 0.901301020986636, -0.511720906474068, -1.05708841652644, 
-0.272051821060645, -0.79674724927485, -1.5268191640679, -0.907794043098101, 
-0.680495891271473, -0.35552898088709, -0.680683696561238, -0.844283268256992, 
1.27768418531443, -0.961233223427325, -0.671858202047372, -0.225776242419413, 
-0.686226261720162, 1.14668780673855, -0.579007489205267, 1.48425373519236, 
0.428834567252612, -0.211941122441431, -0.512591114561531, -0.0958428297906739, 
1.68860873380911, -0.751395710328223, 1.68278553230514, 0.726087302326806, 
-1.08262236469318, 0.3445461894278, 0.826204921157919, -0.635306319455706, 
-1.82487370480718, -2.11194816404115, 0.714777331685519, -0.345433989913914
), y2 = c(10.6220278853899, 11.6063941600105, 9.88059276478581, 
8.14957601722892, 9.16367249161337, 12.762654818962, 9.00183693282061, 
9.55789660927653, 12.9609244725328, 8.90467635039521, 10.8991427528596, 
9.89206203043643, 9.41991979079818, 7.84784105454833, 8.91365647664849, 
11.3667187570646, 9.40006788407869, 9.98750951320975, 9.00951922619663, 
9.97523890006792, 8.27153592598831, 11.0884143412798, 7.94368844719581, 
10.9469372108228, 8.15593832993733, 9.84078648878971, 11.8451838496249, 
10.8083866882212, 9.61132252103889, 11.1923799807399, 9.10045982731272, 
12.0544568795267, 9.24683481504971, 14.8962611606193, 10.881540894629, 
10.1335621049274, 9.016866497424, 11.2527950167179, 11.8814160661131, 
9.22010868094334, 12.9542930603357, 10.0271771753269, 7.95131787733545, 
9.31692314229138, 11.5295911568582, 8.30646740293857, 8.389129594333, 
7.35608073778492, 11.979325721996, 10.2744608966922), z = c(-0.206932655889915, 
1.59055947315421, 0.569858682145818, -0.39269341805629, 0.0560523995691307, 
-1.35745817399495, 0.757817217783636, 1.2363198027176, -0.705515328708522, 
0.0100215308812615, 1.1496786488628, -0.0189650703302578, -1.0373398254117, 
2.53958513880197, 2.4495884905998, 0.112672166475035, -0.646251528605393, 
0.727127843912292, -0.351583194283642, -0.181459517930261, 1.02924872657184, 
1.31035563724453, -0.337780896477732, 1.1791886765987, 0.330814927636109, 
1.73920234956884, -0.809834547447509, 0.472332306869409, 0.134382367942977, 
0.705445383659287, 0.845104590511669, -2.28815412147097, -0.331179738228726, 
-0.291691270546117, -0.104170550476615, 2.12138490798859, -0.140027608407499, 
-1.03938688051102, -0.846736197331773, -0.197364615019016, -0.420498039989205, 
-1.06172053607786, -0.511495900155817, 2.07372602503043, -1.25039171703876, 
-0.00379074839623933, 0.545098097507594, -0.744687068246333, 
-0.252078776784803, -0.75556398293816)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -50L))

Problem

Having trouble wording my question or considering how to find it, so I provide the example directly here for what I'm looking for. If you inspect the data, it looks like so:

# A tibble: 50 × 5
       x1    x2      y1    y2       z
    <dbl> <dbl>   <dbl> <dbl>   <dbl>
 1  0.220  9.08 -0.763  10.3   0.596 
 2  0.712 11.5   1.79   10.2  -0.836 
 3  1.26  12.3   0.0956  9.86  0.672 
 4 -1.27   7.62  1.61   10.7  -2.16  
 5  0.285 10.7  -0.0596 10.5  -0.357 
 6  0.303 11.1   0.613  10.9  -0.113 
 7 -0.618  7.90  1.56   10.1  -1.97  
 8  2.24  10.9  -0.0412  8.38  2.22  
 9  1.27  10.2   0.290   8.72  1.05  
10  0.230 10.2   0.392  10.5   0.0289
# ℹ 40 more rows
# ℹ Use `print(n = ...)` to see more rows

You can clearly see that there are variables labeled with a "1" and "2" here. I want a new column that specifies a Group 1 and Group 2 by pivoting their associated columns here. So x1 and y1 belong to Group 1 and x2 and y2 belong to Group 2. I already know how to do something similar to this using pivot_longer, but I'm struggling with figuring out how to simultaneously group these variables together while creating a new label column. The target data I want should look something like this, where x is simply a combo of x1 and x2 and y is a combo of y1 and y2, while group labels which group each person belongs to:

  group         x       y       z
   <chr>     <dbl>   <dbl>   <dbl>
 1 Group 1 -3.51    0.181  -0.0610
 2 Group 2  1.63    1.35    0.405 
 3 Group 1  0.271  -0.167  -0.0578
 4 Group 2 -0.776   0.580   0.0599
 5 Group 1 -1.48   -0.0631  0.966 
 6 Group 2  0.757  -2.10    2.07  
 7 Group 1 -1.37    0.462   0.882 
 8 Group 2  0.596   0.0123  0.156 
 9 Group 1 -0.494  -0.824  -1.18  
10 Group 2  0.783  -2.28   -1.40  
11 Group 1 -0.0388 -0.209   0.868 
12 Group 2 -0.255   0.121   1.07  
13 Group 1 -0.758   0.242  -0.665 
14 Group 2 -0.966   1.66    0.374 
15 Group 1 -0.595  -1.17   -0.612 
16 Group 2  1.12   -0.205  -0.484 
17 Group 1 -0.468  -0.404  -0.420 
18 Group 2 -0.361   0.257   1.89  
19 Group 1  0.165   0.989   1.50  
20 Group 2  0.681  -0.304   0.154

Solution

  • I believe you need a combination of names_pattern and a ".value" in names_to. From the documentation:

    ".value" indicates the corresponding component of the column name defines the name of the output column containing the cell values.

    library(tidyverse)
    
    combo |> 
      pivot_longer(-z, names_to = c(".value", "Group"), names_pattern = "([a-z])(\\d)") |> 
      relocate(-z)
    
    # A tibble: 100 × 4
       Group      x      y       z
       <chr>  <dbl>  <dbl>   <dbl>
     1 1     -0.185  0.531 -0.207 
     2 2     10.2   10.6   -0.207 
     3 1      2.59   0.758  1.59  
     4 2     13.8   11.6    1.59  
     5 1     -0.376 -0.792  0.570 
     6 2      9.15   9.88   0.570 
     7 1     -0.918 -0.302 -0.393 
     8 2      8.90   8.15  -0.393 
     9 1      0.268  0.671  0.0561
    10 2     11.1    9.16   0.0561
    # ℹ 90 more rows