I have a data frame, df1:
# Example data: 37 rows for two participants (p1 = rows 1-18, p2 = rows 19-37).
# `condition` is "false" on the first row of each block and "" on every other
# row. Values carry no stray whitespace, so that `condition == "false"` and
# grouping by `pID` behave as intended.
df1 <- data.frame(
  material = c(
    "FBZOIKS", "FBZOIKS", "FBZOIKS", "FBZOIKS",
    "VNTYALQ", "VNTYALQ", "VNTYALQ",
    "HMRCJXU", "HMRCJXU", "HMRCJXU", "HMRCJXU", "HMRCJXU",
    "CURHJXM",
    "UXJMRCH", "UXJMRCH",
    "XMRCUJH", "XMRCUJH", "XMRCUJH",
    "FBZOIKS", "FBZOIKS", "FBZOIKS", "FBZOIKS",
    "VNTYALQ", "VNTYALQ", "VNTYALQ", "VNTYALQ",
    "HMRCJXU", "HMRCJXU", "HMRCJXU", "HMRCJXU",
    "CURHJXM", "CURHJXM",
    "UXJMRCH", "UXJMRCH",
    "XMRCUJH", "XMRCUJH", "XMRCUJH"
  ),
  condition = c(
    "false", "", "", "", "", "", "", "", "", "", "", "",
    "false", "", "", "", "", "",
    "false", "", "", "", "", "", "", "", "", "", "", "",
    "false", "", "", "", "", "", ""
  ),
  pID = rep(c("p1", "p2"), times = c(18L, 19L)),
  stringsAsFactors = FALSE
)
and I need to create two new columns, computed within each pID group: block and Nletters_block.
For block, I need to find the first 'false' in the column condition for each pID and assign a value of 1 from that row until the next 'false' for that pID. From the second 'false' onward I need to assign a value of 2, from the third 'false' onward a value of 3, and so on.
For Nletters_block, I need to calculate, for each participant and block, the number of unique letters that appear in the column material.
If I could use the dplyr library, that would be even better.
Below is what I would like to obtain:
material condition pID block Nletters_block
FBZOIKS false p1 1 21
FBZOIKS p1 1 21
FBZOIKS p1 1 21
FBZOIKS p1 1 21
VNTYALQ p1 1 21
VNTYALQ p1 1 21
VNTYALQ p1 1 21
HMRCJXU p1 1 21
HMRCJXU p1 1 21
HMRCJXU p1 1 21
HMRCJXU p1 1 21
HMRCJXU p1 1 21
CURHJXM false p1 2 7
UXJMRCH p1 2 7
UXJMRCH p1 2 7
XMRCUJH p1 2 7
XMRCUJH p1 2 7
XMRCUJH p1 2 7
FBZOIKS false p2 1 21
FBZOIKS p2 1 21
FBZOIKS p2 1 21
FBZOIKS p2 1 21
VNTYALQ p2 1 21
VNTYALQ p2 1 21
VNTYALQ p2 1 21
VNTYALQ p2 1 21
HMRCJXU p2 1 21
HMRCJXU p2 1 21
HMRCJXU p2 1 21
HMRCJXU p2 1 21
CURHJXM false p2 2 7
CURHJXM p2 2 7
UXJMRCH p2 2 7
UXJMRCH p2 2 7
XMRCUJH p2 2 7
XMRCUJH p2 2 7
XMRCUJH p2 2 7
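First group by pID to compute block, then group by pID and block to compute Nletters_block. The unique letters are counted by pasting all material strings of a group together, splitting the result into single characters, and counting the distinct ones.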
library(dplyr)
# Number of distinct characters found across all strings in `words`.
# The strings are concatenated first and then split into single characters.
count_distinct_chars <- function(words) {
  chars <- strsplit(paste0(words, collapse = ""), split = "")[[1]]
  length(unique(chars))
}

df1 %>%
  # Running count of "false" rows within each participant: every "false"
  # opens the next block.
  mutate(
    block = cumsum(condition == "false"),
    .by = pID
  ) %>%
  # One letter count per participant/block, repeated on every row of the group.
  mutate(
    Nletters_block = count_distinct_chars(material),
    .by = c(pID, block)
  )
material condition pID block Nletters_block
1 FBZOIKS false p1 1 21
2 FBZOIKS p1 1 21
3 FBZOIKS p1 1 21
4 FBZOIKS p1 1 21
5 VNTYALQ p1 1 21
6 VNTYALQ p1 1 21
7 VNTYALQ p1 1 21
8 HMRCJXU p1 1 21
9 HMRCJXU p1 1 21
10 HMRCJXU p1 1 21
11 HMRCJXU p1 1 21
12 HMRCJXU p1 1 21
13 CURHJXM false p1 2 7
14 UXJMRCH p1 2 7
15 UXJMRCH p1 2 7
16 XMRCUJH p1 2 7
17 XMRCUJH p1 2 7
18 XMRCUJH p1 2 7
19 FBZOIKS false p2 1 21
20 FBZOIKS p2 1 21
21 FBZOIKS p2 1 21
22 FBZOIKS p2 1 21
23 VNTYALQ p2 1 21
24 VNTYALQ p2 1 21
25 VNTYALQ p2 1 21
26 VNTYALQ p2 1 21
27 HMRCJXU p2 1 21
28 HMRCJXU p2 1 21
29 HMRCJXU p2 1 21
30 HMRCJXU p2 1 21
31 CURHJXM false p2 2 7
32 CURHJXM p2 2 7
33 UXJMRCH p2 2 7
34 UXJMRCH p2 2 7
35 XMRCUJH p2 2 7
36 XMRCUJH p2 2 7
37 XMRCUJH p2 2 7
# Example data used above: 37 rows, three character columns.
# Rows 1-18 belong to p1 and rows 19-37 to p2; `condition` is "false" on
# rows 1, 13, 19 and 31 and the empty string everywhere else.
df1 <- data.frame(
  material = rep(
    c(
      "FBZOIKS", "VNTYALQ", "HMRCJXU", "CURHJXM", "UXJMRCH", "XMRCUJH",
      "FBZOIKS", "VNTYALQ", "HMRCJXU", "CURHJXM", "UXJMRCH", "XMRCUJH"
    ),
    times = c(4L, 3L, 5L, 1L, 2L, 3L, 4L, 4L, 4L, 2L, 2L, 3L)
  ),
  condition = replace(character(37L), c(1L, 13L, 19L, 31L), "false"),
  pID = rep(c("p1", "p2"), times = c(18L, 19L)),
  stringsAsFactors = FALSE
)