I have some data in the format found in the code chunk.
dat <- data.frame(fruit=c('apple',NA,NA,NA,NA,NA,NA,NA,NA,
'pear',NA,NA,NA,NA,NA,NA,NA,NA,
'orange',NA,NA,NA,NA,NA,NA,NA,NA),
fertiliser=c('phosphorus',NA,NA,
'potassium',NA,NA,
'Nitrogen',NA,NA,
'phosphorus',NA,NA,
'potassium',NA,NA,
'Nitrogen',NA,NA,
'phosphorus',NA,NA,
'potassium',NA,NA,
'Nitrogen',NA,NA),
method=rep(c('a','b','c'),9),
no_of_fruits=c(rep(5,3),rep(2,3),rep(6,3),rep(8,3),rep(7,3),rep(4,3),rep(3,3),rep(1,3),rep(9,3)),
mean=c(rnorm(27, 50, 20)),
lower_ci=c(rnorm(27,20,20)),
upper_ci=c(rnorm(27,80,20)),
pval=c(rnorm(27,0.005,0.001)),
x=c(rnorm(1,0.004,0.05),NA,NA,
rnorm(1,0.004,0.05),NA,NA,
rnorm(1,0.004,0.05),NA,NA,
rnorm(1,0.004,0.05),NA,NA,
rnorm(1,0.004,0.05),NA,NA,
rnorm(1,0.004,0.05),NA,NA,
rnorm(1,0.004,0.05),NA,NA,
rnorm(1,0.004,0.05),NA,NA,
rnorm(1,0.004,0.05),NA,NA),
y=c(rnorm(1,0.004,0.0003),NA,NA,
rnorm(1,0.004,0.0003),NA,NA,
rnorm(1,0.004,0.0003),NA,NA,
rnorm(1,0.004,0.0003),NA,NA,
rnorm(1,0.004,0.0003),NA,NA,
rnorm(1,0.004,0.0003),NA,NA,
rnorm(1,0.004,0.0003),NA,NA,
rnorm(1,0.004,0.0003),NA,NA,
rnorm(1,0.004,0.0003),NA,NA))
There are 27 rows of data in the above dataframe, but I'd like to be able to have 6 rows in total in the new dataset: 6 fruit rows, and 3 fertiliser rows for each fruit row.
To turn the data into long format, I want to have each cell in the method row in the original dat
dataframe to be a separate column, so each individual row in the new dataset would be:
| fruit | fertiliser | no_of_fruits | a_mean | a_lower_ci | a_upper_ci | a_pval | a_x | a_y | b_mean | b_lower_ci | b_upper_ci | b_pval | b_x | b_y | c_mean | c_lower_ci | c_upper_ci | c_pval | c_x | c_y |
Hope this makes sense!
I tried using for loops to tackle this, but my attempt seems quite clumsy...
library(tidyverse)
methods <- unique(dat$method)
fert <- unique(dat$fertiliser) # with NAs
fert <- na.omit(fert) # without NAs
fru <- unique(dat$fruit) # with NAs
fru <- na.omit(fru) # without NAs
dat_2 <- data.frame()
longer_row <- NULL
p = 1
# for loop to go through each row in finalised_results_children
for(x in p:length(dat)){
# the above for loop goes through each fruit:
# apple, pear, orange
for(a in 1:length(methods)){
# the above for loop goes through each of 3 methods:
# a,b,c
method = dat[a,3]
results=dat[x,4:8]
# rename columns to e.g. 'a_mean'
columns = colnames(dat[,4:8]) %>%
str_replace_all(., ' ', '_')
columns = paste(method, columns, sep = '_')
colnames(results) = columns
# this seems to be where the problem is!
# we want to create a (1x(5*3 methods) + x + y) row
if(is.null(longer_row)){
longer_row = results
longer_row = cbind(data.frame(fruit=fru[x]), data.frame(fertiliser=fer[x]), longer_row, dat[x, 9:10])
}else{
longer_row = cbind(longer_row, results)
}
}
#
dat_2 <- rbind(dat_2, longer_row)
longer_row = NULL
p = p+3
}
This is what I get. Any help would be much appreciated!
library(tidyverse)
fill(dat,fruit, fertiliser) %>%
pivot_wider(names_from = method, values_from = mean:y,
names_vary = 'slowest',
names_glue = '{method}_{.value}')
# A tibble: 9 × 21
fruit fertiliser no_of_fruits a_mean a_lower_ci a_upper_ci a_pval a_x a_y b_mean
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 apple phosphorus 5 54.6 15.7 66.6 0.00507 0.0541 0.00445 64.6
2 apple potassium 2 35.2 27.8 75.8 0.00492 0.00354 0.00406 27.6
3 apple Nitrogen 6 54.5 20.6 40.5 0.00420 -0.00259 0.00457 40.9
4 pear phosphorus 8 56.4 -28.2 58.7 0.00558 -0.00305 0.00354 49.2
5 pear potassium 7 55.3 25.9 116. 0.00413 -0.0480 0.00388 73.4
6 pear Nitrogen 4 34.8 13.7 95.8 0.00415 0.0385 0.00390 60.2
7 orange phosphorus 3 17.3 44.9 137. 0.00492 0.00892 0.00477 29.5
8 orange potassium 1 18.3 40.5 72.1 0.00480 0.0525 0.00398 56.0
9 orange Nitrogen 9 45.3 41.2 63.4 0.00454 -0.00916 0.00390 39.1
# ℹ 11 more variables: b_lower_ci <dbl>, b_upper_ci <dbl>, b_pval <dbl>, b_x <dbl>, b_y <dbl>,
# c_mean <dbl>, c_lower_ci <dbl>, c_upper_ci <dbl>, c_pval <dbl>, c_x <dbl>, c_y <dbl>