I have a dataframe with column names such as: "Sample_ID", "Time00", "X7236Nr1", "Y844Nr1856", "X9834Nr21", "S844Nr567"
I want to add leading zeros to the digits after "Nr" so that they all become 4-digit numbers: "Sample_ID", "Time00", "X7236Nr0001", "Y844Nr1856", "X9834Nr0021", "S844Nr0567"
I tried to use rename_at to select the columns and apply the appropriate function such as sprintf,
df %>% rename_at(vars(starts_with("[A-B][0-9]")), ~ FUNCTION)
but I could not build the correct function. Can you please suggest a way to handle this kind of mixed string?
Thanks in advance
# your data.frame
df <- data.frame(
  Sample_ID = 1, Time00 = 1, X7236Nr1 = 1,
  Y844Nr1856 = 1, X9834Nr21 = 1, S844Nr567 = 1
)

# Base R only: left-pad the digit run that directly follows "Nr" with zeros.
#
# x     : character vector (e.g. column names).
# width : minimum number of digits after "Nr" (default 4).
# Returns x with the digits padded in place; elements without "Nr<digits>"
# and digit runs already at least `width` long are returned unchanged.
pad_nr_digits <- function(x, width = 4L) {
  # Look-behind keeps "Nr" out of the match, so only the digits are replaced
  # and anything before or after them stays exactly where it was.
  hits <- regexpr("(?<=Nr)[0-9]+", x, perl = TRUE)
  digits <- regmatches(x, hits)
  # Pad as text rather than via as.numeric(), so nothing is coerced or lost.
  zeros <- strrep("0", pmax(0L, width - nchar(digits)))
  regmatches(x, hits) <- paste0(zeros, digits)
  x
}

# Rename in place instead of rebuilding the data frame column by column.
names(df) <- pad_nr_digits(names(df))
I have a dataframe with some column names like; "Sample_ID", "Time00", "X7236Nr1", "Y844Nr1856", "X9834Nr21", "S844Nr567"
You can do it by using str_replace_all() with a str_match() that finds the "Nr" + number, and str_pad() to pad the number to 4 digits with zeroes.
library(dplyr)
library(stringr)
# your data.frame
df <- data.frame(
  Sample_ID = 1, Time00 = 1, X7236Nr1 = 1,
  Y844Nr1856 = 1, X9834Nr21 = 1, S844Nr567 = 1
)

# Replacement helper for str_replace_all(): receives only the matched pieces
# (e.g. "Nr1", "Nr21") and returns them with the digits padded to 4 characters.
# It is vectorised on purpose: current stringr passes all matches at once as a
# character vector and expects a result of the same length, so we take the
# whole capture-group column ([, 2]) instead of a single cell.
pad_nr <- function(hit) {
  digits <- str_match(hit, "Nr(\\d+)")[, 2]
  paste0("Nr", str_pad(digits, 4, pad = "0"))
}

# Names without "Nr" + digits never reach pad_nr(), so they stay unchanged.
df <- df %>%
  rename_with(~ str_replace_all(.x, "Nr(\\d+)", pad_nr))
### Result
> colnames(df)
"Sample_ID" "Time00" "X7236Nr0001" "Y844Nr1856" "X9834Nr0021" "S844Nr0567"
# Explanations
> str_match("Y844Nr0856", "Nr(\\d+)")
[,1] [,2]
[1,] "Nr0856" "0856"
> str_match("Time00", "Nr(\\d+)") # no "Nr" + digits here, so everything is NA and this name is left unchanged
[,1] [,2]
[1,] NA NA
> str_pad("856", 4, pad = "0") # could also use sprintf()
[1] "0856"