Following code is for dataframes df1, df2. The code is reading the data columns (var) and look in each dataframe, if the var column is not present, it adds it and put NA in that particular column.
dfs1 <- c('df1','df2')
var <- c('City_Name', 'Temp', 'Pres' , 'Wind_Hor' , 'Wind_Ver' , 'Rainf' , 'S_Moist')
lapply(dfs1, \(x) {
dfn <- get(x, envir = .GlobalEnv)
dfn[[var[which(is.na(match(var,names(dfn))))]]] <- NA
dfn <- dfn %>% select(all_of(var))
return(assign(x,dfn,envir = .GlobalEnv))
})
If I have a list of files, how can I modify the above code?
I tried following way
dfs1 <- list.files(path = 'D:/Test3', pattern = "*txt", recursive = TRUE)
var <- c('D/T', 'City_Name', 'Temp', 'Pres', 'Wind_Hor', 'Wind_Ver', 'Rainf', 'S_Moist')
lapply(dfs1, \(x) {
dfn <- get(x, envir = .GlobalEnv)
dfn[[var[which(is.na(match(var,names(dfn))))]]] <- NA
dfn <- dfn %>% select(all_of(var))
return(assign(x,dfn,envir = .GlobalEnv))
})
But it returns an error:
Error in get(x, envir = .GlobalEnv) :
object 'File/File1.txt' not found
Could anyone please respond how to modify the code for files.
library(dplyr)
library(data.table)
# I find it easier to use data.table in this case when it comes to assigning NA's using dt[, (character_vector) := NA]
var <- c('City_Name', 'Temp', 'Pres' , 'Wind_Hor' , 'Wind_Ver' , 'Rainf' , 'S_Moist')
## comment 2 columns
df1 = data.frame(City_Name = "NYC",
Temp = 20,
Pres = 10,
#Wind_Hor = 5,
Wind_Ver = 5,
# Rainf = 10,
S_Moist = 5)
## Comment 3
df2 = data.frame(#City_Name = "NYC",
Temp = 15,
#Pres = 15,
Wind_Hor = 5,
Wind_Ver = 5,
Rainf = 15)
#S_Moist = 5)
## put the dfs as a list
dfs1 <- list(df1, df2)
## loop through
processed_dfs <- lapply(seq_along(dfs1), function(x) {
dfn = dfs1[[x]]
dfn_nms = names(dfn)
#get missing column names
var_missing = var[!var %in% dfn_nms]
setDT(dfn) # convert to data.table
dfn[, (var_missing) := NA] # asign NA to missing
dfn[, ..var] ## data.table select statement
})
## combine final output
## dplyr method
final_df <- bind_rows(processed_dfs)
## if you want final as data.table
final_df <- rbindlist(processed_dfs)
# I made the above code to make it reproducible if I wanted to combine it by from reading text files from my disk then
## this will give you the files
## you need to read in the files
## to read in files create file_paths
dfs1 <- list.files(path = 'D:/Test3',
pattern = "*txt",
recursive = TRUE)
## this will create file paths ie appending the name of the file to folder name
dfs1_file_paths = file.path( 'D:/Test3', dfs1)
var <- c('D/T', 'City_Name', 'Temp',
'Pres', 'Wind_Hor',
'Wind_Ver', 'Rainf',
'S_Moist')
processed_dfs <- lapply(seq_along(dfs1), function(x) {
file_x = dfs1_file_paths[[x]] ## file path i
## read the file
dfn <- fread(file_x) ## use can also use read.table but you need one more step to convert to data.table
dfn_nms = names(dfn)
#get missing column names
var_missing = var[!var %in% dfn_nms]
dfn[, (var_missing) := NA] # asign NA to missing
dfn[, ..var] ## data.table select statement
})
## combine final output
## dplyr method
final_df <- bind_rows(processed_dfs)
## if you want final as data.table
final_df <- rbindlist(processed_dfs)
# Hope this helps