I am experimenting with using the RDCOMClient package to open a dataframe on the fly into Excel. I have functional code which will open a dataframe into Excel, however the issue I am running into is that the memory resources from loading the data into Excel are not being released by rsession.exe when the code completes. Even after closing the Excel application the memory resources are not released. Is there something I am missing, that I could use to release the memory from rsession.exe? The only way I have been able to release the memory is by closing out of RStudio and opening it back up again.
Code for the function
in.xl <- function(data, headers = TRUE, rownames = FALSE) {
require(RDCOMClient)
# Attempt to coerce non dataframe data into a dataframe
if (!is.data.frame(data)) {
data <- as.data.frame(data)
}
# Set row range for data
if (headers == TRUE) {
d.row.start <- 2
d.row.end <- nrow(data) + 1
} else {
d.row.start <- 1
d.row.end <- nrow(data)
}
# Set column range for data
if (rownames == TRUE) {
d.col.start <- 2
d.col.end <- ncol(data) + 1
} else {
d.col.start <- 1
d.col.end <- ncol(data)
}
# Create COM Connection to Excel
xlApp <- COMCreate("Excel.Application")
xlWB <- xlApp[["Workbooks"]]$Add()
xlSheet <- xlWB$Sheets(1)
# Check if headers should be included
if (headers == TRUE) {
# Create a dataframe from headers
headers <- t(as.data.frame(colnames(data)))
# Set range for header values
startCell <- xlSheet$Cells(1, d.col.start)
endCell <- xlSheet$Cells(1, d.col.end)
rng <- xlSheet$Range(startCell, endCell)
# Add headers to Excel sheet
rng[["Value"]] <- asCOMArray(headers)
# Remove header dataframe
rm(headers)
}
# Check if rownames should be included
if(rownames == TRUE) {
# Create dataframe from row names
if (is.null(rownames(data))) {
rnames = as.data.frame(1:nrow(data))
} else {
rnames = as.data.frame(rownames(data))
}
# Set range for row name values
startCell <- xlSheet$Cells(d.row.start, 1)
endCell <- xlSheet$Cells(d.row.end, 1)
rng <- xlSheet$Range(startCell, endCell)
# Add row names to Excel sheet
rng[["Value"]] <- asCOMArray(rnames)
# Remove row name dataframe
rm(rnames)
}
xlApp[["ScreenUpdating"]] <- FALSE
nblocks <- ceiling(nrow(data) / 2000)
pb <- txtProgressBar(min = 0, max = nblocks, initial = 0, style = 3, width = 20)
data.start <- d.row.start
block <- 1
d.row.end <- d.row.start
df.row.start <- 1
while(d.row.end < nrow(data)) {
d.row.end <- d.row.start + 1999
df.row.end <- df.row.start + 1999
if (d.row.end > nrow(data) + data.start) {
d.row.end <- nrow(data) + data.start - 1
}
if (df.row.end > nrow(data)) {
df.row.end <- nrow(data)
}
xlApp[["StatusBar"]] <- paste("Processing block", block, "of", nblocks)
# Set range for data values
rng <- xlSheet$Range(xlSheet$Cells(d.row.start, d.col.start), xlSheet$Cells(d.row.end, d.col.end))
# Add data to Excel sheet
rng[["Value"]] <- asCOMArray(data[df.row.start:df.row.end, ])
d.row.start <- d.row.end + 1
df.row.start <- df.row.end + 1
if (block != nblocks) {
block <- block + 1
}
# update the progress bar with the current value
setTxtProgressBar(pb,block)
rm(rng, vals)
gc()
}
xlApp[["StatusBar"]] <- "Formatting Columns..."
# Auto adjust column widths
for(c in 1:d.col.end) {
col <- xlSheet$Columns(c)
col[["EntireColumn"]]$AutoFit()
}
xlApp[["StatusBar"]] <- ""
xlApp[["ScreenUpdating"]] <- TRUE
# Show Excel application
xlApp[["Visible"]] <- TRUE
gc()
}
Code to generate a large dataframe for testing. (Makes it easier to see the memory usage issue when looking at the rsession.exe process in Task Manager)
df <- data.frame(replicate(20, sample(replicate(10, paste(sample(LETTERS, 15, rep = TRUE), collapse = "")), 100000, rep = TRUE)))
in.xl(df)
You can use the package callr. With the following code, the memory is released :
df <- data.frame(replicate(20, sample(replicate(10, paste(sample(LETTERS, 15, rep = TRUE), collapse = "")), 1000000, rep = TRUE)))
library(callr)
callr::r(func = in.xl, args = list(data = df))