I have below mentioned dataframe in R.
ID Amount Date
IK-1 100 2020-01-01
IK-2 110 2020-01-02
IK-3 120 2020-01-03
IK-4 109 2020-01-03
IK-5 104 2020-01-03
I'm using ID
to fetch some details from MySQL using the following code.
library(RMySQL)
conn<- connection
query<-paste0("SELECT c.ID,e.Parameters, d.status
FROM Table1 c
left outer join Table2 d ON d.seq_id=c.ID
LEFT outer JOIN Table3 e ON e.role_id=d.role
where c.ID IN (", paste(shQuote(dataframe$ID, type = "sh"),
collapse = ', '),")
and e.Parameters in
('Section1',
'Section2','Section3',
'Section4');")
res1 <- dbGetQuery(conn,query)
res2<-res1[res1$Parameters=="Section1",4:5]
colnames(res2)[colnames(res2)=="status"] <- "Section1_Status"
The above code is working fine, If i pass ~1000 ID but it throws R termination error when passing 10000 or more ID at a time.
How can I create a loop and pass the Id in batched to get the one final output for 10000 ID.
Error Message:
Warning message:
In dbFetch(rs, n = n, ...) : error while fetching rows
# Load Packages
library(dplyr) # only needed to create the initial dataframe
library(RMySQL)
# create the initial dataframe
df <- tribble(
~ID, ~Amount, ~Date
, "IK-1" , 100 , 2020-01-01
, "IK-2" , 110 , 2020-01-02
, "IK-3" , 120 , 2020-01-03
, "IK-4" , 109 , 2020-01-03
, "IK-5" , 104 , 2020-01-03
)
# first helper function
createIDBatchVector <- function(x, batchSize){
paste0(
"'"
, sapply(
split(x, ceiling(seq_along(x) / batchSize))
, paste
, collapse = "','"
)
, "'"
)
}
# second helper function
createQueries <- function(IDbatches){
paste0("
SELECT c.ID,e.Parameters, d.status
FROM Table1 c
LEFT OUTER JOIN Table2 d ON d.seq_id =c.ID
LEFT OUTER JOIN Table3 e ON e.role_id = d.role
WHERE c.ID IN (", IDbatches,")
AND e.Parameters in ('Section1','Section2','Section3','Section4');
")
}
# ------------------------------------------------------------------
# and now the actual script
# first we create a vector that contains one batch per element
IDbatches <- createIDBatchVector(df$ID, 2)
# It looks like this:
# [1] "'IK-1','IK-2'" "'IK-3','IK-4'" "'IK-5'"
# now we create a vector of SQL-queries out of that
queries <- createQueries(IDbatches)
cat(queries) # use cat to show what they look like
# it looks like this:
# SELECT c.ID,e.Parameters, d.status
# FROM Table1 c
# LEFT OUTER JOIN Table2 d ON d.seq_id =c.ID
# LEFT OUTER JOIN Table3 e ON e.role_id = d.role
# WHERE c.ID IN ('IK-1','IK-2')
# AND e.Parameters in ('Section1','Section2','Section3','Section4');
#
# SELECT c.ID,e.Parameters, d.status
# FROM Table1 c
# LEFT OUTER JOIN Table2 d ON d.seq_id =c.ID
# LEFT OUTER JOIN Table3 e ON e.role_id = d.role
# WHERE c.ID IN ('IK-3','IK-4')
# AND e.Parameters in ('Section1','Section2','Section3','Section4');
#
# SELECT c.ID,e.Parameters, d.status
# FROM Table1 c
# LEFT OUTER JOIN Table2 d ON d.seq_id =c.ID
# LEFT OUTER JOIN Table3 e ON e.role_id = d.role
# WHERE c.ID IN ('IK-5')
# AND e.Parameters in ('Section1','Section2','Section3','Section4');
# and now the loop
df_final <- data.frame() # initialize a dataframe
conn <- connection # open a connection
for (query in queries){ # iterate over the queries
df_final <- rbind(df_final, dbGetQuery(conn,query))
}
# And here the connection should be closed. (I don't know the function call for this.)