I'm working on a project where I'm attempting to use Sheets API to grab the data from a large set of data. It's about 400K rows with 11 columns. I have seen methods where people split the load into batches of 100K rows at a time but I'm at a loss at how I would do this while performing some cleanup. I have have to loop through the rows to remove any rows where one of the columns equals 1 of 36 values. Here is a sample of a script that I attempted but I get stuck at the dataSourceArr
gathering of data as that is the sheet that contains the large dataset.
//Data from Content Column. These are the excluded items
excludedContentArr = Sheets.Spreadsheets.Values.get(tss.getId(), `'` + excludedContentSht.getName() + `'!A2:A`).values;
//This is the large dataset that is giving me an error right now
dataSourceArr = Sheets.Spreadsheets.Values.get(sourceSS.getId(), `'` + dataSourceSht.getName() + `'!A1:` + columnToLetter(dataSourceSht.getLastColumn())).values;
var headersDataSrcArr = dataSourceArr[0];//Get the first and only inner array
var eidDataSrcCN = headersDataSrcArr.indexOf('EID') + 1;//Arrays are zero indexed- add 1.
var contentDataSrcCN = headersDataSrcArr.indexOf('Content') + 1;//Arrays are zero indexed- add 1.
var statusDataSrcCN = headersDataSrcArr.indexOf('Status') + 1;//Arrays are zero indexed- add 1.
var compDateDataSrcCN = headersDataSrcArr.indexOf('Completion Date') + 1;//Arrays are zero indexed- add 1.
var pathwayDataSrcCN = headersDataSrcArr.indexOf('Pathway') + 1;//Arrays are zero indexed- add 1.
var values = [];
for(var i in dataSourceArr){
var excludedContentIndex = excludedContentArr.map(r => r[0]).indexOf(dataSourceArr[i][contentDataSrcCN-1]);
if(excludedContentIndex == -1){
//Action if not found
values.push([
dataSourceArr[i][eidDataSrcCN-1],
dataSourceArr[i][contentDataSrcCN-1],
dataSourceArr[i][statusDataSrcCN-1],
dataSourceArr[i][compDateDataSrcCN-1],
dataSourceArr[i][pathwayDataSrcCN-1]
]);
}
}
Logger.log('Found ' + values.length + ' rows of data that met the criteria.');
//Paste into the destination sheet.
Sheets.Spreadsheets.Values.update({ values }, tss.getId(), `'` + pathwayContentCompSht.getName() + `'!A2`, { valueInputOption: "USER_ENTERED" });
You can see where I am looping to exclude any rows that contain any of the excluded content. Example of data to be excluded:
Content |
---|
Back to the Future 1 |
Back to the Future 2 |
Back to the Future 3 |
Example of Large Dataset:
EID | Content | Status | Completion Date | Pathway |
---|---|---|---|---|
a1 | Rocky 5 | Complete | 01/01/2024 | abc123 |
a2 | Back to the Future 1 | Complete | 01/01/2024 | abc123 |
a3 | Rocky 4 | Complete | 01/01/2024 | abc123 |
a4 | Rocky 3 | Complete | 01/01/2024 | abc123 |
a5 | Back to the Future 2 | Complete | 01/01/2024 | abc123 |
a6 | Rocky 2 | Complete | 01/01/2024 | abc123 |
a7 | Rocky 1 | Complete | 01/01/2024 | abc123 |
a8 | Matrix Revolutions | Complete | 01/01/2024 | abc123 |
a9 | Back to the Future 3 | Complete | 01/01/2024 | abc123 |
a10 | Matrix | Complete | 01/01/2024 | abc123 |
Example of End Result:
EID | Content | Status | Completion Date | Pathway |
---|---|---|---|---|
a1 | Rocky 5 | Complete | 01/01/2024 | abc123 |
a3 | Rocky 4 | Complete | 01/01/2024 | abc123 |
a4 | Rocky 3 | Complete | 01/01/2024 | abc123 |
a6 | Rocky 2 | Complete | 01/01/2024 | abc123 |
a7 | Rocky 1 | Complete | 01/01/2024 | abc123 |
a8 | Matrix Revolutions | Complete | 01/01/2024 | abc123 |
a10 | Matrix | Complete | 01/01/2024 | abc123 |
From your following reply,
The error message is HttpResponseException: Response Code: 413. Message: response too large.
The error occurs on the second line with dataSourceArr = Sheets.Spreadsheets.Values.get(...
I understand that your error of HttpResponseException: Response Code: 413. Message: response too large.
occurs at dataSourceArr = Sheets.Spreadsheets.Values.get(sourceSS.getId(),
'+ dataSourceSht.getName() +
'!A1: + columnToLetter(dataSourceSht.getLastColumn())).values;
. If your other parts except for this work fine, how about the following modification?
In this modification, the values are retrieved and put by separating rows.
Before you use this script, please enable Sheets API at Advanced Google services.
// ref: https://tanaikech.github.io/2022/04/25/report-handling-10000000-cells-in-google-spreadsheet-using-google-apps-script/
function getValues_({ spreadsheetId, sheetName, start = 1, maxRow, limit = 100000 }) {
return [...Array(Math.ceil(maxRow / limit))].flatMap((_) => {
const last = start - 1 + limit;
const range = `'${sheetName}'!A${start}:${last > maxRow ? maxRow : last}`;
const temp = Sheets.Spreadsheets.Values.get(spreadsheetId, range).values;
start += limit;
return temp;
});
}
// ref: https://tanaikech.github.io/2022/04/25/report-handling-10000000-cells-in-google-spreadsheet-using-google-apps-script/
function setValues_({ spreadsheetId, sheetName, start = 1, maxRow, limit = 100000, values }) {
Array.from(Array(Math.ceil(maxRow / limit))).forEach((_) => {
const v = values.splice(0, limit);
Sheets.Spreadsheets.Values.update({ values: v }, spreadsheetId, `'${sheetName}'!A${start}`, { valueInputOption: "USER_ENTERED" });
start += limit;
});
}
function myFunction() {
// Please set your variables.
var tss =
var excludedContentSht =
var sourceSS =
var dataSourceSht =
var pathwayContentCompSht =
// --- I modified the below script.
excludedContentArr = getValues_({ spreadsheetId: tss.getId(), sheetName: excludedContentSht.getName(), start: 2, maxRow: excludedContentSht.getLastRow(), limit: 100000 });
dataSourceArr = getValues_({ spreadsheetId: sourceSS.getId(), sheetName: dataSourceSht.getName(), start: 1, maxRow: dataSourceSht.getLastRow(), limit: 100000 });
// ---
var headersDataSrcArr = dataSourceArr[0];//Get the first and only inner array
var eidDataSrcCN = headersDataSrcArr.indexOf('EID') + 1;//Arrays are zero indexed- add 1.
var contentDataSrcCN = headersDataSrcArr.indexOf('Content') + 1;//Arrays are zero indexed- add 1.
var statusDataSrcCN = headersDataSrcArr.indexOf('Status') + 1;//Arrays are zero indexed- add 1.
var compDateDataSrcCN = headersDataSrcArr.indexOf('Completion Date') + 1;//Arrays are zero indexed- add 1.
var pathwayDataSrcCN = headersDataSrcArr.indexOf('Pathway') + 1;//Arrays are zero indexed- add 1.
var values = [];
for (var i in dataSourceArr) {
var excludedContentIndex = excludedContentArr.map(r => r[0]).indexOf(dataSourceArr[i][contentDataSrcCN - 1]);
if (excludedContentIndex == -1) {
//Action if not found
values.push([
dataSourceArr[i][eidDataSrcCN - 1],
dataSourceArr[i][contentDataSrcCN - 1],
dataSourceArr[i][statusDataSrcCN - 1],
dataSourceArr[i][compDateDataSrcCN - 1],
dataSourceArr[i][pathwayDataSrcCN - 1]
]);
}
}
Logger.log('Found ' + values.length + ' rows of data that met the criteria.');
// --- I modified the below script.
setValues_({ spreadsheetId: tss.getId(), sheetName: pathwayContentCompSht.getName(), start: 2, maxRow: values.length, limit: 100000, values });
// ---
}