Tags: google-cloud-platform, google-cloud-functions, google-cloud-storage, buffer, createreadstream

Google Cloud Function returns undefined on bucket finalize buffer. Unable to debug code


I'm trying to process a .csv file using GCF. The file is transferred from another bucket into this one, and I'm trying to read it in using the finalize event. (I also need to reformat the data structure of this file, which is why I need to read the buffer in the first place.)

When I define a file manually in my local environment, the code works perfectly, but when I switch to running it in GCF using events, it exits without errors. Just ignore my debugging logs in the code below. (It stops at console.log("error found"), so I figure the read stream is never created properly.)

Here's the code:

    const { Storage } = require('@google-cloud/storage');
    const Papa = require('papaparse');
    const moment = require('moment');
    
    const { BigQuery } = require('@google-cloud/bigquery');
    
    const storage = new Storage();
    const bigquery = new BigQuery();
    const dataset = bigquery.dataset('toy_test');
    const bucket = storage.bucket('ga_report');
    
    exports.readCSVContent = async (event, context) => {
    
        const gcsEvent = event;
        const fileName = gcsEvent.id;
    
        console.log(fileName);
        console.log(typeof fileName);
    
        if (
          fileName.startsWith('processed/') ||
          fileName.startsWith('unprocessed/')
        ) {
          console.log('1');
          return;
        } else {
          return await new Promise((resolve, reject) => {
            let fileContents = new Buffer('');
            console.log('2');
            try {
              bucket
                .file(fileName)
                .createReadStream({
                  start: 10000,
                  end: 20000
                })
                .on('error', function (err) {
                  console.log('error found');
                  reject('The Storage API returned an error: ' + err);
                })
                .on('data', function (chunk) {
                  console.log('buffer');
                  fileContents = Buffer.concat([fileContents, chunk]);
                })
                .on('end', async function () {
                  console.log('end');
                  let content = fileContents.toString('utf8');
                  try {
                    await bucket.file('unprocessed/ ' + gcsEvent.id).save(content);
                  } catch (error) {
                    console.log(error);
                  }
                  console.log('3');
                  const parsedCSV = Papa.parse(content, {
                    transform: function (element, i) {
                      if (i == 0 && element.length == 8) {
                        var year = element.substring(0, 4);
                        var month = element.substring(4, 6);
                        var day = element.substring(6, 8);
                        console.log('4');
                        const date = moment(
                          year.toString() + month.toString() + day.toString()
                        ).format('YYYY-MM-DDTHH:MM:SS');
                        console.log('5');
                        return date;
                      } else {
                        return element;
                      }
                    },
                    newline: '\n',
                    skipEmptyLines: true,
                  });
    
                  let parsedData = parsedCSV.data;
                  console.log('6');
    
                  parsedData.splice(0, 6);
                  parsedData.reverse();
                  parsedData.splice(0, 6);
                  parsedData.reverse();
    
                  console.log(parsedData);
    
                  const jsonData = parsedData.map((value, index) => {
                    return {
                      date: value[0],
                      car_model: value[1],
                      configurations: value[2],
                    };
                  });
    
    
                  try {
                    await dataset.table('my_table').insert(jsonData);
                    console.log(`Inserted ${jsonData.length} rows`);
                  } catch (error) {
                    console.log(error);
                  }
    
                  const finalCSV = Papa.unparse(parsedData);
    
                  const currentDateTime = moment().format('YYYY-MM-DD HH:mm:ss');
    
                  try {
                    await bucket
                      .file(
                        'processed/' +
                          currentDateTime +
                          ' configurations' +
                          '.csv'
                      )
                      .save(finalCSV);
                    console.log(
                      gcsEvent.id + 'in' + 'processed/'
                    );
                  } catch (error) {
                    console.log(error);
                  }
                  const [files] = await bucket.getFiles();
                  files.forEach((element) => {
                    console.log(element.name);
                  });
                });
            } catch (error) {
              console.log(error);
            }
          });
        }
    };

Solution

  • The key to this problem is the error message:

    Request range not satisfiable
    

    The code generating the error:

    bucket
      .file(fileName)
      .createReadStream({
        start: 10000,
        end: 20000
      })
    

    In this case, the object size was 700 bytes and the call to createReadStream() was specifying a starting offset of 10,000 bytes, which is not possible.

    The solution is to either specify a correct range or not use a range at all for such a small file, as in the sketch below.
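
For reference, here is a minimal sketch of both fixes, assuming the same ga_report bucket and the Node.js @google-cloud/storage client used in the question; readWholeObject and readObjectRange are hypothetical helper names, not part of the original function:

    const { Storage } = require('@google-cloud/storage');

    const storage = new Storage();
    const bucket = storage.bucket('ga_report');

    // Option 1: for a small object, skip the range entirely and buffer the
    // whole file. download() resolves with [Buffer].
    async function readWholeObject(fileName) {
      const [contents] = await bucket.file(fileName).download();
      return contents.toString('utf8');
    }

    // Option 2: keep the range, but clamp it to the object's actual size,
    // which is available (as a string) in the file's metadata.
    async function readObjectRange(fileName, start, end) {
      const file = bucket.file(fileName);
      const [metadata] = await file.getMetadata();
      const size = Number(metadata.size);

      if (start >= size) {
        // The requested range begins past the end of the object; reading it
        // would trigger "Request range not satisfiable".
        return '';
      }

      const safeEnd = Math.min(end, size - 1);

      return new Promise((resolve, reject) => {
        let fileContents = Buffer.alloc(0);
        file
          .createReadStream({ start, end: safeEnd })
          .on('error', reject)
          .on('data', (chunk) => {
            fileContents = Buffer.concat([fileContents, chunk]);
          })
          .on('end', () => resolve(fileContents.toString('utf8')));
      });
    }

If the downstream PapaParse step is meant to see the complete CSV, reading the whole object is usually the simpler choice.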