Tags: python, csv, tensorflow, tensorflow2.0, tensorflow.js

Loading a CSV to perform inference in TensorFlow.js


I have a CSV file and I want to obtain arrays out of its data. I've tried several ways to parse the CSV. The pandas equivalent of what I want is:

pd.read_csv('csv_file.csv').values  # returns a 2-D array of shape [100, 14]

I've tried Papa Parse for parsing the CSV file:

// Parse `file` with Papa Parse. When parsing completes, the parsed rows
// (`results.data`) are stored in `data`, which is not declared in this snippet.
let parsed_data = papa.parse(file, {
  header: true,
  newline: '\n',
  dynamicTyping: true,
  complete(results) {
    data = results.data;
  },
});

This returns an array of shape [100, 1]. I also tried tf.data.csv, but it doesn't seem to work:

// Builds a CSV dataset from `data_path` (with the header option enabled) and
// logs the resulting dataset object. Nothing is awaited here, so only the
// dataset object itself is printed.
// NOTE: `csvDataset` is assigned without a local declaration.
async function parse_data() {
  const csvOptions = { hasHeader: true };
  csvDataset = tf.data.csv(data_path, csvOptions);
  console.log(csvDataset);
}

console.log prints: Object { size: null, input: {…} }

I want to perform inference, something like this (Python equivalent):

# Goal: read the CSV into an array, wrap it in a tensor, and pass it to model.predict.
model.predict(tf.tensor(pd.read_csv('csv').values))

Solution

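  • tf.data.csv returns a tf.data.CSVDataset, which is read lazily through an asynchronous iterator obtained from its iterator() method. The rows can be pulled from that iterator and collected to create a tensor. A similar question has been asked here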

    // Location of the Boston-housing training CSV used by the example below.
    const csvUrl = 'https://storage.googleapis.com/tfjs-examples/multivariate-linear-regression/data/boston-housing-train.csv';
    
    /**
     * Reads the first rows of the CSV at `csvUrl`, builds a feature tensor and
     * a label tensor from them, and logs both tensor shapes.
     *
     * The `medv` column is configured as the label; every other column is
     * treated as a feature.
     *
     * @param {number} [maxRows=5] - Maximum number of rows to read. Fewer rows
     *     are used when the dataset ends earlier.
     * @returns {Promise<{features: *, labels: *}>} The tensors created from the
     *     rows that were read.
     */
    async function run(maxRows = 5) {
      const csvDataset = tf.data.csv(csvUrl, {
        columnConfigs: {
          medv: {
            isLabel: true,
          },
        },
      });

      // Convert xs (features) and ys (labels) from object form (keyed by
      // column name) to array form.
      const flattenedDataset = csvDataset.map(({ xs, ys }) => ({
        xs: Object.values(xs),
        ys: Object.values(ys),
      }));

      const iterator = await flattenedDataset.iterator();
      const featureRows = [];
      const labelRows = [];

      // Read only the first `maxRows` rows: the whole dataset does not have to
      // be read at once, since that would consume a lot of memory.
      for (let i = 0; i < maxRows; i++) {
        const { value, done } = await iterator.next();
        if (done) {
          // The dataset has fewer rows than requested; `value` is not a row.
          break;
        }
        featureRows.push(value.xs);
        labelRows.push(value.ys);
      }

      const features = tf.tensor(featureRows);
      const labels = tf.tensor(labelRows);

      console.log(features.shape);
      console.log(labels.shape);

      return { features, labels };
    }
    
    // Start the example; report a failure (e.g. the CSV cannot be fetched)
    // instead of leaving the returned promise's rejection unhandled.
    run().catch((err) => {
      console.error(err);
    });