I've tried several ways to parse a CSV file. I want to obtain a 2D array out of the data. The Pandas equivalent:
pd.read_csv('csv_file.csv').values  # returns an array of shape (100, 14)
I've tried Papa Parse for parsing the CSV file:
let data;
Papa.parse(file, {
  header: true,
  newline: '\n',
  dynamicTyping: true,
  complete: function(results) {
    data = results.data;
  }
});
This returns a [100, 1] dim array: 100 row objects keyed by column name, rather than the [100, 14] numeric array I want.
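I can partly work around that by flattening each row object into its values; a sketch, assuming dynamicTyping has left every column numeric:

// Flatten each parsed row object into an array of its values.
// With the options above, data is an array of objects keyed by column name.
const rows = data.map(row => Object.values(row));
// rows is now [100, 14] and could be fed to tf.tensor(rows).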
I also tried tf.data.csv, and it doesn't seem to work:
async function parse_data() {
  const csvDataset = tf.data.csv(data_path, {
    hasHeader: true
  });
  console.log(csvDataset);
}
The console.log only prints Object { size: null, input: {…}, not the parsed data.
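I suspect the dataset is lazy and the rows only exist once it is iterated; something like this sketch (inside an async function):

// tf.data datasets are lazy: nothing is read until the dataset is iterated.
await csvDataset.forEachAsync(row => console.log(row));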
I want to perform inference, something like this (Python equivalent):
model.predict(tf.tensor(pd.read_csv('csv').values))
tf.data.csv returns a tf.data.CSVDataset, which is an async iterator. The data can be retrieved from it to create a tensor. A similar question has been asked here.
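If the file is small enough to hold in memory, everything can be collected in one call; a minimal sketch (csvUrl as in the example below, all columns numeric):

const rows = await tf.data.csv(csvUrl).toArray();
// Each element is an object keyed by column name; flatten to value arrays.
const data = tf.tensor(rows.map(r => Object.values(r)));  // shape [numRows, numColumns]

For larger files, read row by row instead, as in the following example: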
const csvUrl =
'https://storage.googleapis.com/tfjs-examples/multivariate-linear-regression/data/boston-housing-train.csv';
async function run() {
const csvDataset = tf.data.csv(
csvUrl, {
columnConfigs: {
medv: {
isLabel: true
}
}
});
const numOfFeatures = (await csvDataset.columnNames()).length - 1;
// Prepare the Dataset for training.
const flattenedDataset =
  csvDataset
  .map(({xs, ys}) => {
    // Convert xs (features) and ys (labels) from object form (keyed by
    // column name) to array form.
    return {xs: Object.values(xs), ys: Object.values(ys)};
  });
  // Optionally chain .batch(10) to the map() call above to group rows.
const it = await flattenedDataset.iterator();
const xs = [];
const ys = [];
// Read only the first 5 rows. The whole dataset need not be read
// at once, since that would consume a lot of memory.
for (let i = 0; i < 5; i++) {
  const e = await it.next();
  xs.push(e.value.xs);
  ys.push(e.value.ys);
}
const features = tf.tensor(xs);
const labels = tf.tensor(ys);
console.log(features.shape);  // [5, numOfFeatures]
console.log(labels.shape);    // [5, 1]
}
run();
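From there, inference mirrors the Python one-liner; a sketch, assuming model is a tf.LayersModel whose input matches the features tensor (the URL is a placeholder):

// Load a model and run inference on the features built above.
// The model URL is hypothetical; substitute your own.
const model = await tf.loadLayersModel('https://example.com/model.json');
const predictions = model.predict(features);
predictions.print();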