Hi,

I would like to read some TFRecord data. The following works, but only for fixed-length data; now I would like to do the same thing for variable-length data using tf.VarLenFeature:
def load_tfrecord_fixed(serialized_example):
    context_features = {
        'length': tf.FixedLenFeature([], dtype=tf.int64),
        'type': tf.FixedLenFeature([], dtype=tf.string)
    }
    sequence_features = {
        "values": tf.FixedLenSequenceFeature([], dtype=tf.int64)
    }
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features=context_features,
        sequence_features=sequence_features
    )
    return context_parsed, sequence_parsed
and
tf.reset_default_graph()
with tf.Session() as sess:
    filenames = [fp.name]
    dataset = tf.data.TFRecordDataset(filenames)
    dataset = dataset.map(load_tfrecord_fixed)
    dataset = dataset.repeat()
    dataset = dataset.batch(2)
    iterator = dataset.make_initializable_iterator()
    next_element = iterator.get_next()
    sess.run(iterator.initializer)
    for i in range(3):
        a = sess.run(next_element)
        print(a)
result:
({'length': array([3, 3], dtype=int64), 'type': array([b'FIXED_length', b'FIXED_length'], dtype=object)}, {'values': array([[82, 2, 2], [42, 5, 1]], dtype=int64)})
({'length': array([3, 3], dtype=int64), 'type': array([b'FIXED_length', b'FIXED_length'], dtype=object)}, {'values': array([[2, 3, 1], [1, 2, 3]], dtype=int64)})
({'length': array([3, 3], dtype=int64), 'type': array([b'FIXED_length', b'FIXED_length'], dtype=object)}, {'values': array([[1, 100, 200], [123, 12, 12]], dtype=int64)})
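(For context: the writer code isn't part of the question, but records with this structure could be produced roughly as in the sketch below. The helper name and file name are made up; only the feature names match the parsers above.)

import tensorflow as tf

# Hypothetical writer sketch (not from the question): serializes
# tf.train.SequenceExample protos with 'length'/'type' context features
# and a 'values' feature list, matching the parsers used here.
def write_records(path, sequences):
    with tf.python_io.TFRecordWriter(path) as writer:
        for values in sequences:
            ex = tf.train.SequenceExample()
            ex.context.feature['length'].int64_list.value.append(len(values))
            ex.context.feature['type'].bytes_list.value.append(b'FIXED_length')
            fl = ex.feature_lists.feature_list['values']
            for v in values:
                fl.feature.add().int64_list.value.append(v)
            writer.write(ex.SerializeToString())

write_records('fixed.tfrecord', [[82, 2, 2], [42, 5, 1]])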
Here is the map function I'm trying to use, but in the end it gives me some errors :'(
def load_tfrecord_variable(serialized_example):
    context_features = {
        'length': tf.FixedLenFeature([], dtype=tf.int64),
        'batch_size': tf.FixedLenFeature([], dtype=tf.int64),
        'type': tf.FixedLenFeature([], dtype=tf.string)
    }
    sequence_features = {
        "values": tf.VarLenFeature(tf.int64)
    }
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features=context_features,
        sequence_features=sequence_features
    )
    # return context_parsed, sequence_parsed  (sequence_parsed is sparse)
    batched_data = tf.train.batch(
        tensors=[sequence_parsed['values']],
        batch_size=2,
        dynamic_pad=True
    )
    # make dense data
    dense_data = tf.sparse_tensor_to_dense(batched_data)
    return context_parsed, dense_data
error:
OutOfRangeError: Attempted to repeat an empty dataset infinitely.
[[Node: IteratorGetNext = IteratorGetNext[output_shapes=[[], [], [], [?,?,?]], output_types=[DT_INT64, DT_INT64, DT_STRING, DT_INT64], _device="/job:localhost/replica:0/task:0/device:CPU:0"](Iterator)]]
During handling of the above exception, another exception occurred:
So, can someone help me? Also, I'm using tensorflow nightly; I don't think I'm missing much...

What ended up working is shown below. tf.train.batch belongs to the old queue-based input pipeline and cannot be used inside a Dataset.map function; instead, return dense tensors from load_tfrecord_variable and let Dataset.padded_batch take care of the batching and padding:
def load_tfrecord_variable(serialized_example):
    context_features = {
        'length': tf.FixedLenFeature([], dtype=tf.int64),
        'batch_size': tf.FixedLenFeature([], dtype=tf.int64),
        'type': tf.FixedLenFeature([], dtype=tf.string)
    }
    sequence_features = {
        "values": tf.VarLenFeature(tf.int64)
    }
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        serialized=serialized_example,
        context_features=context_features,
        sequence_features=sequence_features
    )
    length = context_parsed['length']
    batch_size = context_parsed['batch_size']
    type = context_parsed['type']
    values = sequence_parsed['values'].values
    return tf.tuple([length, batch_size, type, values])
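Side note on the .values access in the last line: tf.VarLenFeature hands back a tf.SparseTensor per example, and because a single example's values are stored contiguously, .values recovers them as an ordinary dense 1-D tensor, which padded_batch can then pad. A minimal standalone sketch (the numbers are made up):

import tensorflow as tf

# Roughly what parse_single_sequence_example produces for one
# variable-length example: a SparseTensor whose .values holds the data.
sp = tf.SparseTensor(indices=[[0, 0], [1, 0], [2, 0]],
                     values=tf.constant([82, 2, 2], dtype=tf.int64),
                     dense_shape=[3, 1])
with tf.Session() as sess:
    print(sess.run(sp.values))  # [82 2 2]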
filenames = [fp.name]
dataset = tf.data.TFRecordDataset(filenames)
dataset = dataset.map(load_tfrecord_variable)
dataset = dataset.repeat()
batch_size = 2  # same batch size as in the fixed-length example
dataset = dataset.padded_batch(
    batch_size,
    padded_shapes=(
        tf.TensorShape([]),
        tf.TensorShape([]),
        tf.TensorShape([]),
        tf.TensorShape([None])  # if you reshape 'values' in load_tfrecord_variable, add the added dims after None, e.g. [None, 3]
    ),
    padding_values=(
        tf.constant(0, dtype=tf.int64),
        tf.constant(0, dtype=tf.int64),
        tf.constant(""),
        tf.constant(0, dtype=tf.int64)
    )
)
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
    sess.run(iterator.initializer)
    for i in range(3):
        [length_vals, batch_size_vals, type_vals, values_vals] = sess.run(next_element)
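        # e.g. print the padded batch; values_vals is zero-padded to the
        # longest sequence in this particular batch
        print(length_vals, batch_size_vals, type_vals)
        print(values_vals)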