python tensorflow tensorflow2.0 tensorflow-datasets tfrecord

Storing multiple values in a tfrecord feature


image_id class_1_rle class_2_rle class_3_rle
0002cc93b.jpg 29102 12 29346 24...
0007a71bf.jpg 18661 28 18863 82...
000a4bcdd.jpg 131973 1 132228 4... 229501 11 229741 33...

I am trying to create tfrecords from the table above. I need to combine the RLE (Run Length Encoding) columns into a single feature that holds one RLE per class. E.g., the features in the final tfrecord should look like this:

img_id: b'0002cc93b.jpg'
rle: [b'1 0'  b'29102 12 29346 24...'  b'1 0']

img_id: b'000a4bcdd.jpg'
rle: [b'131973 1 132228 4...'  b'1 0'  b'229501 11 229741 33...']

The rle feature should contain the RLEs of all 3 masks for the corresponding image, stored as strings, and an empty RLE should be encoded as '1 0'.

I tried passing a list as the feature value, but it gives the following error:

TypeError: ['29102 12 29346 24 29602 24 29858 24 30114 24 30370 24 30626 24 30882 24 31139 23 31395 23 31651 23 has type list, but expected one of: bytes

Solution

  • I found an overall solution that suits my case.

    The specific solution of storing multiple values in a feature.

    I used pandas to replace the empty RLEs in the df with '1 0'.

    A function to grab the rles corresponding to the image from the df.

    def rle_class_1(image_id):
        """Return the first class-1 RLE stored for ``image_id``.

        Selects the ``class_1_rle`` entries of the module-level ``df`` whose
        ``image_id`` column equals the argument, wraps the first one in a
        TensorFlow constant and returns its ``.numpy()`` value (bytes when the
        stored value is a string). Returns ``None`` when no row matches.
        """
        matching_rles = iter(df['class_1_rle'][df['image_id'] == image_id])
        try:
            first_rle = next(matching_rles)
        except StopIteration:
            # No row for this image id: nothing to convert.
            return None
        return tf.constant(first_rle).numpy()
    

    Similar functions for class_2 and class_3.

    Create tfrecord

    # Map each image id to its file path. The two sequences are paired
    # positionally, and zip stops at the shorter of file_ids / file_paths.
    paths_dict = dict(zip(file_ids, file_paths))
    
    def _bytestring_feature(list_of_bytestrings):
        """Wrap a list of bytestrings in a ``tf.train.Feature``.

        The whole list is passed as the ``value`` of a ``tf.train.BytesList``,
        so a single feature can carry several bytestrings.
        """
        bytes_list = tf.train.BytesList(value=list_of_bytestrings)
        return tf.train.Feature(bytes_list=bytes_list)
    
    def _int_feature(list_of_ints):
        """Wrap a list of integers in a ``tf.train.Feature``.

        The whole list is passed as the ``value`` of a ``tf.train.Int64List``.
        """
        int64_list = tf.train.Int64List(value=list_of_ints)
        return tf.train.Feature(int64_list=int64_list)
    
    def image_bits_from_id(image_id):
        """Return the JPEG-encoded bytes of the image registered under ``image_id``.

        The file path is looked up in the module-level ``paths_dict``. The image
        is opened with ``Image.open`` (presumably PIL -- confirm against the
        file's imports), converted to a tensor and re-encoded with
        ``tf.image.encode_jpeg``.

        Raises:
            KeyError: if ``image_id`` is not a key of ``paths_dict``.
        """
        # Open the image as a context manager so the underlying file handle is
        # released deterministically, even if the tensor conversion raises.
        with Image.open(paths_dict[image_id]) as image:
            pixels = tf.constant(image)
        encoded = tf.image.encode_jpeg(pixels, optimize_size=True, chroma_downsampling=False)
        return encoded.numpy()
    
    def create_tfrec_example(image_id):
        """Build the ``tf.train.Example`` for one image.

        The example carries three bytes features: ``image`` (the encoded image
        bytes), ``img_id`` (the encoded id string) and ``rle`` (the class 1, 2
        and 3 RLEs, in that order, stored together in a single feature).
        """
        encoded_image = image_bits_from_id(image_id)
        # One RLE per class, kept in class order within the shared feature.
        class_rles = [
            rle_class_1(image_id),
            rle_class_2(image_id),
            rle_class_3(image_id),
        ]

        features = tf.train.Features(
            feature={
                'image': _bytestring_feature([encoded_image]),
                'img_id': _bytestring_feature([image_id.encode()]),
                'rle': _bytestring_feature(class_rles),
            }
        )
        return tf.train.Example(features=features)
    

    Parse and view record

    def parse_tfrecord_fn(example):
        """Parse one serialized example and decode its image.

        Reads the scalar string features ``image`` and ``img_id`` plus the
        length-3 string feature ``rle``, then replaces ``image`` with the
        result of decoding it as a 3-channel JPEG. Returns the parsed dict.
        """
        feature_spec = {
            'image': tf.io.FixedLenFeature([], tf.string),
            'img_id': tf.io.FixedLenFeature([], tf.string),
            # Fixed length of 3: one RLE string per class.
            'rle': tf.io.FixedLenFeature([3], tf.string),
        }

        parsed = tf.io.parse_single_example(example, feature_spec)
        parsed["image"] = tf.io.decode_jpeg(parsed["image"], channels=3)
        return parsed
    
    # Load the first entry of TFREC and parse each serialized record in it.
    raw_dataset = tf.data.TFRecordDataset(TFREC[0])   # TFREC is a shard
    parsed_dataset = raw_dataset.map(parse_tfrecord_fn)

    # Inspect the first five records: print the metadata, then plot the image.
    for features in parsed_dataset.take(5):
        for key in features:
            # The image is shown as a plot below rather than printed.
            if key == "image":
                continue
            print(f"{key}: {features[key]}")

        print(f"Image shape: {features['image'].shape}")
        plt.figure(figsize=(7, 7))
        plt.imshow(features["image"].numpy())
        plt.show()
    

    Output

    img_id: b'0002cc93b.jpg'
    rle: [b'29102 12 29346 24 29602 24 29858 24... '
     b'1 0' b'1 0']
    Image plot