Tags: python, tensorflow, decode, tfrecord, data-augmentation

How do I read (decode) a .tfrecords file, view the images inside, and augment them?


I have a .tfrecords file and I want to extract the images from it, view them, and augment them. I am using https://colab.research.google.com with TensorFlow version 2.3.0.

When I run the following code:

raw_dataset = tf.data.TFRecordDataset("*path.tfrecords")

for raw_record in raw_dataset.take(1):
    example = tf.train.Example()
    example.ParseFromString(raw_record.numpy())
    print(example)

I get the following output:

features {
  feature {
    key: "depth"
    value {
      int64_list {
        value: 3
      }
    }
  }
  feature {
    key: "height"
    value {
      int64_list {
        value: 333
      }
    }
  }
  feature {
    key: "image_raw"
    value {
      bytes_list {
        value:
      }
    }
  }
  feature {
    key: "label"
    value {
      int64_list {
        value: 16
      }
    }
  }
  feature {
    key: "width"
    value {
      int64_list {
        value: 500
      }
    }
  }
}

Solution

  • Here is simple code that extracts the images from your .tfrecords file and saves them as .png files.

    To run the code below, install the required pip packages once with pip install tensorflow tensorflow_addons pillow numpy matplotlib.

    import os
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
    import tensorflow as tf, PIL.Image, numpy as np
    
    raw_dataset = tf.data.TFRecordDataset('max_32_set.tfrecords')
    
    for i, raw_record in enumerate(raw_dataset.take(3)):
        example = tf.train.Example()
        example.ParseFromString(raw_record.numpy())
        info = {}
        for k, v in example.features.feature.items():
            if k == 'image_raw':
                info[k] = v.bytes_list.value[0]
            elif k in ['depth', 'height', 'width']:
                info[k] = v.int64_list.value[0]
        # The raw bytes are uint8 pixels; reshape them to (height, width, depth).
        img_arr = np.frombuffer(info['image_raw'], dtype = np.uint8).reshape(
            info['height'], info['width'], info['depth']
        )
        # You can use the img_arr numpy array above to directly augment/preprocess
        # your image without saving it to .png (see the tf.data sketch below).
        img = PIL.Image.fromarray(img_arr)
        img.save(f'max_32_set.tfrecords.{str(i).zfill(5)}.png')
    

    First image from the dataset: [image]
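
    If you prefer to keep decoding and augmentation inside a tf.data pipeline (and, as the comment in the code above notes, work on the decoded array directly instead of going through .png files), a minimal sketch is shown below. It assumes the same feature keys as in the printed tf.train.Example and that image_raw holds raw uint8 pixels; the tf.image augmentations are illustrative choices, not part of the original answer.

    import tensorflow as tf

    # Feature spec matching the keys seen in the printed tf.train.Example.
    feature_description = {
        'image_raw': tf.io.FixedLenFeature([], tf.string),
        'height': tf.io.FixedLenFeature([], tf.int64),
        'width': tf.io.FixedLenFeature([], tf.int64),
        'depth': tf.io.FixedLenFeature([], tf.int64),
        'label': tf.io.FixedLenFeature([], tf.int64),
    }

    def parse_record(raw_record):
        parsed = tf.io.parse_single_example(raw_record, feature_description)
        # The bytes are raw uint8 pixels (not an encoded .png/.jpg), so decode_raw + reshape.
        img = tf.io.decode_raw(parsed['image_raw'], tf.uint8)
        img = tf.reshape(img, [parsed['height'], parsed['width'], parsed['depth']])
        return img, parsed['label']

    def augment_img(img, label):
        # Illustrative augmentations; swap in whatever ops you need.
        img = tf.image.convert_image_dtype(img, tf.float32)   # scales uint8 to [0, 1]
        img = tf.image.random_flip_left_right(img)
        img = tf.image.random_brightness(img, max_delta = 0.1)
        img = tf.image.convert_image_dtype(img, tf.uint8, saturate = True)
        return img, label

    dataset = (tf.data.TFRecordDataset('max_32_set.tfrecords')
               .map(parse_record)
               .map(augment_img))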

    Below is code for plotting the number of images per label. Labels inside the max_32_set.tfrecords file are represented as integers (not string names); the label names are probably stored in a separate small file with metadata about the dataset.

    import os
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
    import tensorflow as tf, numpy as np, matplotlib.pyplot as plt
    
    raw_dataset = tf.data.TFRecordDataset('max_32_set.tfrecords')
    
    labels_cnts = {}
    for i, raw_record in enumerate(raw_dataset.as_numpy_iterator()):
        example = tf.train.Example()
        example.ParseFromString(raw_record)
        info = {}
        for k, v in example.features.feature.items():
            if k == 'label':
                info[k] = v.int64_list.value[0]
        labels_cnts[info['label']] = labels_cnts.get(info['label'], 0) + 1
    
    x, y = zip(*sorted(labels_cnts.items(), key = lambda e: e[0]))
    plt.xlabel('label')
    plt.ylabel('num images')
    plt.plot(x, y)
    plt.xticks(x)
    plt.show()
    

    Plot for max_32_set.tfrecords: [plot]

    The next code augments the dataset using Gaussian noise and Gaussian blur; the augmented TFRecord dataset is saved to the max_32_set.augmented.tfrecords file. Its two core operations are shown in isolation first, followed by the full script.
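
    A minimal single-image sketch of those two operations, additive Gaussian noise and tensorflow_addons' Gaussian blur (the stand-in image and the stddev/sigma values here are only illustrative, not taken from the script):

    import tensorflow as tf, tensorflow_addons as tfa, numpy as np

    # Hypothetical stand-in image: random uint8 pixels, converted to float32 for filtering.
    img = np.random.randint(0, 256, (333, 500, 3), dtype = np.uint8)
    x = tf.convert_to_tensor(img, dtype = tf.float32)

    # Additive Gaussian noise.
    noised = x + tf.random.normal(tf.shape(x), mean = 0.0, stddev = 10.0)
    # Gaussian blur with a 3x3 kernel.
    blurred = tfa.image.gaussian_filter2d(x, filter_shape = 3, sigma = 2.0)

    # Clip back to the valid pixel range and convert to uint8.
    noised = np.clip(noised.numpy(), 0, 255).astype(np.uint8)
    blurred = np.clip(blurred.numpy(), 0, 255).astype(np.uint8)

    The full script: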

    import os
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
    import tensorflow as tf, tensorflow_addons as tfa, PIL.Image, numpy as np, math
    
    c_inp_fname = 'max_32_set.tfrecords'
    c_out_fname = 'max_32_set.augmented.tfrecords'
    c_augment_types = ('noise', 'blur', 'noise_blur', 'noise_blur_mirror')
    c_res_class_size = None # If None then auto configured to maximal class size
    
    def calc_labels():
        raw_dataset = tf.data.TFRecordDataset(c_inp_fname)
        cnts, labels = {}, []
        for i, raw_record in enumerate(raw_dataset):
            example = tf.train.Example()
            example.ParseFromString(raw_record.numpy())
            label = example.features.feature['label'].int64_list.value[0]
            cnts[label] = cnts.get(label, 0) + 1
            labels.append(label)
        return cnts, labels
    
    def img_gen():
        raw_dataset = tf.data.TFRecordDataset(c_inp_fname)
        for i, raw_record in enumerate(raw_dataset):
            example = tf.train.Example()
            example.ParseFromString(raw_record.numpy())
            info = {}
            for k, v in example.features.feature.items():
                if k == 'image_raw':
                    info[k] = v.bytes_list.value[0]
                elif k in ['depth', 'height', 'width']:
                    info[k] = v.int64_list.value[0]
            img_arr = np.frombuffer(info['image_raw'], dtype = np.uint8).reshape(
                info['height'], info['width'], info['depth']
            )
            yield example, img_arr
            
    def gaussian_noise(inp, stddev):
        noise = tf.random.normal(shape = tf.shape(inp), mean = 0.0, stddev = stddev, dtype = inp.dtype)
        return inp + noise
            
    def augment(a, cnt):
        min_noise_stddev, max_noise_stddev = 5., 20.
        blur_kern, min_blur_stddev, max_blur_stddev = 3, 1., 5.
        
        assert cnt >= 1
        # pad_a pads each spatial dimension up to the next power of two with zeros
        # (the blur is applied on the padded image, post_a crops back afterwards).
        pad_a = lambda x: np.pad(x, (
            (0, 2 ** math.ceil(math.log(x.shape[0]) / math.log(2)) - x.shape[0]),
            (0, 2 ** math.ceil(math.log(x.shape[1]) / math.log(2)) - x.shape[1]),
            (0, 0)), constant_values = 0)
        # post_a crops back to the original height/width and clips to the uint8 range.
        post_a = lambda x: np.clip(x[:a.shape[0], :a.shape[1]], 0, 255).astype(np.uint8)
        yield 'orig', a
        cnt -= 1
        res = []
        fcnt = math.ceil(cnt / len(c_augment_types))
        linsp = lambda l, r, c: [(l + (i + 1) * (r - l) / (c + 1)) for i in range(c)]
        for noise_stddev, blur_stddev in zip(linsp(min_noise_stddev, max_noise_stddev, fcnt), linsp(min_blur_stddev, max_blur_stddev, fcnt)):
            if 'noise' in c_augment_types:
                #yield 'noise', post_a(tf.keras.layers.GaussianNoise(stddev = noise_stddev)(prep_a, training = True).numpy())
                res.append(('noise', post_a(gaussian_noise(a.astype(np.float32), stddev = noise_stddev).numpy())))
            if 'blur' in c_augment_types:
                res.append(('blur', post_a(tfa.image.gaussian_filter2d(pad_a(a).astype(np.float32), filter_shape = blur_kern, sigma = blur_stddev).numpy())))
            if 'noise_blur' in c_augment_types or 'noise_blur_mirror' in c_augment_types:
                nbr = post_a(tfa.image.gaussian_filter2d(
                    pad_a(gaussian_noise(a.astype(np.float32), stddev = noise_stddev).numpy()),
                    filter_shape = blur_kern, sigma = blur_stddev).numpy())
                if 'noise_blur' in c_augment_types:
                    res.append(('noise_blur', nbr))
                if 'noise_blur_mirror' in c_augment_types:
                    res.append(('noise_blur_mirror', tf.image.flip_left_right(nbr).numpy().astype(np.uint8)))
        assert cnt <= len(res) <= cnt + len(c_augment_types), (cnt, len(res), len(c_augment_types))
        yield from res[:cnt]
    
    def process():
        labels_cnts, labels = calc_labels()
        max_class_size = max(labels_cnts.values())
        if c_res_class_size is not None:
            assert max_class_size <= c_res_class_size, f'Maximal class size is {max_class_size}, while requested res class size is smaller, {c_res_class_size}!'
            class_size = c_res_class_size
        else:
            class_size = max_class_size
        cur_labels_cnts = {}
        for iimg, (proto, imga) in enumerate(img_gen()):
            label = proto.features.feature['label'].int64_list.value[0]
            cur_labels_cnts[label] = cur_labels_cnts.get(label, 0) + 1
            need_cnt = class_size // labels_cnts[label] + int(cur_labels_cnts[label] <= class_size % labels_cnts[label])
            for iaug, (taug, aug) in enumerate(augment(imga, need_cnt)):
                #PIL.Image.fromarray(aug).save(f'max_32_set.tfrecords.aug.{str(iimg).zfill(5)}.{iaug}_{taug}.png')
                protoc = type(proto)()
                protoc.ParseFromString(proto.SerializeToString())
                protoc.features.feature['image_raw'].bytes_list.value[0] = aug.tobytes()
                yield protoc.SerializeToString()
            if (iimg % 10) == 0:
                print(iimg, ' ', sep = '', end = '', flush = True)
                
    def main():
        assert tf.executing_eagerly()
        # Build a dataset of serialized tf.train.Example strings from the generator
        # and write it to the output .tfrecords file.
        tf.data.experimental.TFRecordWriter(c_out_fname).write(
            tf.data.Dataset.from_generator(process, tf.string)
        )
    
    main()
    

    Example augmented images (original, noised, blurred, noised-blurred, noised-blurred-mirrored): [images]

    Number of images per label after augmentation (exactly balanced at 30 images per label): [plot]
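
    The counts come out exactly balanced because of the need_cnt formula in process(): each image of a label yields class_size // n variants, and the first class_size % n images of that label yield one extra. A quick check with hypothetical numbers (class_size = 30, a label with 7 original images):

    class_size, n = 30, 7
    # 1-based index i of an image within its label, as counted by cur_labels_cnts.
    variants = [class_size // n + int(i <= class_size % n) for i in range(1, n + 1)]
    print(variants, sum(variants))  # [5, 5, 4, 4, 4, 4, 4] 30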


    The same augmentation as above, but for input and output folders with labeled images instead of a TFRecordDataset; change c_inp_dir and c_out_dir to your folder paths:

    import os
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
    import tensorflow as tf, tensorflow_addons as tfa, PIL.Image, numpy as np, math, matplotlib.pyplot as plt
    
    c_inp_dir = './images/'
    c_out_dir = './images_out/'
    c_augment_types = ('noise', 'blur', 'noise_blur', 'noise_blur_mirror')
    c_res_class_size = None # If None then auto configured to maximal class size
    
    def calc_labels(dirn = None):
        if dirn is None:
            dirn = c_inp_dir
        cnts, labels = {}, []
        for label in sorted(os.listdir(f'{dirn}')):
            label = int(label)
            labels.append(label)
            cnts[label] = len(os.listdir(f'{dirn}/{label}/'))
        return cnts, labels
    
    def img_gen():
        for label in sorted(os.listdir(c_inp_dir)):
            label = int(label)
            for fname in sorted(os.listdir(f'{c_inp_dir}/{label}/')):
                img_arr = np.array(PIL.Image.open(f'{c_inp_dir}/{label}/{fname}'))
                yield label, img_arr, fname
            
    def gaussian_noise(inp, stddev):
        noise = tf.random.normal(shape = tf.shape(inp), mean = 0.0, stddev = stddev, dtype = inp.dtype)
        return inp + noise
            
    def augment(a, cnt):
        min_noise_stddev, max_noise_stddev = 5., 20.
        blur_kern, min_blur_stddev, max_blur_stddev = 3, 1., 5.
        
        assert cnt >= 1
        pad_a = lambda x: np.pad(x, (
            (0, 2 ** math.ceil(math.log(x.shape[0]) / math.log(2)) - x.shape[0]),
            (0, 2 ** math.ceil(math.log(x.shape[1]) / math.log(2)) - x.shape[1]),
            (0, 0)), constant_values = 0)
        post_a = lambda x: np.clip(x[:a.shape[0], :a.shape[1]], 0, 255).astype(np.uint8)
        yield 'orig', a
        cnt -= 1
        res = []
        fcnt = math.ceil(cnt / len(c_augment_types))
        linsp = lambda l, r, c: [(l + (i + 1) * (r - l) / (c + 1)) for i in range(c)]
        for noise_stddev, blur_stddev in zip(linsp(min_noise_stddev, max_noise_stddev, fcnt), linsp(min_blur_stddev, max_blur_stddev, fcnt)):
            if 'noise' in c_augment_types:
                #yield 'noise', post_a(tf.keras.layers.GaussianNoise(stddev = noise_stddev)(prep_a, training = True).numpy())
                res.append(('noise', post_a(gaussian_noise(a.astype(np.float32), stddev = noise_stddev).numpy())))
            if 'blur' in c_augment_types:
                res.append(('blur', post_a(tfa.image.gaussian_filter2d(pad_a(a).astype(np.float32), filter_shape = blur_kern, sigma = blur_stddev).numpy())))
            if 'noise_blur' in c_augment_types or 'noise_blur_mirror' in c_augment_types:
                nbr = post_a(tfa.image.gaussian_filter2d(
                    pad_a(gaussian_noise(a.astype(np.float32), stddev = noise_stddev).numpy()),
                    filter_shape = blur_kern, sigma = blur_stddev).numpy())
                if 'noise_blur' in c_augment_types:
                    res.append(('noise_blur', nbr))
                if 'noise_blur_mirror' in c_augment_types:
                    res.append(('noise_blur_mirror', tf.image.flip_left_right(nbr).numpy().astype(np.uint8)))
        assert cnt <= len(res) <= cnt + len(c_augment_types), (cnt, len(res), len(c_augment_types))
        yield from res[:cnt]
    
    def process():
        labels_cnts, labels = calc_labels()
        max_class_size = max(labels_cnts.values())
        if c_res_class_size is not None:
            assert max_class_size <= c_res_class_size, f'Maximal class size is {max_class_size}, while requested res class size is smaller, {c_res_class_size}!'
            class_size = c_res_class_size
        else:
            class_size = max_class_size
        
        cur_labels_cnts = {}
        for iimg, (label, imga, fname) in enumerate(img_gen()):
            os.makedirs(f'{c_out_dir}/{label}/', exist_ok = True)
            cur_labels_cnts[label] = cur_labels_cnts.get(label, 0) + 1
            need_cnt = class_size // labels_cnts[label] + int(cur_labels_cnts[label] <= class_size % labels_cnts[label])
            for iaug, (taug, aug) in enumerate(augment(imga, need_cnt)):
                PIL.Image.fromarray(aug).save(f'{c_out_dir}/{label}/{fname}.{iaug}_{taug}.png')
            if (iimg % 10) == 0:
                print(iimg, ' ', sep = '', end = '', flush = True)
                
    def plot_cnts(dirn):
        labels_cnts = calc_labels(dirn)[0]
        x, y = zip(*sorted(labels_cnts.items(), key = lambda e: e[0]))
        plt.xlabel('label')
        plt.ylabel('num images')
        plt.plot(x, y)
        plt.xticks(x)
        plt.show()
                
    def main():
        process()
        plot_cnts(c_inp_dir)
        plot_cnts(c_out_dir)
    
    main()