I'm trying to train Matterport's Mask R-CNN (TensorFlow/Keras) on a custom COCO-format dataset. My datasets are JSON files in the aforementioned COCO format, with each item in the "annotations" section looking like this:
There are 20 classes, with polygon masks for the entire object, and then polygon masks for the parts within the object. The images are of shape 256x448x3.
The code for the dataset preparation is as follows:
class CocoLikeDataset(utils.Dataset):
    """ Generates a COCO-like dataset, i.e. an image dataset annotated in the style of the COCO dataset.
    See http://cocodataset.org/#home for more information.
    """

    # Offset added to every COCO category id so that id 0 stays free for the
    # background class. It must be applied identically when classes are
    # registered (load_data) and when mask labels are produced (load_mask).
    _CLASS_ID_OFFSET = 1

    def load_data(self, annotation_json, images_dir):
        """ Load the coco-like dataset from json
        Args:
            annotation_json: The path to the coco annotations json file
            images_dir: The directory holding the images referred to by the json file
        """
        # Load json from file; the context manager closes the handle even if
        # parsing fails.
        with open(annotation_json) as json_file:
            coco_json = json.load(json_file)

        # Add the class names using the base method from utils.Dataset
        source_name = "coco_like"
        for category in coco_json['categories']:
            class_id = category['id'] + self._CLASS_ID_OFFSET
            class_name = category['name']
            if class_id < 1:
                print('Error: Class id for "{}" cannot be less than one. (0 is reserved for the background)'.format(class_name))
                return
            self.add_class(source_name, class_id, class_name)

        # Group all annotations by the image they belong to
        annotations = {}
        for annotation in coco_json['annotations']:
            annotations.setdefault(annotation['image_id'], []).append(annotation)

        # Get all images and add them to the dataset
        seen_images = {}
        for image in coco_json['images']:
            image_id = image['id']
            if image_id in seen_images:
                print("Warning: Skipping duplicate image id: {}".format(image))
                continue
            seen_images[image_id] = image

            try:
                image_file_name = image['file_name']
                image_width = image['width']
                image_height = image['height']
            except KeyError as key:
                print("Warning: Skipping image (id: {}) with missing key: {}".format(image_id, key))
                # Actually skip: without this the code below would run with
                # unbound (or stale, from the previous image) values.
                continue

            image_path = os.path.abspath(os.path.join(images_dir, image_file_name))
            # An image may legitimately have no annotations at all.
            image_annotations = annotations.get(image_id, [])

            # Add the image using the base method from utils.Dataset
            self.add_image(
                source=source_name,
                image_id=image_id,
                path=image_path,
                width=image_width,
                height=image_height,
                annotations=image_annotations
            )

    def load_mask(self, image_id):
        """ Load instance masks for the given image.
        MaskRCNN expects masks in the form of a bitmap [height, width, instances].
        Args:
            image_id: The id of the image to load masks for
        Returns:
            masks: A bool array of shape [height, width, instance count] with
                one mask per instance.
            class_ids: a 1D array of class IDs of the instance masks.
        """
        image_info = self.image_info[image_id]
        width = image_info['width']
        height = image_info['height']

        instance_masks = []
        class_ids = []
        for annotation in image_info['annotations']:
            # Same offset as used when the classes were registered in
            # load_data, so labels line up with the registered class ids.
            class_id = annotation['category_id'] + self._CLASS_ID_OFFSET

            # Rasterize every polygon of this annotation into one 1-bit image
            # (PIL sizes are (width, height)).
            mask = Image.new('1', (width, height))
            mask_draw = ImageDraw.ImageDraw(mask, '1')
            for segmentation in annotation['segmentation']:
                mask_draw.polygon(segmentation, fill=1)

            # One mask per annotation (instance), covering all its polygons.
            instance_masks.append(np.array(mask) > 0)
            class_ids.append(class_id)

        if not instance_masks:
            # np.dstack raises on an empty list; return an empty stack instead.
            return (np.zeros((height, width, 0), dtype=bool),
                    np.zeros((0,), dtype=np.int32))

        mask = np.dstack(instance_masks)
        class_ids = np.array(class_ids, dtype=np.int32)
        return mask, class_ids
This seems to work well, at least for loading the images and masks in a presentable format: when I visualize some of the images, I can see each image, its binary masks, and the class id of each mask.
However, when I actually try to train the model on the training dataset created, I get the following error:
# Create model in training mode
model = modellib.MaskRCNN(mode="training", config=config,
model_dir=MODEL_DIR)
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-19-7928c4edfc77> in <module>()
1 # Create model in training mode
2 model = modellib.MaskRCNN(mode="training", config=config,
----> 3 model_dir=MODEL_DIR)
3 frames
/content/Mask_RCNN/mrcnn/model.py in __init__(self, mode, config, model_dir)
1835 self.model_dir = model_dir
1836 self.set_log_dir()
-> 1837 self.keras_model = self.build(mode=mode, config=config)
1838
1839 def build(self, mode, config):
/content/Mask_RCNN/mrcnn/model.py in build(self, mode, config)
1927 # Anchors
1928 if mode == "training":
-> 1929 anchors = self.get_anchors(config.IMAGE_SHAPE)
1930 # Duplicate across the batch dimension because Keras requires it
1931 # TODO: can this be optimized to avoid duplicating the anchors?
/content/Mask_RCNN/mrcnn/model.py in get_anchors(self, image_shape)
2609 backbone_shapes,
2610 self.config.BACKBONE_STRIDES,
-> 2611 self.config.RPN_ANCHOR_STRIDE)
2612 # Keep a copy of the latest anchors in pixel coordinates because
2613 # it's used in inspect_model notebooks.
/content/Mask_RCNN/mrcnn/utils.py in generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, anchor_stride)
635 anchors = []
636 for i in range(len(scales)):
--> 637 anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
638 feature_strides[i], anchor_stride))
639 return np.concatenate(anchors, axis=0)
IndexError: index 5 is out of bounds for axis 0 with size 5
I have no clue what this error is actually indicating, nor what the potential solution might possibly be. I have a feeling it might have to do with how the data is being formatted & handled by the CocoLikeDataset class, but I'm not sure.
Any help with identifying the issue and solving it is appreciated!
Thanks!
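The error is raised while the anchors are generated, not while your data is loaded: generate_pyramid_anchors loops over len(scales) but indexes feature_shapes[i], and the backbone feature shapes have only 5 entries, so RPN_ANCHOR_SCALES must not contain more than 5 values. Set the RPN_ANCHOR_SCALES variable in your config to: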
RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)