python-3.x, pytorch, image-segmentation, albumentations

Albumentations returns "KeyError: 'labels'"


This project is an implementation of the PyTorch Mask R-CNN model for instance segmentation of cells.

The base model trains fine, but to enlarge the training set through augmentation I attempted to integrate the Albumentations library.

Python: 3.10.6, Torch: 2.0.1+cu118, NumPy: 1.22.4, PIL: 9.4.0, Albumentations: 1.2.1

The Dataset class below loads each image from disk as a NumPy array and builds the target dictionary for the model:

import os

import numpy as np
import torch
from PIL import Image
from torch.utils.data import Dataset


class CellDataset(Dataset):
  # images are jpgs while masks are pngs
  def __init__(self, image_dir, mask_dir, height = 250, width = 250, transform = None):
    self.image_dir = image_dir
    self.mask_dir = mask_dir
    self.transform = transform
    self.images = sorted(os.listdir(image_dir)) # sorted so images and masks pair up by index
    self.masks = sorted(os.listdir(mask_dir))

  def __len__(self):
    return len(self.images)

  def __getitem__(self,index):
    assert len(os.listdir(self.image_dir)) == len(os.listdir(self.mask_dir)), "Images and Masks folder sizes do not match"
    img_path = os.path.join(self.image_dir, self.images[index])
    mask_path = os.path.join(self.mask_dir, self.masks[index])

    if img_path.endswith(".npy"):
      image = np.load(img_path)
    else:
      image = np.array(Image.open(img_path).convert("RGB"))

    if mask_path.endswith(".npy"):
      mask = np.load(mask_path)
    else:
      mask = np.array(Image.open(mask_path))
    obj_ids = np.unique(mask)
    obj_ids = obj_ids[1:] # removing first item of list, which represents background
    num_objs = len(obj_ids) # finding number of objects (i.e. number of cells)

    # creates a boolean mask for each object
    masks = np.zeros((num_objs, mask.shape[0], mask.shape[1]), dtype=np.uint8)
    for i in range(num_objs):
      masks[i][mask == obj_ids[i]] = True

    # creates bounding boxes from each object's mask extents
    boxes = []
    for i in range(num_objs):
      pos = np.where(masks[i]) # indices of the pixels belonging to the object
      xmin = np.min(pos[1]) # find min/max along the x & y axes
      xmax = np.max(pos[1])
      ymin = np.min(pos[0])
      ymax = np.max(pos[0])
      boxes.append([xmin, ymin, xmax, ymax])
    boxes = list(boxes) # boxes must be a plain list for Albumentations

    ## creating class labels
    class_labels = torch.ones((num_objs,),dtype=torch.int64)

    # Albumentation transformations
    if self.transform is not None:
      augmentations = self.transform(image=image, mask=mask, bboxes=boxes, class_labels=class_labels)
      image = augmentations["image"]
      mask = augmentations["mask"]
      boxes = augmentations["bboxes"]
      class_labels = augmentations["class_labels"]

    boxes = torch.as_tensor(boxes, dtype = torch.float32)
    masks = torch.as_tensor(masks, dtype=torch.uint8)

    target = {}
    target["boxes"] = boxes
    target["masks"] = masks
    target["class_labels"] = class_labels

    return image, target

A separate file defines a function that builds the train_loader and val_loader from the directories containing the training and validation data:

from torch.utils.data import DataLoader


def get_loaders(
    train_img_dir,
    train_mask_dir,
    val_img_dir,
    val_mask_dir,
    batch_size,
    train_transform,
    val_transform,
    num_workers=4,
    pin_memory=True,
):
    train_ds = CellDataset(
        image_dir=train_img_dir,
        mask_dir=train_mask_dir,
        transform=train_transform,
    )

    train_loader = DataLoader(
        train_ds,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=pin_memory,
        shuffle=True,
        collate_fn = collate_fn
    )

    val_ds = CellDataset(
        image_dir=val_img_dir,
        mask_dir=val_mask_dir,
        transform=val_transform,
    )

    val_loader = DataLoader(
        val_ds,
        batch_size=batch_size,
        num_workers=num_workers,
        pin_memory=pin_memory,
        shuffle=False,
        collate_fn = collate_fn
    )

    return train_loader, val_loader
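
The collate_fn referenced above is not shown in the original code; a minimal version consistent with how the training loop below indexes the batch (dt[0][0] is the first image, dt[0][1] its target dictionary) would simply keep the batch as a list of (image, target) pairs:

def collate_fn(batch):
    # keep each (image, target) pair intact instead of stacking tensors,
    # since detection images and targets can have different shapes per sample
    return list(batch)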

A separate file loads the dataset and runs the training loop:

import torch
import torch.nn as nn
import torchvision
import albumentations as A
from albumentations.pytorch import ToTensorV2

model = torchvision.models.detection.maskrcnn_resnet50_fpn_v2()
bbox_params = A.BboxParams(format='pascal_voc', label_fields=[])

def main():
    # defining transformations. Will be accessed in dataloader
    train_transform = A.Compose([
        # A.Rotate(limit=35, p=1.0),
        A.RandomSizedBBoxSafeCrop(height=image_height, width=image_width,erosion_rate=0.2),
        A.Resize(height=image_height, width=image_width),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
            ),
        ToTensorV2()
        ],bbox_params=bbox_params, p=1)

    val_transforms = A.Compose([
        A.RandomSizedBBoxSafeCrop(height=image_height, width=image_width,erosion_rate=0.2),
        A.Resize(height=image_height, width=image_width),
        A.Normalize(
            mean=[0.0, 0.0, 0.0],
            std=[1.0, 1.0, 1.0],
            max_pixel_value=255.0,
            ),
        ToTensorV2()
        ],bbox_params=bbox_params, p=1)
    loss_fn = nn.BCEWithLogitsLoss()
    # optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate, momentum=momentum, weight_decay=weight_decay)

    train_loader, val_loader = get_loaders(
        train_img_dir,
        train_mask_dir,
        val_img_dir,
        val_mask_dir,
        batch_size,
        train_transform,
        val_transforms,
        num_workers,
        pin_memory,
    )
    scaler = torch.cuda.amp.GradScaler()
    model.to(device)

    for epoch in range(num_epochs):
        train_epoch_loss = 0
        val_epoch_loss = 0
        model.train()

        for i, dt in enumerate(train_loader):
          img = [dt[0][0].to(device), dt[1][0].to(device)]
          targ = [dt[0][1], dt[1][1]]
          targets = [{k: v.to(device) for k, v in t.items()} for t in targ]
          # targets = [{k: v for k, v in t.items()} for t in targ]
          loss = model(img, targets)
          print(loss)
          losses = sum([l for l in loss.values()])
          train_epoch_loss += losses.cpu().detach().numpy()
          optimizer.zero_grad()
          losses.backward()
          optimizer.step()
        all_train_losses.append(train_epoch_loss)
        with torch.no_grad():
          for j, dt in enumerate(val_loader):
            img = [dt[0][0].to(device), dt[1][0].to(device)]
            targ = [dt[0][1], dt[1][1]]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targ]
            # targets = [{k: v for k, v in t.items()} for t in targ]
            loss = model(img, targets)
            losses = sum([l for l in loss.values()])
            val_epoch_loss += losses.cpu().detach().numpy()
          all_val_losses.append(val_epoch_loss)
        print(epoch, " ", train_epoch_loss, " ", val_epoch_loss)

Resulting Error Message:

/usr/local/lib/python3.10/dist-packages/torch/cuda/amp/grad_scaler.py:120: UserWarning: torch.cuda.amp.GradScaler is enabled, but CUDA is not available.  Disabling.
  warnings.warn("torch.cuda.amp.GradScaler is enabled, but CUDA is not available.  Disabling.")

---------------------------------------------------------------------------

KeyError                                  Traceback (most recent call last)

<ipython-input-8-692fc61b0d19> in <cell line: 162>()
    161 
    162 if __name__ == "__main__":
--> 163     main()

4 frames

<ipython-input-8-692fc61b0d19> in main()
    121           targets = [{k: v.to(device) for k, v in t.items()} for t in targ]
    122           # targets = [{k: v for k, v in t.items()} for t in targ]
--> 123           loss = model(img, targets)
    124           print(loss)
    125           losses = sum([l for l in loss.values()])

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
   1499                 or _global_backward_pre_hooks or _global_backward_hooks
   1500                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501             return forward_call(*args, **kwargs)
   1502         # Do not call functions when jit is used
   1503         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.10/dist-packages/torchvision/models/detection/generalized_rcnn.py in forward(self, images, targets)
    103             features = OrderedDict([("0", features)])
    104         proposals, proposal_losses = self.rpn(images, features, targets)
--> 105         detections, detector_losses = self.roi_heads(features, proposals, images.image_sizes, targets)
    106         detections = self.transform.postprocess(detections, images.image_sizes, original_image_sizes)  # type: ignore[operator]
    107 

/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py in _call_impl(self, *args, **kwargs)
   1499                 or _global_backward_pre_hooks or _global_backward_hooks
   1500                 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1501             return forward_call(*args, **kwargs)
   1502         # Do not call functions when jit is used
   1503         full_backward_hooks, non_full_backward_hooks = [], []

/usr/local/lib/python3.10/dist-packages/torchvision/models/detection/roi_heads.py in forward(self, features, proposals, image_shapes, targets)
    746                 if not t["boxes"].dtype in floating_point_types:
    747                     raise TypeError(f"target boxes must of float type, instead got {t['boxes'].dtype}")
--> 748                 if not t["labels"].dtype == torch.int64:
    749                     raise TypeError(f"target labels must of int64 type, instead got {t['labels'].dtype}")
    750                 if self.has_keypoint():

KeyError: 'labels'

The line that seems to cause the error checks whether t["labels"].dtype is torch.int64, yet the labels are created with exactly that dtype in the Dataset class.


Solution

  • First, be aware that the error is not that the datatype isn't int64; it's that the key 'labels' doesn't exist at all, so execution never reaches the actual dtype comparison.

    The targets handed to the model are clearly missing this required key. The culprit is the target dictionary built in your Dataset's __getitem__:

        target = {}
        target["boxes"] = boxes
        target["masks"] = masks
        target["class_labels"] = class_labels
    

    The key "class_labels" is wrong here; torchvision's Mask R-CNN expects it to be named exactly "labels".
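
    A minimal sketch of the fix inside __getitem__, assuming the rest of the pipeline stays unchanged (the transform output still carries the labels under the "class_labels" keyword used in the transform call; only the key of the target dictionary handed to the model needs renaming):

        # ... after the Albumentations block in __getitem__ ...
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        masks = torch.as_tensor(masks, dtype=torch.uint8)
        labels = torch.as_tensor(class_labels, dtype=torch.int64)  # roi_heads checks for int64

        target = {}
        target["boxes"] = boxes
        target["masks"] = masks
        target["labels"] = labels  # torchvision's Mask R-CNN looks up exactly this key

        return image, target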