In object detection algorithms, Non-Maximum Suppression(NMS) is used to discard extra detection results for an object e.g. a vehicle.
Normally, horizontal bounding boxes are used in object detection algorithms and the GPU implementation of horizontal NMS already exists, but I wanted to have GPU implementation of rotated bounding boxes.
The CPU implementation has been already done, but I am struggling to convert CPU version to GPU version using CuPy package. Here is the code I have written. After the code section, you can see the error.
My question is what is the reason of TypeError: list indices must be integers or slices, not cupy.core.core.ndarray?
from shapely.geometry import Polygon as shpoly
import time
#### CPU implementation
import numpy as np
def polygon_iou(poly1, poly2):
"""
Intersection over union between two shapely polygons.
"""
if not poly1.intersects(poly2): # this test is fast and can accelerate calculation
iou = 0
else:
try:
inter_area = poly1.intersection(poly2).area
union_area = poly1.area + poly2.area - inter_area
iou = float(inter_area) / float(union_area)
except shapely.geos.TopologicalError:
warnings.warn("'shapely.geos.TopologicalError occured, iou set to 0'", UserWarning)
iou = 0
except ZeroDivisionError:
iou = 0
return iou
def polygon_from_array(poly_):
"""
Create a shapely polygon object from gt or dt line.
"""
polygon_points = np.array(poly_).reshape(4, 2)
polygon = shpoly(polygon_points).convex_hull
return polygon
def nms(dets, thresh):
scores = dets[:, 8]
order = scores.argsort()[::-1]
polys = []
areas = []
for i in range(len(dets)):
tm_polygon = polygon_from_array(dets[i,:8])
polys.append(tm_polygon)
keep = []
while order.size > 0:
ovr = []
i = order[0]
keep.append(i)
for j in range(order.size - 1):
iou = polygon_iou(polys[i], polys[order[j + 1]])
ovr.append(iou)
ovr = np.array(ovr)
inds = np.where(ovr <= thresh)[0]
order = order[inds + 1]
return keep
#### GPU implementation
import cupy as cp
def polygon_iou_gpu(poly1, poly2):
"""
Intersection over union between two shapely polygons.
"""
if not poly1.intersects(poly2): # this test is fast and can accelerate calculation
iou = 0
else:
try:
inter_area = poly1.intersection(poly2).area
union_area = poly1.area + poly2.area - inter_area
iou = float(inter_area) / float(union_area)
except shapely.geos.TopologicalError:
warnings.warn("'shapely.geos.TopologicalError occured, iou set to 0'", UserWarning)
iou = 0
except ZeroDivisionError:
iou = 0
return iou
def polygon_from_array_gpu(poly_):
"""
Create a shapely polygon object from gt or dt line.
"""
polygon_points = cp.array(poly_).reshape(4, 2)
polygon = shpoly(polygon_points).convex_hull
return polygon
def nms_gpu(dets, thresh):
scores = dets[:, 8]
order = scores.argsort()[::-1]
polys = []
areas = []
for i in range(len(dets)):
tm_polygon = polygon_from_array_gpu(dets[i,:8])
polys.append(tm_polygon)
keep = []
while order.size > 0:
ovr = []
i = order[0]
keep.append(i)
for j in range(order.size - 1):
iou = polygon_iou_gpu(polys[i], polys[order[j + 1]])
ovr.append(iou)
ovr = np.array(ovr)
inds = np.where(ovr <= thresh)[0]
order = order[inds + 1]
return keep
if __name__ == '__main__':
import random
boxes = np.random.randint(0,100,(1000,8))
scores = np.random.rand(1000, 1)
dets = np.hstack((boxes, scores[:])).astype(np.float32)
thresh = 0.1
start = time.time()
keep = nms(dets, thresh)
print("CPU implementation took: {}".format(time.time() - start))
cp.cuda.Device(1)
dets_gpu = cp.array(dets)
start = time.time()
keep = nms_gpu(dets_gpu, thresh)
print("GPU implementation took: {}".format(time.time() - start))
The error is
CPU implementation took: 0.3672311305999756
Traceback (most recent call last):
File "nms_rotated.py", line 117, in
keep = nms_gpu(dets_gpu, thresh)
File "nms_rotated.py", line 97, in nms_gpu
iou = polygon_iou_gpu(polys[i], polys[order[j + 1]])
TypeError: list indices must be integers or slices, not cupy.core.core.ndarray
Update: 13.02.2019 I tried @Yuki Hashimoto's answer
by replacing iou = polygon_iou_gpu(polys[i], polys[order[j + 1]])
with iou = polygon_iou_gpu(polys[i.get()], polys[order[j + 1].get()])
. It does not throw any errors, but the GPU version is multiple times slower than CPU version.
by using 100000 random detections:
CPU implementation took: 47.125494956970215 GPU implementation took: 142.08464860916138
In very short: use PFN's official non-maximum suppression.
Details:
Use cp.where
, which returns a list
object which matches some condition.
The corochann
's answer is not recommended, because polys
is a list, and list
should not be sliced by np.ndarray
either. (and injecting another dependency is not recommended...)
>>> polys[order.get()] # get method returns np.ndarray
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
TypeError: only integer scalar arrays can be converted to a scalar index
>>> polys[order[j + 1].get()]
### some result in some case, but this may fails depending on your env.###