I was going through a chapter in an OpenCV book about handwritten number recognition, and although I think everything got processed correctly, I get an error saying Expected 2D array, got 1D array instead. I've tried searching for an answer, and it seems like a lot of other people have run into a very similar issue, but with no real answer provided.
Would anyone be able to clarify why this feature.hog()
method is not returning a 2D array? From the documentation, it apparently returns a flat 1D array by default, so I don't know why the model.predict()
method complains that it expects a 2D array. Then again, the book I'm following was released in 2015 (I think), so maybe something has changed?
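For reference, here is a minimal sketch of the same shape mismatch on toy data (made-up numbers, not my actual model), in case it helps show what I mean:

import numpy as np
from sklearn.svm import LinearSVC

X = np.random.rand(10, 36)   # 10 samples x 36 features: 2D, what sklearn wants
y = np.arange(10) % 2        # 10 toy labels
model = LinearSVC().fit(X, y)

sample = np.random.rand(36)  # one flat feature vector: 1D
model.predict(sample)        # ValueError: Expected 2D array, got 1D array instead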
This is the file I am trying to run:
classify.py
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 13:01:39 2020
@author: User
"""
from __future__ import print_function
from sklearn.externals import joblib
from pyimagesearch.hog import HOG
from pyimagesearch import dataset
import argparse
import mahotas
import cv2
ap = argparse.ArgumentParser()
ap.add_argument('-m', '--model', required=True, help='Path to model')
ap.add_argument('-i', '--image', required=True, help='Path to image')
args=vars(ap.parse_args())
model = joblib.load(args['model'])
hog = HOG(orientations=18, pixelsPerCell=(10, 10),
          cellsPerBlock=(1, 1), normalize=True)
image = cv2.imread(args["image"])
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
blurred = cv2.GaussianBlur(gray, (5, 5), 0)
edged = cv2.Canny(blurred, 30, 150)
(_, cnts, _) = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,
                                cv2.CHAIN_APPROX_SIMPLE)
cnts = sorted([(c, cv2.boundingRect(c)[0]) for c in cnts], key=lambda x: x[1])
for (c, _) in cnts:
    (x, y, w, h) = cv2.boundingRect(c)
    if w >= 7 and h >= 20:
        roi = gray[y:y + h, x:x + w]
        thresh = roi.copy()
        T = mahotas.thresholding.otsu(roi)
        thresh[thresh > T] = 255
        thresh = cv2.bitwise_not(thresh)
        thresh = dataset.deskew(thresh, 72)
        thresh = dataset.center_extent(thresh, (72, 72))
        cv2.imshow("thresh", thresh)
        hist = hog.describe(thresh)
        digit = model.predict(hist)[0]  # this is where it errors
        print("I think that number is: {}".format(digit))
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 1)
        cv2.putText(image, str(digit), (x - 10, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, (0, 255, 0), 2)
        cv2.imshow("image", image)
        cv2.waitKey(0)
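One side note in case anyone tries to run this: the three-value unpacking of cv2.findContours above only works on OpenCV 3.x. OpenCV 2.4 and 4.x return two values (contours, hierarchy), so on those versions that line itself throws a ValueError. A version-agnostic sketch would be something like:

cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,
                        cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]  # contours come first on 2.4/4.x, second on 3.x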
And here is the custom HOG module written for this:
hog.py
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 11:22:38 2020
@author: User
"""
from skimage import feature
class HOG:
    def __init__(self, orientations=9, pixelsPerCell=(14, 14),
                 cellsPerBlock=(1, 1), normalize=False):
        self.orientations = orientations
        self.pixelsPerCell = pixelsPerCell
        self.cellsPerBlock = cellsPerBlock
        self.normalize = normalize

    def describe(self, image):
        '''
        (2017-11-28) Update for skimage: In scikit-image==0.12, the
        normalise parameter has been updated to transform_sqrt. The
        transform_sqrt performs the exact same operation, only with a
        different name. If you’re using an older version of scikit-image
        (again, before the v0.12 release), then you’ll want to change
        transform_sqrt to normalise. In scikit-image==0.15 the default
        value of block_norm="L1" has been deprecated and changed to
        block_norm="L2-Hys". Therefore, for this lesson we’ll explicitly
        specify block_norm="L1". Doing this will avoid it switching to
        "L2-Hys" with version updates without us knowing (and yielding
        incorrect car logo identification results). You can read about L1
        and L2 norms here:
        https://gurus.pyimagesearch.com/lesson-sample-histogram-of-oriented-gradients-and-car-logo-recognition/#tour_modal
        '''
        hist = feature.hog(image,
                           orientations=self.orientations,
                           pixels_per_cell=self.pixelsPerCell,
                           cells_per_block=self.cellsPerBlock,
                           transform_sqrt=self.normalize,
                           block_norm="L1")
        return hist
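As far as I can tell from the scikit-image docs, feature.hog flattens its output by default (the feature_vector parameter defaults to True), so describe() really does return a 1D vector. A quick check on a blank 72x72 image (the size classify.py feeds it):

import numpy as np
from skimage import feature

img = np.zeros((72, 72), dtype='uint8')
hist = feature.hog(img, orientations=18, pixels_per_cell=(10, 10),
                   cells_per_block=(1, 1), block_norm="L1")
print(hist.shape)  # (882,) -- 7x7 cells x 18 orientations, flattened to 1D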
And here is the script that produced the trained model:
train.py
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 11:57:26 2020
@author: User
"""
from sklearn.externals import joblib
from sklearn.svm import LinearSVC
from pyimagesearch.hog import HOG
from pyimagesearch import dataset
import argparse
ap = argparse.ArgumentParser()
ap.add_argument('-d', '--dataset', required=True, help='Path to dataset')
ap.add_argument('-m', '--model', required=True, help='path to where model will be stored')
args=vars(ap.parse_args())
(digits, target) = dataset.load_digits(args['dataset'])
data = []
hog = HOG(orientations=9, pixelsPerCell=(14, 14),
          cellsPerBlock=(1, 1), normalize=True)

for image in digits:
    image = dataset.deskew(image, 20)
    image = dataset.center_extent(image, (20, 20))
    hist = hog.describe(image)
    data.append(hist)
model = LinearSVC(random_state=42)
model.fit(data, target)
joblib.dump(model, args['model'])
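What confuses me is that training works fine here: data is a Python list of those 1D HOG vectors, and model.fit() happily treats the list as one 2D (n_samples, n_features) array. Printing the shapes shows the difference:

import numpy as np
print(np.array(data).shape)  # (n_samples, n_features) -- 2D, so fit() accepts it
print(data[0].shape)         # (n_features,) -- one 1D vector, the shape classify.py passes to predict()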
dataset.py
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 3 11:35:04 2020
@author: User
"""
from . import imutils
import numpy as np
import mahotas
import cv2
def load_digits(datasetPath):
    data = np.genfromtxt(datasetPath, delimiter=',', dtype='uint8')
    target = data[:, 0]
    data = data[:, 1:].reshape(data.shape[0], 28, 28)
    return (data, target)

def deskew(image, width):
    (h, w) = image.shape[:2]
    moments = cv2.moments(image)
    skew = moments['mu11'] / moments['mu02']
    M = np.float32([
        [1, skew, -0.5 * w * skew],
        [0, 1, 0]])
    image = cv2.warpAffine(image, M, (w, h),
                           flags=cv2.WARP_INVERSE_MAP | cv2.INTER_LINEAR)
    image = imutils.resize(image, width=width)
    return image

def center_extent(image, size):
    (eW, eH) = size
    if image.shape[1] > image.shape[0]:
        image = imutils.resize(image, width=eW)
    else:
        image = imutils.resize(image, height=eH)
    extent = np.zeros((eH, eW), dtype='uint8')
    offsetX = (eW - image.shape[1]) // 2
    offsetY = (eH - image.shape[0]) // 2
    extent[offsetY:offsetY + image.shape[0],
           offsetX:offsetX + image.shape[1]] = image
    CM = mahotas.center_of_mass(extent)
    (cY, cX) = np.round(CM).astype('int32')
    (dX, dY) = ((size[0] // 2) - cX, (size[1] // 2) - cY)
    M = np.float32([[1, 0, dX], [0, 1, dY]])
    extent = cv2.warpAffine(extent, M, size)
    return extent
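In case the CSV layout matters: each row is a label followed by 28*28 = 784 pixel values, which is why load_digits() splits off the first column as the target and reshapes the rest into 28x28 images:

(digits, target) = dataset.load_digits('data/digits.csv')
print(digits.shape)  # (n_rows, 28, 28)
print(target.shape)  # (n_rows,)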
And in case it's needed, here is the custom imutils
module:
imutils.py
# -*- coding: utf-8 -*-
"""
Created on Tue Sep 29 16:27:16 2020
@author: User
"""
import numpy as np
import cv2
def translate(image, x, y):
    M = np.float32([[1, 0, x], [0, 1, y]])
    shifted = cv2.warpAffine(image, M, (image.shape[1], image.shape[0]))
    return shifted

def rotate(image, angle, center=None, scale=1.0):
    (h, w) = image.shape[:2]
    if not center:
        center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, scale)
    rotated = cv2.warpAffine(image, M, (w, h))
    return rotated

def resize(image, width=None, height=None, inter=cv2.INTER_AREA):
    dim = None
    (h, w) = image.shape[:2]
    if width is None and height is None:
        return image
    if width is None:
        r = height / float(h)
        dim = (int(w * r), height)
    else:
        r = width / float(w)
        dim = (width, int(h * r))
    resized = cv2.resize(image, dim, interpolation=inter)
    return resized
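resize() keeps the aspect ratio by computing the scale from whichever dimension you pass in; for example:

from pyimagesearch import imutils
import numpy as np

img = np.zeros((100, 200), dtype='uint8')    # h=100, w=200
print(imutils.resize(img, width=72).shape)   # (36, 72)  -- height scaled to keep the ratio
print(imutils.resize(img, height=72).shape)  # (72, 144) -- width scaled to keep the ratio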
And I am using the data found here (the train.csv
file), which I reduced to 5000 rows via this script:
import pandas as pd
metadata = pd.read_csv('C:/Users/User/Downloads/digit-recognizer/train.csv', low_memory=False)
smaller_df = metadata.head(5000)
smaller_df.to_csv(path_or_buf='data/digits.csv', index=False)
print('successfully wrote a smaller file!')
I have figured this out, and now that I have a chance to post I wanted to share my findings.
When I was trying to "reshape" the array, I was actually reshaping the WRONG array, and that's why it kept giving me an error.
So to convert the 1D array I had into 2D, I took this line: digit = model.predict(hist)[0]
and changed it to this: digit = model.predict(hist.reshape(1, -1))[0]
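In other words, reshape(1, -1) just wraps the flat feature vector in a 2D array with a single row, which is the (n_samples, n_features) layout that predict() wants:

import numpy as np
hist = np.random.rand(882)        # stand-in for the flat HOG vector
print(hist.shape)                 # (882,)  -- 1D, triggers the error
print(hist.reshape(1, -1).shape)  # (1, 882) -- one sample with 882 features, accepted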