I have a custom-trained weights file and I wish to run predictions with it on single images.
I have initialized the model using the PyTorch library, but when I run a prediction it returns a tuple of raw tensors I can't make sense of. I want it to return labels like detect.py does when I run: python detect.py --source ../captcha.png --weights captcha_model.pt --save-txt --no-trace --exist-ok --project .. --name output --nosave
My code:
import torch
import torchvision.transforms as transforms
from PIL import Image
classes = ['M','Y','8','9','F','B','V','I','Q','H','4','P','T',
'C','W','A','K','G','N','L','5','6','2','0','Z','7','1','J','D','E',
'O','X','3','R']
def pre_image(image_path, model):
    img = Image.open(image_path)
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    transform_norm = transforms.Compose([transforms.ToTensor(),
                                         transforms.Resize((224, 224)),
                                         transforms.Normalize(mean, std)])
    img_normalized = transform_norm(img).float()
    img_normalized = img_normalized.unsqueeze_(0)
    img_normalized = img_normalized.to("cpu")
    with torch.no_grad():
        model.eval()
        output = model(img_normalized)
        return output
model = torch.hub.load("WongKinYiu/yolov7","custom","captcha_model.pt",trust_repo=True)
output = pre_image("captcha.png", model)
print(output)
Its output:
(tensor([[[-1.26430e+00, 2.61231e+00, 3.59347e+01, ..., 1.01391e-02, 2.46865e-02, 3.76955e-02],
[ 7.77992e+00, 3.27832e+00, 3.90596e+01, ..., 7.17988e-03, 1.89734e-02, 2.55351e-02],
[ 1.55948e+01, 6.01060e+00, 4.03760e+01, ..., 6.80088e-03, 1.94541e-02, 2.04232e-02],
...,
[ 1.48366e+02, 2.00934e+02, 1.02123e+03, ..., 2.46739e-02, 8.09314e-03, 1.13809e-02],
[ 1.84225e+02, 2.08909e+02, 1.01224e+03, ..., 2.44055e-02, 6.32820e-03, 7.09502e-03],
[ 2.13092e+02, 2.05269e+02, 6.91005e+02, ..., 2.27712e-02, 6.58516e-03, 4.77984e-03]]]), [tensor([[[[[-1.57869e+00, -3.50467e-01, 1.85951e+00, ..., -4.58117e+00, -3.67650e+00, -3.23979e+00],
[-1.17338e+00, -1.80912e-01, 2.22052e+00, ..., -4.92927e+00, -3.94556e+00, -3.64183e+00],
[-1.23863e+00, 5.13654e-01, 2.40425e+00, ..., -4.98388e+00, -3.92005e+00, -3.87045e+00],
...,
[-1.44293e+00, 1.19797e-01, 2.43755e+00, ..., -4.76797e+00, -3.84923e+00, -3.28092e+00],
[-1.21709e+00, 3.48075e-01, 2.28074e+00, ..., -5.22396e+00, -3.59381e+00, -2.93395e+00],
[-1.81546e+00, -4.95381e-02, 2.13998e+00, ..., -4.43276e+00, -3.90898e+00, -3.05755e+00]],
[[-1.33876e+00, 4.33999e-01, 1.77264e+00, ..., -5.10550e+00, -4.48759e+00, -2.86169e+00],
[-5.99240e-01, 6.20912e-02, 2.25098e+00, ..., -5.15602e+00, -4.78393e+00, -3.22067e+00],
[-8.69940e-01, 5.02609e-01, 2.12465e+00, ..., -5.56788e+00, -3.98619e+00, -3.18369e+00],
...,
...
[[ 5.62926e-01, -1.30771e+00, 6.28318e-02, ..., -5.44807e+00, -2.17277e+00, -5.25101e+00],
[ 3.79302e-01, -1.64500e+00, 3.82732e-01, ..., -6.31032e+00, -2.52180e+00, -5.23643e+00],
[-1.41712e-01, -1.54378e+00, 9.03876e-01, ..., -6.42302e+00, -2.80328e+00, -6.22176e+00],
...,
[-1.35747e-01, -1.81381e+00, 1.08393e+00, ..., -6.70534e+00, -3.46506e+00, -5.70917e+00],
[ 6.60871e-02, -1.64646e+00, 4.27860e-01, ..., -6 ...(output truncated)
Desired output:
1 8, 1 I, 1 L, 1 X, Done. (824.8ms) Inference, (2.0ms) NMS
OR
(captcha.txt in the directory I specified when running detect.py from the command line)
19 0.278333 0.449074 0.15 0.583333
7 0.636667 0.462963 0.146667 0.648148
2 0.135 0.467593 0.156667 0.583333
31 0.443333 0.444444 0.22 0.62963
Any help would be greatly appreciated; I have been at it for several weeks now and still can't find a straightforward solution. Thanks in advance.
This code parses the results returned from inference and, in my case, orders the detected characters from left to right in the image passed to the run() function. Passing the image path to the hub model, instead of a manually normalized tensor, lets the AutoShape wrapper handle the preprocessing and NMS, so results.pred already holds the final detections.
(The list name cat_and_pos stands for category_and_position.)
import torch

CLASSES = ['M','Y','8','9','F','B','V','I','Q','H','4','P','T',
           'C','W','A','K','G','N','L','5','6','2','0','Z','7','1','J','D','E',
           'O','X','3','R']

model = None

def start():
    # Load the custom-trained weights once via the YOLOv7 hub entry point.
    global model
    model = torch.hub.load("WongKinYiu/yolov7", "custom", "path/to/model.pt",
                           trust_repo=True)

def run(image_path):
    global model
    # The hub model accepts the image path directly and handles resizing,
    # normalization and NMS internally.
    results = model(image_path, size=640)
    predictions = results.pred[0]          # (n, 6) tensor: x1, y1, x2, y2, conf, class
    boxes = list(predictions[:, :4])
    categories = [int(x) for x in list(predictions[:, 5])]
    # Pair each detected character with the x coordinate of its box,
    # then sort left to right to rebuild the captcha string.
    cat_and_pos = []
    for i in range(len(categories)):
        box = boxes[i]
        cat = CLASSES[categories[i]]
        cat_and_pos.append((cat, float(box[0])))
    cat_and_pos.sort(key=lambda x: x[1])
    string = ''
    for cat, _ in cat_and_pos:
        string += cat
    print(string)
    return string
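For completeness, a minimal usage sketch (the image file name is just a placeholder):

start()
captcha_text = run("captcha.png")   # prints and returns the detected characters, ordered left to right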
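If you also want the .txt file that detect.py --save-txt writes, the same detections can be dumped in YOLO's normalized "class x_center y_center width height" format. This is only a sketch: it assumes results.pred holds xyxy boxes in original-image pixel coordinates (as in the upstream YOLOv5 Detections class that the yolov7 hub wrapper reuses), and save_labels is a hypothetical helper, not part of the repo.

from PIL import Image

def save_labels(image_path, results, out_path="captcha.txt"):
    # Convert each xyxy pixel box to YOLO's normalized
    # "class x_center y_center width height" label line.
    w, h = Image.open(image_path).size
    lines = []
    for *xyxy, conf, cls in results.pred[0].tolist():
        x1, y1, x2, y2 = xyxy
        xc = (x1 + x2) / 2 / w
        yc = (y1 + y2) / 2 / h
        bw = (x2 - x1) / w
        bh = (y2 - y1) / h
        lines.append(f"{int(cls)} {xc:.6f} {yc:.6f} {bw:.6f} {bh:.6f}")
    with open(out_path, "w") as f:
        f.write("\n".join(lines) + "\n")

# Example (hypothetical): save_labels("captcha.png", model("captcha.png", size=640))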