python, pytorch, object-detection, yolo, inference

Using a PyTorch model for running detections


I have a custom-trained weights file and I want to run predictions with it on single images.

I have initialized the model through torch.hub, but once I run a prediction on it, it returns a tuple of God knows what. I would like it to return labels, the way it does when I run:

python detect.py --source ../captcha.png --weights captcha_model.pt --save-txt --no-trace --exist-ok --project .. --name output --nosave

My code:

import torch
import torchvision.transforms as transforms
from PIL import Image

classes = ['M','Y','8','9','F','B','V','I','Q','H','4','P','T',
'C','W','A','K','G','N','L','5','6','2','0','Z','7','1','J','D','E',
'O','X','3','R']

def pre_image(image_path, model):
  img = Image.open(image_path)
  # ImageNet normalization statistics
  mean = [0.485, 0.456, 0.406]
  std = [0.229, 0.224, 0.225]
  transform_norm = transforms.Compose([transforms.ToTensor(),
      transforms.Resize((224, 224)), transforms.Normalize(mean, std)])
  img_normalized = transform_norm(img).float()
  img_normalized = img_normalized.unsqueeze_(0)  # add a batch dimension
  img_normalized = img_normalized.to("cpu")
  with torch.no_grad():
    model.eval()
    output = model(img_normalized)
  return output


model = torch.hub.load("WongKinYiu/yolov7","custom","captcha_model.pt",trust_repo=True)
output = pre_image("captcha.png", model)
print(output)

Its output:

(tensor([[[-1.26430e+00,  2.61231e+00,  3.59347e+01,  ...,  1.01391e-02,  2.46865e-02,  3.76955e-02],
         [ 7.77992e+00,  3.27832e+00,  3.90596e+01,  ...,  7.17988e-03,  1.89734e-02,  2.55351e-02],
         ...,
         [ 2.13092e+02,  2.05269e+02,  6.91005e+02,  ...,  2.27712e-02,  6.58516e-03,  4.77984e-03]]]), [tensor([[[[[-1.57869e+00, -3.50467e-01,  1.85951e+00,  ..., -4.58117e+00, -3.67650e+00, -3.23979e+00],
           [-1.17338e+00, -1.80912e-01,  2.22052e+00,  ..., -4.92927e+00, -3.94556e+00, -3.64183e+00],
           ...

(output truncated: the tuple continues with several more pages of raw prediction tensors)

Desired output:

1 8, 1 I, 1 L, 1 X, Done. (824.8ms) Inference, (2.0ms) NMS

OR

(the contents of captcha.txt, written to the directory I specified when running detect.py from the command line)

19 0.278333 0.449074 0.15 0.583333
7 0.636667 0.462963 0.146667 0.648148
2 0.135 0.467593 0.156667 0.583333
31 0.443333 0.444444 0.22 0.62963

Any help would be greatly appreciated; I have been at this for several weeks now and still can't find a straightforward solution. Thanks in advance.


Solution

  • This code parses the inference results; in my case I wanted the detected characters arranged left to right as they appear in the image passed to the run() function. The key difference from the code in the question is that the image path is given directly to the hub model: calling the autoShape-wrapped model with a preprocessed tensor bypasses its built-in preprocessing and NMS, which is why you get raw tensors back, whereas passing a path (or a PIL/NumPy image) returns a parsed Detections object.

    (The list name cat_and_pos stands for category_and_position.)

    import torch

    CLASSES = ['M','Y','8','9','F','B','V','I','Q','H','4','P','T',
    'C','W','A','K','G','N','L','5','6','2','0','Z','7','1','J','D','E',
    'O','X','3','R']

    model = None

    def start():
      global model
      # autoShape-wrapped hub model: accepts image paths, PIL images or NumPy arrays
      model = torch.hub.load("WongKinYiu/yolov7", "custom", "path/to/model.pt",
              trust_repo=True)

    def run(image_path):
      # Inference with the wrapper's built-in preprocessing and NMS
      results = model(image_path, size=640)

      # results.pred[0]: one row per detection -> [x1, y1, x2, y2, conf, class]
      predictions = results.pred[0]
      boxes = list(predictions[:, :4])
      categories = [int(x) for x in predictions[:, 5]]

      # Pair each detected character with the x-coordinate of its box's left
      # edge so the characters can be sorted left to right
      cat_and_pos = []
      for i in range(len(categories)):
        box = boxes[i]
        cat = CLASSES[categories[i]]
        cat_and_pos.append((cat, float(box[0])))

      cat_and_pos.sort(key=lambda x: x[1])

      string = ''.join(cat for cat, _ in cat_and_pos)
      print(string)
      return string
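
    If you also want the second desired output (the detect.py --save-txt label lines), the Detections object returned by the hub model also carries normalized boxes via results.xywhn, the same API the YOLOv5 hub models use. Below is a minimal sketch under that assumption; the helper name run_and_save_labels and the default output path are mine, and the number formatting only approximates what detect.py writes:

    def run_and_save_labels(image_path, label_path="captcha.txt"):
      results = model(image_path, size=640)

      # results.xywhn[0]: one row per detection ->
      # [x_center, y_center, width, height, conf, class],
      # with the coordinates normalized to the image dimensions
      lines = []
      for *xywh, conf, cls in results.xywhn[0].tolist():
        lines.append(f"{int(cls)} " + " ".join(f"{v:.6f}" for v in xywh))

      with open(label_path, "w") as f:
        f.write("\n".join(lines) + "\n")

    Usage:

    start()
    print(run("captcha.png"))           # e.g. prints something like 8ILX
    run_and_save_labels("captcha.png")  # writes YOLO-format lines to captcha.txt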