Tags: python, nameerror, mediapipe

NameError: name 'mp_image' is not defined with MediaPipe gesture recognition


I am trying to use MediaPipe for real-time gesture recognition on a webcam feed, and I want to run inference with the gesture_recognizer.task model. Here's my code:

import cv2
import mediapipe as mp
from mediapipe.tasks import python
from mediapipe.tasks.python import vision

model_path = "gesture_recognizer.task"
base_options = python.BaseOptions(model_asset_path=model_path)
GestureRecognizer = mp.tasks.vision.GestureRecognizer
GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
GestureRecognizerResult = mp.tasks.vision.GestureRecognizerResult
VisionRunningMode = mp.tasks.vision.RunningMode

def print_result(result: GestureRecognizerResult, output_image: mp.Image, timestamp_ms: int):
    print('gesture recognition result: {}'.format(result))

options = GestureRecognizerOptions(
    base_options=python.BaseOptions(model_asset_path=model_path),
    running_mode=VisionRunningMode.LIVE_STREAM,
    result_callback=print_result)
recognizer = GestureRecognizer.create_from_options(options)

mp_drawing = mp.solutions.drawing_utils
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(
        static_image_mode=False,
        max_num_hands=2,
        min_detection_confidence=0.65,
        min_tracking_confidence=0.65)

cap = cv2.VideoCapture(0)

while True:
    ret, frame = cap.read()
    if not ret:
        break
        
    i = 1  # left or right hand
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = hands.process(frame)
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    np_array = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    if results.multi_hand_landmarks:
        for hand_landmarks in results.multi_hand_landmarks:
            h, w, c = frame.shape
            mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
            mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=np_array)
            results = recognizer.recognize_async(mp_image)
    
    # show the prediction on the frame
    cv2.putText(mp_image, results, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 
                   1, (0,0,255), 2, cv2.LINE_AA)
    cv2.imshow('MediaPipe Hands', frame)

    if cv2.waitKey(1) & 0xFF == 27:
        break

cap.release()

I am getting NameError: name 'mp_image' is not defined on the line cv2.putText(mp_image, results, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0,0,255), 2, cv2.LINE_AA). By now I am really confused and not sure what I am doing, let alone what I am doing wrong. Please help!


Solution

  • The NameError happens because mp_image is only assigned inside the if results.multi_hand_landmarks: block, so on a frame where no hand is detected it never exists; cv2.putText also expects a NumPy frame and a string, not an mp.Image and a recognition result. In addition to the changes mentioned in the comments, you should provide a "monotonically increasing" timestamp to the recognize_async() function. Since the result_callback cannot access the output image directly, I have used a lock and a shared field to share the gesture data between threads. This is a fully functional example (mediapipe==0.10.0):

    import cv2
    import mediapipe as mp
    from mediapipe.tasks import python
    import threading 
    
    class GestureRecognizer:
        def main(self):
            num_hands = 2
            model_path = "gesture_recognizer.task"
            GestureRecognizer = mp.tasks.vision.GestureRecognizer
            GestureRecognizerOptions = mp.tasks.vision.GestureRecognizerOptions
            VisionRunningMode = mp.tasks.vision.RunningMode
    
            self.lock = threading.Lock()
            self.current_gestures = []
            options = GestureRecognizerOptions(
                base_options=python.BaseOptions(model_asset_path=model_path),
                running_mode=VisionRunningMode.LIVE_STREAM,
                num_hands = num_hands,
                result_callback=self.__result_callback)
            recognizer = GestureRecognizer.create_from_options(options)
    
            timestamp = 0 
            mp_drawing = mp.solutions.drawing_utils
            mp_hands = mp.solutions.hands
            hands = mp_hands.Hands(
                    static_image_mode=False,
                    max_num_hands=num_hands,
                    min_detection_confidence=0.65,
                    min_tracking_confidence=0.65)
    
            cap = cv2.VideoCapture(0)
    
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                results = hands.process(frame)
                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
                np_array = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                
                if results.multi_hand_landmarks:
                    for hand_landmarks in results.multi_hand_landmarks:
                        mp_drawing.draw_landmarks(frame, hand_landmarks, mp_hands.HAND_CONNECTIONS)
                        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=np_array)
                        recognizer.recognize_async(mp_image, timestamp)
                        timestamp = timestamp + 1 # should be monotonically increasing, because in LIVE_STREAM mode
                        
                    self.put_gestures(frame)
    
                cv2.imshow('MediaPipe Hands', frame)
                if cv2.waitKey(1) & 0xFF == 27:
                    break
    
            cap.release()
    
        def put_gestures(self, frame):
            self.lock.acquire()
            gestures = self.current_gestures
            self.lock.release()
            y_pos = 50
            for hand_gesture_name in gestures:
                # show the prediction on the frame
                cv2.putText(frame, hand_gesture_name, (10, y_pos), cv2.FONT_HERSHEY_SIMPLEX, 
                                    1, (0,0,255), 2, cv2.LINE_AA)
                y_pos += 50
    
        def __result_callback(self, result, output_image, timestamp_ms):
            #print(f'gesture recognition result: {result}')
            self.lock.acquire() # solves potential concurrency issues
            self.current_gestures = []
            if result is not None and any(result.gestures):
                print("Recognized gestures:")
                for single_hand_gesture_data in result.gestures:
                    gesture_name = single_hand_gesture_data[0].category_name
                    print(gesture_name)
                    self.current_gestures.append(gesture_name)
            self.lock.release()
    
    if __name__ == "__main__":
        rec = GestureRecognizer()
        rec.main()
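
    As a side note, the timestamp passed to recognize_async() does not have to be a frame counter; any monotonically increasing integer in milliseconds works. Below is a minimal sketch of a wall-clock-based timestamp, assuming one recognize_async() call per frame (the start_time variable and current_timestamp_ms() helper are my own names, not part of the example above):

        import time

        # Reference point, taken once before the capture loop starts.
        start_time = time.time()

        def current_timestamp_ms() -> int:
            # Milliseconds elapsed since start_time. At typical webcam frame
            # rates, consecutive frames are tens of milliseconds apart, so the
            # values keep increasing as LIVE_STREAM mode requires.
            return int((time.time() - start_time) * 1000)

        # Inside the capture loop, instead of the manual counter:
        # recognizer.recognize_async(mp_image, current_timestamp_ms())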
    

    Demo: (screenshot of the recognized gesture names drawn on the webcam feed omitted)