AI/Computer Vision

19. Using MediaPipe (1) | Hand Landmark Detection

by 사라리24 2025. 1. 3.



1. Recognizing Hand Gestures

 

  The MediaPipe site

✔ Link: https://ai.google.dev/edge/mediapipe/solutions/guide?hl=ko

 


On that page, click Hand landmark detection > Python.

 

✔ Colab notebook: https://colab.research.google.com/github/googlesamples/mediapipe/blob/main/examples/hand_landmarker/python/hand_landmarker.ipynb?hl=ko#scrollTo=Iy4r2_ePylIa

 

  Virtual environment (Anaconda)

 

Install mediapipe (terminal)


       

       pip install mediapipe
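
To confirm the install, you can print the package version from the same environment (a quick sanity check; mediapipe exposes __version__):

        python -c "import mediapipe as mp; print(mp.__version__)"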



 

 

 

Download the off-the-shelf model bundle (terminal)


       

       curl -O https://storage.googleapis.com/mediapipe-models/hand_landmarker/hand_landmarker/float16/1/hand_landmarker.task
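
If curl is unavailable (e.g., on some Windows setups), the same model bundle can be fetched from Python instead; this is just a sketch of an alternative to the curl command above:

        import urllib.request

        MODEL_URL = ("https://storage.googleapis.com/mediapipe-models/hand_landmarker/"
                     "hand_landmarker/float16/1/hand_landmarker.task")
        # Saves hand_landmarker.task into the current working directory.
        urllib.request.urlretrieve(MODEL_URL, "hand_landmarker.task")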



 

 

 

 

Visualization code


       
                #@markdown We implemented some functions to visualize the hand landmark detection results. <br/> Run the following cell to activate the functions.

                import cv2  # needed for cv2.putText below
                from mediapipe import solutions
                from mediapipe.framework.formats import landmark_pb2
                import numpy as np

                MARGIN = 10  # pixels
                FONT_SIZE = 1
                FONT_THICKNESS = 1
                HANDEDNESS_TEXT_COLOR = (88, 205, 54)  # vibrant green

                def draw_landmarks_on_image(rgb_image, detection_result):
                    hand_landmarks_list = detection_result.hand_landmarks
                    handedness_list = detection_result.handedness
                    annotated_image = np.copy(rgb_image)

                    # Loop through the detected hands to visualize.
                    for idx in range(len(hand_landmarks_list)):
                        hand_landmarks = hand_landmarks_list[idx]
                        handedness = handedness_list[idx]

                        # Draw the hand landmarks.
                        hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
                        hand_landmarks_proto.landmark.extend([
                            landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks
                        ])
                        solutions.drawing_utils.draw_landmarks(
                            annotated_image,
                            hand_landmarks_proto,
                            solutions.hands.HAND_CONNECTIONS,
                            solutions.drawing_styles.get_default_hand_landmarks_style(),
                            solutions.drawing_styles.get_default_hand_connections_style())

                        # Get the top left corner of the detected hand's bounding box.
                        height, width, _ = annotated_image.shape
                        x_coordinates = [landmark.x for landmark in hand_landmarks]
                        y_coordinates = [landmark.y for landmark in hand_landmarks]
                        text_x = int(min(x_coordinates) * width)
                        text_y = int(min(y_coordinates) * height) - MARGIN

                        # Draw handedness (left or right hand) on the image.
                        cv2.putText(annotated_image, f"{handedness[0].category_name}",
                                    (text_x, text_y), cv2.FONT_HERSHEY_DUPLEX,
                                    FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)

                    return annotated_image
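
Each detected hand is returned as 21 normalized landmarks, with x and y in the 0-1 range relative to image width and height. As a minimal sketch of reading a single point, the hypothetical helper below converts the index fingertip (index 8 in MediaPipe's 21-point hand model) to pixel coordinates, assuming a detection_result produced by the detector created in a later step:

        # Well-known indices in MediaPipe's 21-point hand model:
        # 0 = wrist, 4 = thumb tip, 8 = index fingertip, 12/16/20 = middle/ring/pinky tips.
        INDEX_FINGER_TIP = 8

        def fingertip_pixel(detection_result, image_width, image_height, hand_idx=0):
            """Return the index fingertip of one detected hand in pixel coordinates."""
            tip = detection_result.hand_landmarks[hand_idx][INDEX_FINGER_TIP]
            return int(tip.x * image_width), int(tip.y * image_height)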





 

 

 

Download a test image (terminal)


       
        curl -o image.jpg https://storage.googleapis.com/mediapipe-tasks/hand_landmarker/woman_hands.jpg
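
Before running inference, you can optionally confirm the file downloaded correctly (a minimal check; it assumes image.jpg sits in the current directory):

        import cv2

        img = cv2.imread("image.jpg")
        assert img is not None, "image.jpg is missing or unreadable"
        print(img.shape)  # (height, width, channels)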

 

 

 

Run inference and visualize the results


       

            # STEP 1: Import the necessary modules.
            import cv2
            import mediapipe as mp
            from mediapipe.tasks import python
            from mediapipe.tasks.python import vision

            # STEP 2: Create an HandLandmarker object.
            base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
            options = vision.HandLandmarkerOptions(base_options=base_options,
                                                num_hands=2)
            detector = vision.HandLandmarker.create_from_options(options)

            # STEP 3: Load the input image.
            image = mp.Image.create_from_file("image.jpg")

            # STEP 4: Detect hand landmarks from the input image.
            detection_result = detector.detect(image)

            # STEP 5: Process the classification result. In this case, visualize it.
            annotated_image = draw_landmarks_on_image(image.numpy_view(), detection_result)
            cv2.imshow('Annotated image', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))
            cv2.waitKey(0)
            cv2.destroyAllWindows()
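
Besides drawing, the raw result can be read directly. A small sketch that prints the handedness label, its confidence score, and the normalized wrist position for each detected hand (field names as used in the visualization code above):

        for hand_idx, landmarks in enumerate(detection_result.hand_landmarks):
            category = detection_result.handedness[hand_idx][0]  # top handedness guess
            wrist = landmarks[0]  # landmark 0 is the wrist
            print(f"Hand {hand_idx}: {category.category_name} ({category.score:.2f}), "
                  f"wrist at ({wrist.x:.3f}, {wrist.y:.3f})")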



 

 

Try different photos


       
            # STEP 3: Load the input image.
            image = mp.Image.create_from_file("image.jpg")  # ---> change this line
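
To batch-test several photos, STEPs 3 through 5 can be wrapped in a loop. The filenames below are placeholders; substitute your own images:

        # Hypothetical filenames -- replace with your own test images.
        for path in ["hands1.jpg", "hands2.jpg", "hands3.jpg"]:
            image = mp.Image.create_from_file(path)
            detection_result = detector.detect(image)
            annotated = draw_landmarks_on_image(image.numpy_view(), detection_result)
            cv2.imshow(path, cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))
            cv2.waitKey(0)  # press any key to advance to the next image
        cv2.destroyAllWindows()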


 


Try recognition with a webcam


       
 
        import cv2
        import numpy as np
        from mediapipe import solutions
        from mediapipe.framework.formats import landmark_pb2
        import mediapipe as mp
        from mediapipe.tasks import python
        from mediapipe.tasks.python import vision

        # Constants
        MARGIN = 10  # pixels
        FONT_SIZE = 1
        FONT_THICKNESS = 1
        HANDEDNESS_TEXT_COLOR = (88, 205, 54) # vibrant green

        def draw_landmarks_on_image(rgb_image, detection_result):
            hand_landmarks_list = detection_result.hand_landmarks
            handedness_list = detection_result.handedness
            annotated_image = np.copy(rgb_image)

            # Loop through the detected hands to visualize.
            for idx in range(len(hand_landmarks_list)):
                hand_landmarks = hand_landmarks_list[idx]
                handedness = handedness_list[idx]

                # Draw the hand landmarks.
                hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
                hand_landmarks_proto.landmark.extend([
                    landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in hand_landmarks
                ])
                solutions.drawing_utils.draw_landmarks(
                    annotated_image,
                    hand_landmarks_proto,
                    solutions.hands.HAND_CONNECTIONS,
                    solutions.drawing_styles.get_default_hand_landmarks_style(),
                    solutions.drawing_styles.get_default_hand_connections_style())

                # Get the top left corner of the detected hand's bounding box.
                height, width, _ = annotated_image.shape
                x_coordinates = [landmark.x for landmark in hand_landmarks]
                y_coordinates = [landmark.y for landmark in hand_landmarks]
                text_x = int(min(x_coordinates) * width)
                text_y = int(min(y_coordinates) * height) - MARGIN

                # Draw handedness (left or right hand) on the image.
                cv2.putText(annotated_image, f"{handedness[0].category_name}",
                            (text_x, text_y), cv2.FONT_HERSHEY_DUPLEX,
                            FONT_SIZE, HANDEDNESS_TEXT_COLOR, FONT_THICKNESS, cv2.LINE_AA)

            return annotated_image

        # STEP 1: Create a HandLandmarker object.
        base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
        options = vision.HandLandmarkerOptions(base_options=base_options,
                                            num_hands=2)
        detector = vision.HandLandmarker.create_from_options(options)

        # STEP 2: Open webcam using OpenCV.
        cap = cv2.VideoCapture(0)  # Use the first camera

        if not cap.isOpened():
            print("Error: Could not open webcam.")
            exit()

        # Set webcam resolution (increase the size of the video feed)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)  # Set width
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)  # Set height

        while True:
            # STEP 3: Capture a frame from the webcam.
            ret, frame = cap.read()
            if not ret:
                print("Error: Failed to capture frame.")
                break
           
            # STEP 4: Convert the frame to the format needed by the model (RGB).
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
           
            # Mediapipe expects image in RGB format as numpy array.
            image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_frame)

            # STEP 5: Detect hand landmarks from the captured frame.
            detection_result = detector.detect(image)

            # STEP 6: Process the classification result (visualize the landmarks).
            annotated_image = draw_landmarks_on_image(rgb_frame, detection_result)

            # STEP 7: Display the annotated image.
            cv2.imshow('Annotated Image', cv2.cvtColor(annotated_image, cv2.COLOR_RGB2BGR))

            # STEP 8: Exit the loop when the user presses 'q'.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

        # Release the webcam and close all OpenCV windows.
        cap.release()
        cv2.destroyAllWindows()
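
The loop above runs the detector in its default IMAGE mode on every frame independently. MediaPipe Tasks also provides a VIDEO running mode that exploits continuity between frames; below is a sketch of only the parts that change, with everything else in the loop staying the same:

        import time

        # Create the detector in VIDEO mode instead of the default IMAGE mode.
        options = vision.HandLandmarkerOptions(
            base_options=python.BaseOptions(model_asset_path='hand_landmarker.task'),
            running_mode=vision.RunningMode.VIDEO,
            num_hands=2)
        detector = vision.HandLandmarker.create_from_options(options)

        # Inside the capture loop, detect_for_video() replaces detect();
        # it requires a monotonically increasing timestamp in milliseconds.
        timestamp_ms = int(time.monotonic() * 1000)
        detection_result = detector.detect_for_video(image, timestamp_ms)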