YOLOv8m-pose with ONNX weights to be compatible with Transformers.js.

Usage (Transformers.js)

If you haven't already, you can install the Transformers.js JavaScript library from NPM using:

npm i @xenova/transformers

Example: Perform pose estimation with Xenova/yolov8m-pose.

import { AutoModel, AutoProcessor, RawImage } from '@xenova/transformers';

// Fetch the pose model and the pre-processor published under the same id
const model_id = 'Xenova/yolov8m-pose';
const model = await AutoModel.from_pretrained(model_id);
const processor = await AutoProcessor.from_pretrained(model_id);

// Download the sample picture and turn it into the model's input tensor
const url = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/football-match.jpg';
const image = await RawImage.read(url);
const processed = await processor(image);
const pixel_values = processed.pixel_values;

// Tunable cut-offs used by the post-processing further down
const threshold = 0.3;      // detections scoring below this are dropped
const iouThreshold = 0.5;   // overlap above this marks two boxes as duplicates
const pointThreshold = 0.3; // keypoints scoring below this are not printed

// Run the model; boxes and keypoints are both packed into `output0`
const outputs = await model({ images: pixel_values });
const output0 = outputs.output0;

// Post-process:
const permuted = output0[0].transpose(1, 0);
// `permuted` is a Tensor of shape [ 8400, 56 ]:
// - 8400 potential detections
// - 56 parameters for each box:
//   - 4 for the bounding box dimensions (x-center, y-center, width, height)
//   - 1 for the confidence score
//   - 17 * 3 = 51 for the pose keypoints: 17 labels, each with (x, y, visibility)

// Example code to format it nicely:
// Each entry of `results` is { x1, x2, y1, y2, score, keypoints }, where
// `keypoints` stays a flat [x, y, visibility, x, y, visibility, ...] array.
const results = [];
const [scaledHeight, scaledWidth] = pixel_values.dims.slice(-2);
for (const [xc, yc, w, h, score, ...rawKeypoints] of permuted.tolist()) {
    if (score < threshold) continue;

    // Get pixel values, taking into account the original image size
    const x1 = (xc - w / 2) / scaledWidth * image.width;
    const y1 = (yc - h / 2) / scaledHeight * image.height;
    const x2 = (xc + w / 2) / scaledWidth * image.width;
    const y2 = (yc + h / 2) / scaledHeight * image.height;

    // Rescale every keypoint's (x, y) exactly like the box corners so that
    // boxes and keypoints are reported in the same (original image) pixel
    // space; the third value of each triple is a score and is left untouched.
    const keypoints = [];
    for (let i = 0; i < rawKeypoints.length; i += 3) {
        const [x, y, visibility] = rawKeypoints.slice(i, i + 3);
        keypoints.push(
            x / scaledWidth * image.width,
            y / scaledHeight * image.height,
            visibility,
        );
    }

    results.push({ x1, x2, y1, y2, score, keypoints });
}


// Define helper functions
/**
 * Remove overlapping detections using greedy non-maximum suppression.
 *
 * Detections are visited from highest to lowest `score`; a detection is kept
 * only if its IoU (see `calculateIoU`) with every already-kept detection is
 * at most `iouThreshold`. Visiting in score order guarantees that no two
 * returned detections overlap by more than the threshold, which a single
 * arrival-order pass with in-place replacement cannot guarantee.
 *
 * @param {Array<{x1: number, y1: number, x2: number, y2: number, score: number}>} detections
 *   Candidate detections. The array itself is not modified.
 * @param {number} iouThreshold - Overlap above which two boxes count as duplicates.
 * @returns {Array} The surviving detection objects, ordered by descending score.
 */
function removeDuplicates(detections, iouThreshold) {
    // Sort a copy: `Array.prototype.sort` works in place and the caller owns `detections`.
    const byDescendingScore = [...detections].sort((a, b) => b.score - a.score);

    const kept = [];
    for (const candidate of byDescendingScore) {
        const overlapsKept = kept.some(
            (winner) => calculateIoU(candidate, winner) > iouThreshold,
        );
        if (!overlapsKept) {
            kept.push(candidate);
        }
    }

    return kept;
}

/**
 * Intersection-over-union of two axis-aligned boxes.
 *
 * @param {{x1: number, y1: number, x2: number, y2: number}} detection1
 * @param {{x1: number, y1: number, x2: number, y2: number}} detection2
 * @returns {number} Overlap area divided by union area, or 0 when the union
 *   area is not positive (e.g. both boxes have zero area).
 */
function calculateIoU(detection1, detection2) {
    // Overlap along each axis, clamped at 0 for boxes that do not intersect.
    const overlapWidth = Math.max(
        0,
        Math.min(detection1.x2, detection2.x2) - Math.max(detection1.x1, detection2.x1),
    );
    const overlapHeight = Math.max(
        0,
        Math.min(detection1.y2, detection2.y2) - Math.max(detection1.y1, detection2.y1),
    );
    const overlapArea = overlapWidth * overlapHeight;

    const area1 = (detection1.x2 - detection1.x1) * (detection1.y2 - detection1.y1);
    const area2 = (detection2.x2 - detection2.x1) * (detection2.y2 - detection2.y1);
    const unionArea = area1 + area2 - overlapArea;

    // Guard the division: 0 / 0 would yield NaN for degenerate boxes.
    return unionArea > 0 ? overlapArea / unionArea : 0;
}

// Collapse detections that overlap too much
const filteredResults = removeDuplicates(results, iouThreshold);

// Print each remaining person, followed by the keypoints that are confident enough.
// `keypoints` is a flat array read three values at a time: x, y, score.
for (const person of filteredResults) {
    const { x1, x2, y1, y2, score, keypoints } = person;
    console.log(`Found person at [${x1}, ${y1}, ${x2}, ${y2}] with score ${score.toFixed(3)}`);

    for (let index = 0; index * 3 < keypoints.length; ++index) {
        const offset = index * 3;
        const label = model.config.id2label[index];
        const x = keypoints[offset];
        const y = keypoints[offset + 1];
        const point_score = keypoints[offset + 2];

        // Skip points the model is unsure about
        if (point_score < pointThreshold) continue;

        console.log(`  - ${label}: (${x.toFixed(2)}, ${y.toFixed(2)}) with score ${point_score.toFixed(3)}`);
    }
}
See example output
Found person at [535.503101348877, 39.878777217864986, 644.8351860046387, 346.3689248085022] with score 0.655
  - nose: (444.86, 91.25) with score 0.912
  - left_eye: (449.55, 79.71) with score 0.912
  - right_eye: (436.53, 82.54) with score 0.689
  - left_ear: (457.66, 83.08) with score 0.774
  - left_shoulder: (476.25, 126.43) with score 0.984
  - right_shoulder: (419.05, 129.94) with score 0.675
  - left_elbow: (495.99, 180.55) with score 0.960
  - left_wrist: (504.15, 233.96) with score 0.888
  - left_hip: (469.08, 227.61) with score 0.961
  - right_hip: (428.82, 228.95) with score 0.821
  - left_knee: (474.97, 301.15) with score 0.919
  - right_knee: (434.24, 305.24) with score 0.704
  - left_ankle: (467.31, 384.83) with score 0.625
  - right_ankle: (439.09, 379.35) with score 0.378
Found person at [-0.08985519409179688, 56.876064038276674, 158.62728118896484, 371.25909755229947] with score 0.902
  - nose: (61.15, 102.21) with score 0.979
  - left_eye: (66.59, 91.92) with score 0.939
  - right_eye: (51.35, 95.02) with score 0.905
  - left_ear: (70.82, 97.11) with score 0.778
  - right_ear: (48.08, 97.46) with score 0.655
  - left_shoulder: (84.60, 139.95) with score 0.997
  - right_shoulder: (38.36, 139.32) with score 0.996
  - left_elbow: (98.25, 196.80) with score 0.990
  - right_elbow: (24.83, 188.15) with score 0.981
  - left_wrist: (103.38, 252.91) with score 0.977
  - right_wrist: (9.42, 233.04) with score 0.965
  - left_hip: (82.91, 247.50) with score 0.999
  - right_hip: (51.28, 248.31) with score 0.999
  - left_knee: (85.25, 326.65) with score 0.997
  - right_knee: (49.12, 330.50) with score 0.996
  - left_ankle: (96.84, 419.45) with score 0.964
  - right_ankle: (51.88, 416.89) with score 0.960
Found person at [109.41852569580077, 13.203005981445314, 505.06954193115234, 532.9905454635621] with score 0.911
  - nose: (126.16, 102.84) with score 0.586
  - left_eye: (125.44, 84.07) with score 0.352
  - left_ear: (137.38, 77.79) with score 0.722
  - left_shoulder: (181.75, 122.32) with score 0.997
  - right_shoulder: (180.20, 152.15) with score 0.998
  - left_elbow: (262.31, 202.36) with score 0.996
  - right_elbow: (194.94, 277.60) with score 0.997
  - left_wrist: (298.87, 269.32) with score 0.987
  - right_wrist: (132.86, 281.44) with score 0.990
  - left_hip: (272.70, 284.47) with score 1.000
  - right_hip: (274.35, 307.48) with score 1.000
  - left_knee: (247.66, 441.74) with score 0.997
  - right_knee: (256.27, 500.82) with score 0.998
  - left_ankle: (340.54, 455.33) with score 0.848
  - right_ankle: (338.54, 543.24) with score 0.882
Found person at [425.35156250000006, 68.73829221725464, 640.3047943115234, 494.19192361831665] with score 0.901
  - nose: (425.40, 147.53) with score 0.995
  - left_eye: (432.33, 133.12) with score 0.985
  - right_eye: (410.70, 135.98) with score 0.969
  - left_ear: (440.72, 134.14) with score 0.901
  - right_ear: (400.69, 134.89) with score 0.800
  - left_shoulder: (455.11, 201.19) with score 1.000
  - right_shoulder: (368.64, 201.60) with score 0.999
  - left_elbow: (455.25, 292.03) with score 0.998
  - right_elbow: (350.65, 258.24) with score 0.989
  - left_wrist: (475.06, 370.36) with score 0.992
  - right_wrist: (398.78, 263.84) with score 0.975
  - left_hip: (441.94, 359.78) with score 1.000
  - right_hip: (384.06, 368.70) with score 1.000
  - left_knee: (462.74, 452.41) with score 0.998
  - right_knee: (395.50, 488.42) with score 0.997
  - left_ankle: (465.12, 540.38) with score 0.960
  - right_ankle: (433.43, 569.37) with score 0.938
Downloads last month
15
Inference Providers NEW
This model is not currently available via any of the supported Inference Providers.
The model cannot be deployed to the HF Inference API: The model has no pipeline_tag.