File size: 6,112 Bytes
1bc3c94 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 |
using System;
using Unity.Mathematics;
using Unity.Sentis;
using UnityEngine;
/// <summary>
/// Runs a two-stage hand pipeline on <see cref="imageTexture"/> in a continuous loop:
/// a detector model (post-processed to select a single best candidate) followed by a
/// landmarker model run on a crop around that candidate. The resulting keypoints are
/// pushed to <see cref="handPreview"/>.
/// </summary>
public class HandDetection : MonoBehaviour
{
    /// <summary>Receives the per-keypoint positions and the visibility toggle.</summary>
    public HandPreview handPreview;

    /// <summary>Receives the texture that is being analysed.</summary>
    public ImagePreview imagePreview;

    /// <summary>Image fed to the detector on every loop iteration.</summary>
    public Texture2D imageTexture;

    /// <summary>Model asset for the detection stage.</summary>
    public ModelAsset handDetector;

    /// <summary>Model asset for the landmark stage.</summary>
    public ModelAsset handLandmarker;

    /// <summary>CSV text parsed by <c>BlazeUtils.LoadAnchors</c> into the anchor table.</summary>
    public TextAsset anchorsCSV;

    /// <summary>Minimum detector score required before the landmarker is run.</summary>
    public float scoreThreshold = 0.5f;

    const int k_NumAnchors = 2016;
    float[,] m_Anchors;

    const int k_NumKeypoints = 21;
    const int detectorInputSize = 192;
    const int landmarkerInputSize = 224;

    Worker m_HandDetectorWorker;
    Worker m_HandLandmarkerWorker;
    Tensor<float> m_DetectorInput;
    Tensor<float> m_LandmarkerInput;

    // Handle to the in-flight Detect call; null whenever the detection loop is not running.
    Awaitable m_DetectAwaitable;

    float m_TextureWidth;
    float m_TextureHeight;

    /// <summary>
    /// Loads both models, creates the workers and input tensors, then runs
    /// <see cref="Detect"/> repeatedly until the pending awaitable is cancelled.
    /// All workers and tensors are disposed when the loop exits for any reason.
    /// </summary>
    public async void Start()
    {
        m_Anchors = BlazeUtils.LoadAnchors(anchorsCSV.text, k_NumAnchors);

        var handDetectorModel = ModelLoader.Load(handDetector);

        // post process the model to filter scores + argmax select the best hand
        var graph = new FunctionalGraph();
        var input = graph.AddInput(handDetectorModel, 0);
        var outputs = Functional.Forward(handDetectorModel, input);
        var boxes = outputs[1]; // (1, 2016, 18)
        var scores = outputs[0]; // (1, 2016, 1)
        var idx_scores_boxes = BlazeUtils.ArgMaxFiltering(boxes, scores);
        handDetectorModel = graph.Compile(idx_scores_boxes.Item1, idx_scores_boxes.Item2, idx_scores_boxes.Item3);

        try
        {
            m_HandDetectorWorker = new Worker(handDetectorModel, BackendType.GPUCompute);

            var handLandmarkerModel = ModelLoader.Load(handLandmarker);
            m_HandLandmarkerWorker = new Worker(handLandmarkerModel, BackendType.GPUCompute);

            m_DetectorInput = new Tensor<float>(new TensorShape(1, detectorInputSize, detectorInputSize, 3));
            m_LandmarkerInput = new Tensor<float>(new TensorShape(1, landmarkerInputSize, landmarkerInputSize, 3));

            while (true)
            {
                try
                {
                    m_DetectAwaitable = Detect(imageTexture);
                    await m_DetectAwaitable;
                }
                catch (OperationCanceledException)
                {
                    // Cancellation (see OnDestroy) is the normal way to leave the loop.
                    break;
                }
            }
        }
        finally
        {
            // Runs on cancellation AND on any unexpected exception, so the workers and
            // tensors are released even when Detect fails. The exception itself still
            // propagates after cleanup.
            m_DetectAwaitable = null; // never leave a finished awaitable around to be cancelled later
            m_HandDetectorWorker?.Dispose();
            m_HandLandmarkerWorker?.Dispose();
            m_DetectorInput?.Dispose();
            m_LandmarkerInput?.Dispose();
        }
    }

    /// <summary>
    /// Converts a pixel position in the source image to preview space: centred on the
    /// image and divided by the image height (so the vertical extent spans one unit).
    /// </summary>
    /// <param name="position">Position in image pixel coordinates.</param>
    /// <returns>The centred, height-normalised position with z = 0.</returns>
    Vector3 ImageToWorld(Vector2 position)
    {
        return (position - 0.5f * new Vector2(m_TextureWidth, m_TextureHeight)) / m_TextureHeight;
    }

    /// <summary>
    /// Runs one detection + landmark pass over <paramref name="texture"/> and updates the previews.
    /// When the detector score is below <see cref="scoreThreshold"/> the hand preview is hidden
    /// and the landmarker is skipped.
    /// </summary>
    /// <param name="texture">Image to analyse.</param>
    async Awaitable Detect(Texture texture)
    {
        m_TextureWidth = texture.width;
        m_TextureHeight = texture.height;
        imagePreview.SetTexture(texture);

        var size = Mathf.Max(texture.width, texture.height);

        // The affine transformation matrix to go from tensor coordinates to image coordinates
        var scale = size / (float)detectorInputSize;
        var M = BlazeUtils.mul(BlazeUtils.TranslationMatrix(0.5f * (new Vector2(texture.width, texture.height) + new Vector2(-size, size))), BlazeUtils.ScaleMatrix(new Vector2(scale, -scale)));
        BlazeUtils.SampleImageAffine(texture, m_DetectorInput, M);

        m_HandDetectorWorker.Schedule(m_DetectorInput);

        // Start all three readbacks before awaiting any of them.
        // Direct casts (rather than `as`) so an unexpected output type fails with a clear
        // InvalidCastException instead of a NullReferenceException on the next call.
        var outputIdxAwaitable = ((Tensor<int>)m_HandDetectorWorker.PeekOutput(0)).ReadbackAndCloneAsync();
        var outputScoreAwaitable = ((Tensor<float>)m_HandDetectorWorker.PeekOutput(1)).ReadbackAndCloneAsync();
        var outputBoxAwaitable = ((Tensor<float>)m_HandDetectorWorker.PeekOutput(2)).ReadbackAndCloneAsync();

        using var outputIdx = await outputIdxAwaitable;
        using var outputScore = await outputScoreAwaitable;
        using var outputBox = await outputBoxAwaitable;

        var scorePassesThreshold = outputScore[0] >= scoreThreshold;
        handPreview.SetActive(scorePassesThreshold);

        if (!scorePassesThreshold)
            return;

        // Box and keypoint values are offsets relative to the selected anchor, in detector-tensor pixels.
        var idx = outputIdx[0];
        var anchorPosition = detectorInputSize * new float2(m_Anchors[idx, 0], m_Anchors[idx, 1]);

        var boxCentre_TensorSpace = anchorPosition + new float2(outputBox[0, 0, 0], outputBox[0, 0, 1]);
        var boxSize_TensorSpace = math.max(outputBox[0, 0, 2], outputBox[0, 0, 3]);

        // Keypoints 0 and 2 define the hand's "up" direction.
        // NOTE(review): presumably wrist and middle-finger base — confirm against the model's keypoint layout.
        var kp0_TensorSpace = anchorPosition + new float2(outputBox[0, 0, 4 + 2 * 0 + 0], outputBox[0, 0, 4 + 2 * 0 + 1]);
        var kp2_TensorSpace = anchorPosition + new float2(outputBox[0, 0, 4 + 2 * 2 + 0], outputBox[0, 0, 4 + 2 * 2 + 1]);
        var delta_TensorSpace = kp2_TensorSpace - kp0_TensorSpace;

        // Coincident keypoints would give 0/0 = NaN and poison the crop matrix; fall back to
        // "no shift" in that case. The non-degenerate path is numerically unchanged.
        var deltaLength = math.length(delta_TensorSpace);
        var up_TensorSpace = deltaLength > 0f ? delta_TensorSpace / deltaLength : float2.zero;

        var theta = math.atan2(delta_TensorSpace.y, delta_TensorSpace.x);
        var rotation = 0.5f * Mathf.PI - theta;

        // Shift the crop centre along the up direction by half the box size, then enlarge the crop by 2.6x.
        boxCentre_TensorSpace += 0.5f * boxSize_TensorSpace * up_TensorSpace;
        boxSize_TensorSpace *= 2.6f;

        // Landmarker-tensor coordinates -> image coordinates: centre the tensor on its origin,
        // rotate, scale to the crop size, move to the crop centre, then apply the detector mapping M.
        var origin2 = new float2(0.5f * landmarkerInputSize, 0.5f * landmarkerInputSize);
        var scale2 = boxSize_TensorSpace / landmarkerInputSize;
        var M2 = BlazeUtils.mul(M, BlazeUtils.mul(BlazeUtils.mul(BlazeUtils.mul(BlazeUtils.TranslationMatrix(boxCentre_TensorSpace), BlazeUtils.ScaleMatrix(new float2(scale2, -scale2))), BlazeUtils.RotationMatrix(rotation)), BlazeUtils.TranslationMatrix(-origin2)));
        BlazeUtils.SampleImageAffine(texture, m_LandmarkerInput, M2);

        m_HandLandmarkerWorker.Schedule(m_LandmarkerInput);

        var landmarksAwaitable = ((Tensor<float>)m_HandLandmarkerWorker.PeekOutput("Identity")).ReadbackAndCloneAsync();
        using var landmarks = await landmarksAwaitable;

        // Landmarks are read as consecutive (x, y, z) triples, one per keypoint.
        for (var i = 0; i < k_NumKeypoints; i++)
        {
            var position_ImageSpace = BlazeUtils.mul(M2, new float2(landmarks[3 * i + 0], landmarks[3 * i + 1]));
            Vector3 position_WorldSpace = ImageToWorld(position_ImageSpace) + new Vector3(0, 0, landmarks[3 * i + 2] / m_TextureHeight);
            handPreview.SetKeypoint(i, true, position_WorldSpace);
        }
    }

    /// <summary>
    /// Cancels the in-flight detection, if any, which makes the loop in <see cref="Start"/>
    /// exit and dispose the workers and tensors.
    /// </summary>
    void OnDestroy()
    {
        // Null when Start never reached the loop or the loop has already ended.
        m_DetectAwaitable?.Cancel();
    }
}
|