haolongzhangm committed · a6a308d
Parent(s): c4f16a5

feat(demo): add MegEngine demo example (#146)
Files changed:
- README.md +5 -4
- demo/MegEngine/cpp/README.md +122 -0
- demo/MegEngine/cpp/build.sh +61 -0
- demo/MegEngine/cpp/yolox.cpp +473 -0
- demo/MegEngine/python/README.md +33 -0
- demo/MegEngine/python/build.py +54 -0
- demo/MegEngine/python/coco_classes.py +86 -0
- demo/MegEngine/python/convert_weights.py +64 -0
- demo/MegEngine/python/demo.py +202 -0
- demo/MegEngine/python/dump.py +51 -0
- demo/MegEngine/python/models/__init__.py +9 -0
- demo/MegEngine/python/models/darknet.py +154 -0
- demo/MegEngine/python/models/network_blocks.py +183 -0
- demo/MegEngine/python/models/yolo_fpn.py +78 -0
- demo/MegEngine/python/models/yolo_head.py +193 -0
- demo/MegEngine/python/models/yolo_pafpn.py +111 -0
- demo/MegEngine/python/models/yolox.py +34 -0
- demo/MegEngine/python/process.py +76 -0
- demo/MegEngine/python/visualize.py +128 -0
README.md CHANGED

```diff
@@ -152,10 +152,11 @@ python tools/eval.py -n yolox-s -c yolox_s.pth.tar -b 1 -d 1 --conf 0.001 --fp1
 ## Deployment
 
-1. [ONNX export and an ONNXRuntime](./demo/ONNXRuntime)
-2. [TensorRT in C++ and Python](./demo/TensorRT)
-3. [ncnn in C++ and Java](./demo/ncnn)
-4. [OpenVINO in C++ and Python](./demo/OpenVINO)
+1. [MegEngine in C++ and Python](./demo/MegEngine)
+2. [ONNX export and an ONNXRuntime](./demo/ONNXRuntime)
+3. [TensorRT in C++ and Python](./demo/TensorRT)
+4. [ncnn in C++ and Java](./demo/ncnn)
+5. [OpenVINO in C++ and Python](./demo/OpenVINO)
 
 ## Third-party resources
```
demo/MegEngine/cpp/README.md ADDED (+122 lines)
````markdown
# YOLOX-CPP-MegEngine

C++ demo of YOLOX object detection based on [MegEngine](https://github.com/MegEngine/MegEngine).

## Tutorial

### Step 1: install the toolchain

* host: `sudo apt install gcc g++ build-essential git git-lfs gfortran libgfortran-6-dev autoconf gnupg flex bison gperf curl zlib1g-dev gcc-multilib g++-multilib cmake` (gcc/g++ version >= 6)
* cross build for Android: download the [NDK](https://developer.android.com/ndk/downloads)
  * after unzipping the NDK, `export NDK_ROOT="path of NDK"`

### Step 2: build MegEngine

* `git clone https://github.com/MegEngine/MegEngine.git`
* init third_party:
  * `export megengine_root="path of MegEngine"`
  * `cd $megengine_root && ./third_party/prepare.sh && ./third_party/install-mkl.sh`
* build:
  * host without CUDA: `./scripts/cmake-build/host_build.sh`
  * host with CUDA: `./scripts/cmake-build/host_build.sh -c`
  * cross build for Android aarch64: `./scripts/cmake-build/cross_build_android_arm_inference.sh`
  * cross build for Android aarch64 (with ARMv8.2 + fp16): `./scripts/cmake-build/cross_build_android_arm_inference.sh -f`
* after building MegEngine, export `MGE_INSTALL_PATH`:
  * host without CUDA: `export MGE_INSTALL_PATH=${megengine_root}/build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_ON/Release/install`
  * host with CUDA: `export MGE_INSTALL_PATH=${megengine_root}/build_dir/host/MGE_WITH_CUDA_ON/MGE_INFERENCE_ONLY_ON/Release/install`
  * cross build for Android aarch64: `export MGE_INSTALL_PATH=${megengine_root}/build_dir/android/arm64-v8a/Release/install`
* you can refer to the [MegEngine build tutorial](https://github.com/MegEngine/MegEngine/blob/master/scripts/cmake-build/BUILD_README.md) to build for other platforms, e.g. Windows/macOS.

### Step 3: build OpenCV

* `git clone https://github.com/opencv/opencv.git`
* `git checkout 3.4.15` (we tested 3.4.15; other versions may need build tweaks)
* patch diff for Android:

  ```
  diff --git a/CMakeLists.txt b/CMakeLists.txt
  index f6a2da5310..10354312c9 100644
  --- a/CMakeLists.txt
  +++ b/CMakeLists.txt
  @@ -643,7 +643,7 @@ if(UNIX)
     if(NOT APPLE)
       CHECK_INCLUDE_FILE(pthread.h HAVE_PTHREAD)
       if(ANDROID)
  -      set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} dl m log)
  +      set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} dl m log z)
     elseif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD|NetBSD|DragonFly|OpenBSD|Haiku")
       set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} m pthread)
     elseif(EMSCRIPTEN)
  ```

* build for host:

  ```
  cd root_dir_of_opencv
  mkdir -p build/install
  cd build
  cmake -DBUILD_JAVA=OFF -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=$PWD/install ..
  make install -j32
  ```

* build for android-aarch64:

  ```
  cd root_dir_of_opencv
  mkdir -p build_android/install
  cd build_android
  cmake -DCMAKE_TOOLCHAIN_FILE="$NDK_ROOT/build/cmake/android.toolchain.cmake" -DANDROID_NDK="$NDK_ROOT" -DANDROID_ABI=arm64-v8a -DANDROID_NATIVE_API_LEVEL=21 -DBUILD_JAVA=OFF -DBUILD_ANDROID_PROJECTS=OFF -DBUILD_ANDROID_EXAMPLES=OFF -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=$PWD/install ..
  make install -j32
  ```

* after building OpenCV, export `OPENCV_INSTALL_INCLUDE_PATH` and `OPENCV_INSTALL_LIB_PATH`:
  * host build:
    * `export OPENCV_INSTALL_INCLUDE_PATH=${path of opencv}/build/install/include`
    * `export OPENCV_INSTALL_LIB_PATH=${path of opencv}/build/install/lib`
  * cross build for Android aarch64:
    * `export OPENCV_INSTALL_INCLUDE_PATH=${path of opencv}/build_android/install/sdk/native/jni/include`
    * `export OPENCV_INSTALL_LIB_PATH=${path of opencv}/build_android/install/sdk/native/libs/arm64-v8a`

### Step 4: build the demo

* run build.sh
  * host: `export CXX=g++`, then `./build.sh`
  * cross Android aarch64: `export CXX=aarch64-linux-android21-clang++`, then `./build.sh`

### Step 5: run the demo

> **Note**: two ways to get the `yolox_s.mge` model file
>
> * refer to the python demo's `dump.py` script.
> * `wget https://github.com/Megvii-BaseDetection/storage/releases/download/0.0.1/yolox_s.mge`

* host:
  * `LD_LIBRARY_PATH=$MGE_INSTALL_PATH/lib/:$OPENCV_INSTALL_LIB_PATH ./yolox yolox_s.mge ../../../assets/dog.jpg cuda/cpu/multithread <warmup_count> <thread_number>`
* cross Android:
  * `adb push`/`scp` `$MGE_INSTALL_PATH/lib/libmegengine.so` to the phone
  * `adb push`/`scp` `$OPENCV_INSTALL_LIB_PATH/*.so` to the phone
  * `adb push`/`scp` `./yolox` and `yolox_s.mge` to the phone
  * `adb push`/`scp` `../../../assets/dog.jpg` to the phone
  * log in to the phone via adb or ssh, then run: `LD_LIBRARY_PATH=. ./yolox yolox_s.mge dog.jpg cpu/multithread <warmup_count> <thread_number> <use_fast_run> <use_weight_preprocess> <run_with_fp16>`
* arguments:
  * `<warmup_count>`: warmup iterations, valid number >= 0
  * `<thread_number>`: thread count, valid number >= 1, only takes effect with the `multithread` device
  * `<use_fast_run>`: if >= 1, use fast-run to choose the best algorithm
  * `<use_weight_preprocess>`: if >= 1, preprocess weights before execution
  * `<run_with_fp16>`: if >= 1, run in fp16 mode

## Acknowledgement

* [MegEngine](https://github.com/MegEngine/MegEngine)
* [OpenCV](https://github.com/opencv/opencv)
* [NDK](https://developer.android.com/ndk)
* [CMake](https://cmake.org/)
````
demo/MegEngine/cpp/build.sh ADDED (+61 lines)
```bash
#!/usr/bin/env bash
set -e

if [ -z "$CXX" ]; then
    echo "please export your c++ toolchain to CXX"
    echo "for example:"
    echo "build for host: export CXX=g++"
    echo "cross build for aarch64-android (located in the NDK): export CXX=aarch64-linux-android21-clang++"
    echo "cross build for aarch64-linux: export CXX=aarch64-linux-gnu-g++"
    exit 1
fi

if [ -z "$MGE_INSTALL_PATH" ]; then
    echo "please refer to ./README.md to init the MGE_INSTALL_PATH env"
    exit 1
fi

if [ -z "$OPENCV_INSTALL_INCLUDE_PATH" ]; then
    echo "please refer to ./README.md to init the OPENCV_INSTALL_INCLUDE_PATH env"
    exit 1
fi

if [ -z "$OPENCV_INSTALL_LIB_PATH" ]; then
    echo "please refer to ./README.md to init the OPENCV_INSTALL_LIB_PATH env"
    exit 1
fi

INCLUDE_FLAG="-I$MGE_INSTALL_PATH/include -I$OPENCV_INSTALL_INCLUDE_PATH"
LINK_FLAG="-L$MGE_INSTALL_PATH/lib/ -lmegengine -L$OPENCV_INSTALL_LIB_PATH -lopencv_core -lopencv_highgui -lopencv_imgproc -lopencv_imgcodecs"
BUILD_FLAG="-static-libstdc++ -O3 -pie -fPIE -g"

if [[ $CXX =~ "android" ]]; then
    LINK_FLAG="${LINK_FLAG} -llog -lz"
fi

echo "CXX: $CXX"
echo "MGE_INSTALL_PATH: $MGE_INSTALL_PATH"
echo "INCLUDE_FLAG: $INCLUDE_FLAG"
echo "LINK_FLAG: $LINK_FLAG"
echo "BUILD_FLAG: $BUILD_FLAG"

# emit a minimal compile_commands.json so editors/clangd can index yolox.cpp
echo "[" > compile_commands.json
echo "{" >> compile_commands.json
echo "\"directory\": \"$PWD\"," >> compile_commands.json
echo "\"command\": \"$CXX yolox.cpp -o yolox ${INCLUDE_FLAG} ${LINK_FLAG}\"," >> compile_commands.json
echo "\"file\": \"$PWD/yolox.cpp\"" >> compile_commands.json
echo "}" >> compile_commands.json
echo "]" >> compile_commands.json
$CXX yolox.cpp -o yolox ${INCLUDE_FLAG} ${LINK_FLAG} ${BUILD_FLAG}

echo "build success, output file: yolox"
if [[ $CXX =~ "android" ]]; then
    echo "try command to run:"
    echo "adb push/scp $MGE_INSTALL_PATH/lib/libmegengine.so android_phone"
    echo "adb push/scp $OPENCV_INSTALL_LIB_PATH/*.so android_phone"
    echo "adb push/scp ./yolox yolox_s.mge android_phone"
    echo "adb push/scp ../../../assets/dog.jpg android_phone"
    echo "adb/ssh to android_phone, then run: LD_LIBRARY_PATH=. ./yolox yolox_s.mge dog.jpg cpu/multithread <warmup_count> <thread_number> <use_fast_run> <use_weight_preprocess>"
else
    echo "try command to run: LD_LIBRARY_PATH=$MGE_INSTALL_PATH/lib/:$OPENCV_INSTALL_LIB_PATH ./yolox yolox_s.mge ../../../assets/dog.jpg cuda/cpu/multithread <warmup_count> <thread_number> <use_fast_run> <use_weight_preprocess>"
fi
```
demo/MegEngine/cpp/yolox.cpp ADDED (+473 lines)
```cpp
// Copyright (C) 2018-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "megbrain/gopt/inference.h"
#include "megbrain/opr/search_policy/algo_chooser_helper.h"
#include "megbrain/serialization/serializer.h"
#include <chrono> // std::chrono is used for timing in main(); include it explicitly
#include <iostream>
#include <iterator>
#include <memory>
#include <opencv2/opencv.hpp>
#include <stdlib.h>
#include <string>
#include <vector>

// detection thresholds
#define NMS_THRESH 0.65
#define BBOX_CONF_THRESH 0.3

constexpr int INPUT_W = 640;
constexpr int INPUT_H = 640;

using namespace mgb;

cv::Mat static_resize(cv::Mat &img) {
  float r = std::min(INPUT_W / (img.cols * 1.0), INPUT_H / (img.rows * 1.0));
  int unpad_w = r * img.cols;
  int unpad_h = r * img.rows;
  cv::Mat re(unpad_h, unpad_w, CV_8UC3);
  cv::resize(img, re, re.size());
  // cv::Mat takes (rows, cols); pad with the conventional gray value 114
  cv::Mat out(INPUT_H, INPUT_W, CV_8UC3, cv::Scalar(114, 114, 114));
  re.copyTo(out(cv::Rect(0, 0, re.cols, re.rows)));
  return out;
}

void blobFromImage(cv::Mat &img, float *blob_data) {
  cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
  int channels = 3;
  int img_h = img.rows;
  int img_w = img.cols;
  std::vector<float> mean = {0.485, 0.456, 0.406};
  std::vector<float> std = {0.229, 0.224, 0.225};
  for (int c = 0; c < channels; c++) {
    for (int h = 0; h < img_h; h++) {
      for (int w = 0; w < img_w; w++) {
        blob_data[c * img_w * img_h + h * img_w + w] =
            (((float)img.at<cv::Vec3b>(h, w)[c]) / 255.0f - mean[c]) / std[c];
      }
    }
  }
}

struct Object {
  cv::Rect_<float> rect;
  int label;
  float prob;
};

struct GridAndStride {
  int grid0;
  int grid1;
  int stride;
};

static void
generate_grids_and_stride(const int target_size, std::vector<int> &strides,
                          std::vector<GridAndStride> &grid_strides) {
  for (auto stride : strides) {
    int num_grid = target_size / stride;
    for (int g1 = 0; g1 < num_grid; g1++) {
      for (int g0 = 0; g0 < num_grid; g0++) {
        grid_strides.push_back((GridAndStride){g0, g1, stride});
      }
    }
  }
}

static void generate_yolox_proposals(std::vector<GridAndStride> grid_strides,
                                     const float *feat_ptr,
                                     float prob_threshold,
                                     std::vector<Object> &objects) {
  const int num_class = 80;
  const int num_anchors = grid_strides.size();

  for (int anchor_idx = 0; anchor_idx < num_anchors; anchor_idx++) {
    const int grid0 = grid_strides[anchor_idx].grid0;
    const int grid1 = grid_strides[anchor_idx].grid1;
    const int stride = grid_strides[anchor_idx].stride;

    const int basic_pos = anchor_idx * 85;

    float x_center = (feat_ptr[basic_pos + 0] + grid0) * stride;
    float y_center = (feat_ptr[basic_pos + 1] + grid1) * stride;
    float w = exp(feat_ptr[basic_pos + 2]) * stride;
    float h = exp(feat_ptr[basic_pos + 3]) * stride;
    float x0 = x_center - w * 0.5f;
    float y0 = y_center - h * 0.5f;

    float box_objectness = feat_ptr[basic_pos + 4];
    for (int class_idx = 0; class_idx < num_class; class_idx++) {
      float box_cls_score = feat_ptr[basic_pos + 5 + class_idx];
      float box_prob = box_objectness * box_cls_score;
      if (box_prob > prob_threshold) {
        Object obj;
        obj.rect.x = x0;
        obj.rect.y = y0;
        obj.rect.width = w;
        obj.rect.height = h;
        obj.label = class_idx;
        obj.prob = box_prob;

        objects.push_back(obj);
      }
    } // class loop
  } // point anchor loop
}

static inline float intersection_area(const Object &a, const Object &b) {
  cv::Rect_<float> inter = a.rect & b.rect;
  return inter.area();
}

static void qsort_descent_inplace(std::vector<Object> &faceobjects, int left,
                                  int right) {
  int i = left;
  int j = right;
  float p = faceobjects[(left + right) / 2].prob;

  while (i <= j) {
    while (faceobjects[i].prob > p)
      i++;

    while (faceobjects[j].prob < p)
      j--;

    if (i <= j) {
      // swap
      std::swap(faceobjects[i], faceobjects[j]);

      i++;
      j--;
    }
  }

#pragma omp parallel sections
  {
#pragma omp section
    {
      if (left < j)
        qsort_descent_inplace(faceobjects, left, j);
    }
#pragma omp section
    {
      if (i < right)
        qsort_descent_inplace(faceobjects, i, right);
    }
  }
}

static void qsort_descent_inplace(std::vector<Object> &objects) {
  if (objects.empty())
    return;

  qsort_descent_inplace(objects, 0, objects.size() - 1);
}

static void nms_sorted_bboxes(const std::vector<Object> &faceobjects,
                              std::vector<int> &picked, float nms_threshold) {
  picked.clear();

  const int n = faceobjects.size();

  std::vector<float> areas(n);
  for (int i = 0; i < n; i++) {
    areas[i] = faceobjects[i].rect.area();
  }

  for (int i = 0; i < n; i++) {
    const Object &a = faceobjects[i];

    int keep = 1;
    for (int j = 0; j < (int)picked.size(); j++) {
      const Object &b = faceobjects[picked[j]];

      // intersection over union
      float inter_area = intersection_area(a, b);
      float union_area = areas[i] + areas[picked[j]] - inter_area;
      // float IoU = inter_area / union_area
      if (inter_area / union_area > nms_threshold)
        keep = 0;
    }

    if (keep)
      picked.push_back(i);
  }
}

static void decode_outputs(const float *prob, std::vector<Object> &objects,
                           float scale, const int img_w, const int img_h) {
  std::vector<Object> proposals;
  std::vector<int> strides = {8, 16, 32};
  std::vector<GridAndStride> grid_strides;

  generate_grids_and_stride(INPUT_W, strides, grid_strides);
  generate_yolox_proposals(grid_strides, prob, BBOX_CONF_THRESH, proposals);
  qsort_descent_inplace(proposals);

  std::vector<int> picked;
  nms_sorted_bboxes(proposals, picked, NMS_THRESH);
  int count = picked.size();
  objects.resize(count);

  for (int i = 0; i < count; i++) {
    objects[i] = proposals[picked[i]];

    // adjust offset to original unpadded
    float x0 = (objects[i].rect.x) / scale;
    float y0 = (objects[i].rect.y) / scale;
    float x1 = (objects[i].rect.x + objects[i].rect.width) / scale;
    float y1 = (objects[i].rect.y + objects[i].rect.height) / scale;

    // clip
    x0 = std::max(std::min(x0, (float)(img_w - 1)), 0.f);
    y0 = std::max(std::min(y0, (float)(img_h - 1)), 0.f);
    x1 = std::max(std::min(x1, (float)(img_w - 1)), 0.f);
    y1 = std::max(std::min(y1, (float)(img_h - 1)), 0.f);

    objects[i].rect.x = x0;
    objects[i].rect.y = y0;
    objects[i].rect.width = x1 - x0;
    objects[i].rect.height = y1 - y0;
  }
}

const float color_list[80][3] = {
    {0.000, 0.447, 0.741}, {0.850, 0.325, 0.098}, {0.929, 0.694, 0.125},
    {0.494, 0.184, 0.556}, {0.466, 0.674, 0.188}, {0.301, 0.745, 0.933},
    {0.635, 0.078, 0.184}, {0.300, 0.300, 0.300}, {0.600, 0.600, 0.600},
    {1.000, 0.000, 0.000}, {1.000, 0.500, 0.000}, {0.749, 0.749, 0.000},
    {0.000, 1.000, 0.000}, {0.000, 0.000, 1.000}, {0.667, 0.000, 1.000},
    {0.333, 0.333, 0.000}, {0.333, 0.667, 0.000}, {0.333, 1.000, 0.000},
    {0.667, 0.333, 0.000}, {0.667, 0.667, 0.000}, {0.667, 1.000, 0.000},
    {1.000, 0.333, 0.000}, {1.000, 0.667, 0.000}, {1.000, 1.000, 0.000},
    {0.000, 0.333, 0.500}, {0.000, 0.667, 0.500}, {0.000, 1.000, 0.500},
    {0.333, 0.000, 0.500}, {0.333, 0.333, 0.500}, {0.333, 0.667, 0.500},
    {0.333, 1.000, 0.500}, {0.667, 0.000, 0.500}, {0.667, 0.333, 0.500},
    {0.667, 0.667, 0.500}, {0.667, 1.000, 0.500}, {1.000, 0.000, 0.500},
    {1.000, 0.333, 0.500}, {1.000, 0.667, 0.500}, {1.000, 1.000, 0.500},
    {0.000, 0.333, 1.000}, {0.000, 0.667, 1.000}, {0.000, 1.000, 1.000},
    {0.333, 0.000, 1.000}, {0.333, 0.333, 1.000}, {0.333, 0.667, 1.000},
    {0.333, 1.000, 1.000}, {0.667, 0.000, 1.000}, {0.667, 0.333, 1.000},
    {0.667, 0.667, 1.000}, {0.667, 1.000, 1.000}, {1.000, 0.000, 1.000},
    {1.000, 0.333, 1.000}, {1.000, 0.667, 1.000}, {0.333, 0.000, 0.000},
    {0.500, 0.000, 0.000}, {0.667, 0.000, 0.000}, {0.833, 0.000, 0.000},
    {1.000, 0.000, 0.000}, {0.000, 0.167, 0.000}, {0.000, 0.333, 0.000},
    {0.000, 0.500, 0.000}, {0.000, 0.667, 0.000}, {0.000, 0.833, 0.000},
    {0.000, 1.000, 0.000}, {0.000, 0.000, 0.167}, {0.000, 0.000, 0.333},
    {0.000, 0.000, 0.500}, {0.000, 0.000, 0.667}, {0.000, 0.000, 0.833},
    {0.000, 0.000, 1.000}, {0.000, 0.000, 0.000}, {0.143, 0.143, 0.143},
    {0.286, 0.286, 0.286}, {0.429, 0.429, 0.429}, {0.571, 0.571, 0.571},
    {0.714, 0.714, 0.714}, {0.857, 0.857, 0.857}, {0.000, 0.447, 0.741},
    {0.314, 0.717, 0.741}, {0.50, 0.5, 0}};

static void draw_objects(const cv::Mat &bgr,
                         const std::vector<Object> &objects) {
  static const char *class_names[] = {
      "person",        "bicycle",      "car",
      "motorcycle",    "airplane",     "bus",
      "train",         "truck",        "boat",
      "traffic light", "fire hydrant", "stop sign",
      "parking meter", "bench",        "bird",
      "cat",           "dog",          "horse",
      "sheep",         "cow",          "elephant",
      "bear",          "zebra",        "giraffe",
      "backpack",      "umbrella",     "handbag",
      "tie",           "suitcase",     "frisbee",
      "skis",          "snowboard",    "sports ball",
      "kite",          "baseball bat", "baseball glove",
      "skateboard",    "surfboard",    "tennis racket",
      "bottle",        "wine glass",   "cup",
      "fork",          "knife",        "spoon",
      "bowl",          "banana",       "apple",
      "sandwich",      "orange",       "broccoli",
      "carrot",        "hot dog",      "pizza",
      "donut",         "cake",         "chair",
      "couch",         "potted plant", "bed",
      "dining table",  "toilet",       "tv",
      "laptop",        "mouse",        "remote",
      "keyboard",      "cell phone",   "microwave",
      "oven",          "toaster",      "sink",
      "refrigerator",  "book",         "clock",
      "vase",          "scissors",     "teddy bear",
      "hair drier",    "toothbrush"};

  cv::Mat image = bgr.clone();

  for (size_t i = 0; i < objects.size(); i++) {
    const Object &obj = objects[i];

    fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
            obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

    cv::Scalar color =
        cv::Scalar(color_list[obj.label][0], color_list[obj.label][1],
                   color_list[obj.label][2]);
    float c_mean = cv::mean(color)[0];
    cv::Scalar txt_color;
    if (c_mean > 0.5) {
      txt_color = cv::Scalar(0, 0, 0);
    } else {
      txt_color = cv::Scalar(255, 255, 255);
    }

    cv::rectangle(image, obj.rect, color * 255, 2);

    char text[256];
    sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100);

    int baseLine = 0;
    cv::Size label_size =
        cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine);

    cv::Scalar txt_bk_color = color * 0.7 * 255;

    int x = obj.rect.x;
    int y = obj.rect.y + 1;
    // int y = obj.rect.y - label_size.height - baseLine;
    if (y > image.rows)
      y = image.rows;
    // if (x + label_size.width > image.cols)
    //   x = image.cols - label_size.width;

    cv::rectangle(
        image,
        cv::Rect(cv::Point(x, y),
                 cv::Size(label_size.width, label_size.height + baseLine)),
        txt_bk_color, -1);

    cv::putText(image, text, cv::Point(x, y + label_size.height),
                cv::FONT_HERSHEY_SIMPLEX, 0.4, txt_color, 1);
  }

  cv::imwrite("out.jpg", image);
  std::cout << "save output to out.jpg" << std::endl;
}

cg::ComputingGraph::OutputSpecItem make_callback_copy(SymbolVar dev,
                                                      HostTensorND &host) {
  auto cb = [&host](DeviceTensorND &d) { host.copy_from(d); };
  return {dev, cb};
}

int main(int argc, char *argv[]) {
  serialization::GraphLoader::LoadConfig load_config;
  load_config.comp_graph = ComputingGraph::make();
  auto &&graph_opt = load_config.comp_graph->options();
  graph_opt.graph_opt_level = 0;

  if (argc != 9) {
    std::cout << "Usage : " << argv[0]
              << " <path_to_model> <path_to_image> <device> <warmup_count> "
                 "<thread_number> <use_fast_run> <use_weight_preprocess> "
                 "<run_with_fp16>"
              << std::endl;
    return EXIT_FAILURE;
  }

  const std::string input_model{argv[1]};
  const std::string input_image_path{argv[2]};
  const std::string device{argv[3]};
  const size_t warmup_count = atoi(argv[4]);
  const size_t thread_number = atoi(argv[5]);
  const size_t use_fast_run = atoi(argv[6]);
  const size_t use_weight_preprocess = atoi(argv[7]);
  const size_t run_with_fp16 = atoi(argv[8]);

  if (device == "cuda") {
    load_config.comp_node_mapper = [](CompNode::Locator &loc) {
      loc.type = CompNode::DeviceType::CUDA;
    };
  } else if (device == "cpu") {
    load_config.comp_node_mapper = [](CompNode::Locator &loc) {
      loc.type = CompNode::DeviceType::CPU;
    };
  } else if (device == "multithread") {
    load_config.comp_node_mapper = [thread_number](CompNode::Locator &loc) {
      loc.type = CompNode::DeviceType::MULTITHREAD;
      loc.device = 0;
      loc.stream = thread_number;
    };
    std::cout << "use " << thread_number << " threads" << std::endl;
  } else {
    std::cout << "device only supports cuda, cpu or multithread" << std::endl;
    return EXIT_FAILURE;
  }

  if (use_weight_preprocess) {
    std::cout << "use weight preprocess" << std::endl;
    graph_opt.graph_opt.enable_weight_preprocess();
  }
  if (run_with_fp16) {
    std::cout << "run with fp16" << std::endl;
    graph_opt.graph_opt.enable_f16_io_comp();
  }

  if (device == "cuda") {
    std::cout << "choose format for cuda" << std::endl;
  } else {
    std::cout << "choose format for non-cuda" << std::endl;
#if defined(__arm__) || defined(__aarch64__)
    if (run_with_fp16) {
      std::cout << "use chw format when fp16 is enabled" << std::endl;
    } else {
      std::cout << "choose nchw44 format for aarch64" << std::endl;
      graph_opt.graph_opt.enable_nchw44();
    }
#endif
#if defined(__x86_64__) || defined(__amd64__) || defined(__i386__)
    // graph_opt.graph_opt.enable_nchw88();
#endif
  }

  std::unique_ptr<serialization::InputFile> inp_file =
      serialization::InputFile::make_fs(input_model.c_str());
  auto loader = serialization::GraphLoader::make(std::move(inp_file));
  serialization::GraphLoader::LoadResult network =
      loader->load(load_config, false);

  if (use_fast_run) {
    std::cout << "use fastrun" << std::endl;
    using S = opr::mixin::AlgoChooserHelper::ExecutionPolicy::Strategy;
    S strategy = static_cast<S>(0);
    strategy = S::PROFILE | S::OPTIMIZED | strategy;
    mgb::gopt::modify_opr_algo_strategy_inplace(network.output_var_list,
                                                strategy);
  }

  auto data = network.tensor_map["data"];
  cv::Mat image = cv::imread(input_image_path);
  cv::Mat pr_img = static_resize(image);
  float *data_ptr = data->resize({1, 3, 640, 640}).ptr<float>();
  blobFromImage(pr_img, data_ptr);
  HostTensorND predict;
  std::unique_ptr<cg::AsyncExecutable> func = network.graph->compile(
      {make_callback_copy(network.output_var_map.begin()->second, predict)});

  for (size_t i = 0; i < warmup_count; i++) {
    std::cout << "warmup: " << i << std::endl;
    func->execute();
    func->wait();
  }
  auto start = std::chrono::system_clock::now();
  func->execute();
  func->wait();
  auto end = std::chrono::system_clock::now();
  std::chrono::duration<double> exec_seconds = end - start;
  std::cout << "elapsed time: " << exec_seconds.count() << "s" << std::endl;

  float *predict_ptr = predict.ptr<float>();
  int img_w = image.cols;
  int img_h = image.rows;
  float scale =
      std::min(INPUT_W / (image.cols * 1.0), INPUT_H / (image.rows * 1.0));
  std::vector<Object> objects;

  decode_outputs(predict_ptr, objects, scale, img_w, img_h);
  draw_objects(image, objects);

  return EXIT_SUCCESS;
}
```
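For readers who prefer Python, here is a minimal NumPy sketch of the decode math performed by `generate_grids_and_stride` and `generate_yolox_proposals` above. It is an illustration only, not part of the demo; the function name `decode` is ours.

```python
import numpy as np

INPUT_SIZE = 640
STRIDES = [8, 16, 32]

def decode(feat, conf_thresh=0.3):
    """feat: (num_anchors, 85) raw head output, laid out per anchor as
    [x, y, w, h, objectness, 80 class scores], same as the C++ expects."""
    grids, strides = [], []
    for s in STRIDES:
        n = INPUT_SIZE // s
        ys, xs = np.meshgrid(np.arange(n), np.arange(n), indexing="ij")
        # x varies fastest, matching the g0 inner loop in the C++
        grids.append(np.stack([xs, ys], axis=-1).reshape(-1, 2))
        strides.append(np.full((n * n, 1), s))
    grids = np.concatenate(grids)      # (num_anchors, 2); 8400 for 640x640
    strides = np.concatenate(strides)  # (num_anchors, 1)

    xy = (feat[:, :2] + grids) * strides   # box centers in input-image pixels
    wh = np.exp(feat[:, 2:4]) * strides    # box sizes in input-image pixels
    scores = feat[:, 4:5] * feat[:, 5:]    # objectness * per-class score
    keep = scores.max(axis=1) > conf_thresh
    return xy[keep], wh[keep], scores[keep]
```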
demo/MegEngine/python/README.md ADDED (+33 lines)
````markdown
# YOLOX-Python-MegEngine

Python version of YOLOX object detection based on [MegEngine](https://github.com/MegEngine/MegEngine).

## Tutorial

### Step 1: install requirements

```
python3 -m pip install megengine -f https://megengine.org.cn/whl/mge.html
```

### Step 2: convert checkpoint weights from torch's path file

```
python3 convert_weights.py -w yolox_s.pth.tar -o yolox_s_mge.pkl
```

### Step 3: run the demo

This part is the same as torch's python demo, but there is no need to specify a device.

```
python3 demo.py image -n yolox-s -c yolox_s_mge.pkl --path ../../../assets/dog.jpg --conf 0.3 --nms 0.65 --tsize 640 --save_result
```

### [Optional] Step 4: dump the model for C++ inference

> **Note**: the resulting model is dumped with `optimize_for_inference` and `enable_fuse_conv_bias_nonlinearity`.

```
python3 dump.py -n yolox-s -c yolox_s_mge.pkl --dump_path yolox_s.mge
```
````
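The steps above can also be driven programmatically. A minimal sketch, assuming `process.py` from this folder (listed in the commit but not shown in this view) exposes `preprocess`/`postprocess` with the positional signatures used in `demo.py` below:

```python
import cv2
import megengine as mge
import megengine.functional as F

from build import build_and_load
from process import postprocess, preprocess  # assumed helpers, mirrored from demo.py

model = build_and_load("yolox_s_mge.pkl", name="yolox-s")  # output of Step 2
model.eval()

img = cv2.imread("../../../assets/dog.jpg")
inp, ratio = preprocess(img, (640, 640), (0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
outputs = model(F.expand_dims(mge.tensor(inp), 0))
# postprocess(outputs, num_classes, conf_thresh, nms_thresh) -> one entry per image
dets = postprocess(outputs, 80, 0.3, 0.65)[0]
print(None if dets is None else dets.shape)
```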
demo/MegEngine/python/build.py ADDED (+54 lines)
```python
#!/usr/bin/env python3
# -*- coding:utf-8 -*-

import megengine as mge
import megengine.module as M
from megengine import jit

from models.yolo_fpn import YOLOFPN
from models.yolo_head import YOLOXHead
from models.yolo_pafpn import YOLOPAFPN
from models.yolox import YOLOX


def build_yolox(name="yolox-s"):
    num_classes = 80

    # value meaning: depth, width
    param_dict = {
        "yolox-nano": (0.33, 0.25),
        "yolox-tiny": (0.33, 0.375),
        "yolox-s": (0.33, 0.50),
        "yolox-m": (0.67, 0.75),
        "yolox-l": (1.0, 1.0),
        "yolox-x": (1.33, 1.25),
    }
    if name == "yolov3":
        depth = 1.0
        width = 1.0
        backbone = YOLOFPN()
        head = YOLOXHead(num_classes, width, in_channels=[128, 256, 512], act="lrelu")
        model = YOLOX(backbone, head)
    else:
        assert name in param_dict
        kwargs = {}
        depth, width = param_dict[name]
        if name == "yolox-nano":
            kwargs["depthwise"] = True
        in_channels = [256, 512, 1024]
        backbone = YOLOPAFPN(depth, width, in_channels=in_channels, **kwargs)
        head = YOLOXHead(num_classes, width, in_channels=in_channels, **kwargs)
        model = YOLOX(backbone, head)

    for m in model.modules():
        if isinstance(m, M.BatchNorm2d):
            m.eps = 1e-3

    return model


def build_and_load(weight_file, name="yolox-s"):
    model = build_yolox(name)
    model_weights = mge.load(weight_file)
    model.load_state_dict(model_weights, strict=False)
    return model
```
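A quick way to see what the `(depth, width)` multipliers in `param_dict` imply for the backbone (see `models/darknet.py` below, where `base_channels = int(wid_mul * 64)` and `base_depth = max(round(dep_mul * 3), 1)`):

```python
# Derived backbone sizes for a few presets; pure arithmetic, no model needed.
for name, (depth, width) in {
    "yolox-s": (0.33, 0.50),
    "yolox-m": (0.67, 0.75),
    "yolox-l": (1.0, 1.0),
}.items():
    print(name, "base_channels =", int(width * 64),
          "base_depth =", max(round(depth * 3), 1))
# yolox-s base_channels = 32 base_depth = 1
# yolox-m base_channels = 48 base_depth = 2
# yolox-l base_channels = 64 base_depth = 3
```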
demo/MegEngine/python/coco_classes.py ADDED (+86 lines)
```python
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.

COCO_CLASSES = (
    "person",
    "bicycle",
    "car",
    "motorcycle",
    "airplane",
    "bus",
    "train",
    "truck",
    "boat",
    "traffic light",
    "fire hydrant",
    "stop sign",
    "parking meter",
    "bench",
    "bird",
    "cat",
    "dog",
    "horse",
    "sheep",
    "cow",
    "elephant",
    "bear",
    "zebra",
    "giraffe",
    "backpack",
    "umbrella",
    "handbag",
    "tie",
    "suitcase",
    "frisbee",
    "skis",
    "snowboard",
    "sports ball",
    "kite",
    "baseball bat",
    "baseball glove",
    "skateboard",
    "surfboard",
    "tennis racket",
    "bottle",
    "wine glass",
    "cup",
    "fork",
    "knife",
    "spoon",
    "bowl",
    "banana",
    "apple",
    "sandwich",
    "orange",
    "broccoli",
    "carrot",
    "hot dog",
    "pizza",
    "donut",
    "cake",
    "chair",
    "couch",
    "potted plant",
    "bed",
    "dining table",
    "toilet",
    "tv",
    "laptop",
    "mouse",
    "remote",
    "keyboard",
    "cell phone",
    "microwave",
    "oven",
    "toaster",
    "sink",
    "refrigerator",
    "book",
    "clock",
    "vase",
    "scissors",
    "teddy bear",
    "hair drier",
    "toothbrush",
)
```
demo/MegEngine/python/convert_weights.py ADDED (+64 lines)
```python
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import argparse
from collections import OrderedDict

import megengine as mge
import torch


def make_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument("-w", "--weights", type=str, help="path of weight file")
    parser.add_argument(
        "-o",
        "--output",
        default="weight_mge.pkl",
        type=str,
        help="path of output weight file",
    )
    return parser


def numpy_weights(weight_file):
    torch_weights = torch.load(weight_file, map_location="cpu")
    if "model" in torch_weights:
        torch_weights = torch_weights["model"]
    new_dict = OrderedDict()
    for k, v in torch_weights.items():
        new_dict[k] = v.cpu().numpy()
    return new_dict


def map_weights(weight_file, output_file):
    torch_weights = numpy_weights(weight_file)

    new_dict = OrderedDict()
    for k, v in torch_weights.items():
        if "num_batches_tracked" in k:
            print("drop: {}".format(k))
            continue
        if k.endswith("bias"):
            print("bias key: {}".format(k))
            v = v.reshape(1, -1, 1, 1)
            new_dict[k] = v
        elif "dconv" in k and "conv.weight" in k:
            print("depthwise conv key: {}".format(k))
            cout, cin, k1, k2 = v.shape
            v = v.reshape(cout, 1, cin, k1, k2)
            new_dict[k] = v
        else:
            new_dict[k] = v

    mge.save(new_dict, output_file)
    print("save weights to {}".format(output_file))


def main():
    parser = make_parser()
    args = parser.parse_args()
    map_weights(args.weights, args.output)


if __name__ == "__main__":
    main()
```
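To illustrate the two reshapes `map_weights` performs, a small NumPy example on dummy arrays (the shapes are made up, not from a real checkpoint):

```python
import numpy as np

bias = np.zeros(64, dtype=np.float32)           # torch stores bias as (C,)
print(bias.reshape(1, -1, 1, 1).shape)          # MegEngine expects (1, 64, 1, 1)

dw = np.zeros((64, 1, 3, 3), dtype=np.float32)  # torch depthwise conv: (cout, cin, k, k)
cout, cin, k1, k2 = dw.shape
print(dw.reshape(cout, 1, cin, k1, k2).shape)   # MegEngine grouped conv: (64, 1, 1, 3, 3)
```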
demo/MegEngine/python/demo.py ADDED (+202 lines)
```python
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.

import argparse
import os
import time

import cv2
import megengine as mge
import megengine.functional as F
from loguru import logger

from coco_classes import COCO_CLASSES
from process import postprocess, preprocess
from visualize import vis
from build import build_and_load

IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"]


def make_parser():
    parser = argparse.ArgumentParser("YOLOX Demo!")
    parser.add_argument(
        "demo", default="image", help="demo type, eg. image, video and webcam"
    )
    parser.add_argument("-n", "--name", type=str, default="yolox-s", help="model name")
    parser.add_argument("--path", default="./test.png", help="path to images or video")
    parser.add_argument("--camid", type=int, default=0, help="webcam demo camera id")
    parser.add_argument(
        "--save_result",
        action="store_true",
        help="whether to save the inference result of image/video",
    )

    parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt for eval")
    parser.add_argument("--conf", default=None, type=float, help="test conf")
    parser.add_argument("--nms", default=None, type=float, help="test nms threshold")
    parser.add_argument("--tsize", default=None, type=int, help="test img size")
    return parser


def get_image_list(path):
    image_names = []
    for maindir, subdir, file_name_list in os.walk(path):
        for filename in file_name_list:
            apath = os.path.join(maindir, filename)
            ext = os.path.splitext(apath)[1]
            if ext in IMAGE_EXT:
                image_names.append(apath)
    return image_names


class Predictor(object):
    def __init__(
        self,
        model,
        confthre=0.01,
        nmsthre=0.65,
        test_size=(640, 640),
        cls_names=COCO_CLASSES,
        trt_file=None,
        decoder=None,
    ):
        self.model = model
        self.cls_names = cls_names
        self.decoder = decoder
        self.num_classes = 80
        self.confthre = confthre
        self.nmsthre = nmsthre
        self.test_size = test_size
        self.rgb_means = (0.485, 0.456, 0.406)
        self.std = (0.229, 0.224, 0.225)

    def inference(self, img):
        img_info = {"id": 0}
        if isinstance(img, str):
            img_info["file_name"] = os.path.basename(img)
            img = cv2.imread(img)
            if img is None:
                raise ValueError("test image path is invalid!")
        else:
            img_info["file_name"] = None

        height, width = img.shape[:2]
        img_info["height"] = height
        img_info["width"] = width
        img_info["raw_img"] = img

        img, ratio = preprocess(img, self.test_size, self.rgb_means, self.std)
        img_info["ratio"] = ratio
        img = F.expand_dims(mge.tensor(img), 0)

        t0 = time.time()
        outputs = self.model(img)
        outputs = postprocess(outputs, self.num_classes, self.confthre, self.nmsthre)
        logger.info("Infer time: {:.4f}s".format(time.time() - t0))
        return outputs, img_info

    def visual(self, output, img_info, cls_conf=0.35):
        ratio = img_info["ratio"]
        img = img_info["raw_img"]
        if output is None:
            return img
        output = output.numpy()

        # preprocessing: resize
        bboxes = output[:, 0:4] / ratio

        cls = output[:, 6]
        scores = output[:, 4] * output[:, 5]

        vis_res = vis(img, bboxes, scores, cls, cls_conf, self.cls_names)
        return vis_res


def image_demo(predictor, vis_folder, path, current_time, save_result):
    if os.path.isdir(path):
        files = get_image_list(path)
    else:
        files = [path]
    files.sort()
    for image_name in files:
        outputs, img_info = predictor.inference(image_name)
        result_image = predictor.visual(outputs[0], img_info)
        if save_result:
            save_folder = os.path.join(
                vis_folder, time.strftime("%Y_%m_%d_%H_%M_%S", current_time)
            )
            os.makedirs(save_folder, exist_ok=True)
            save_file_name = os.path.join(save_folder, os.path.basename(image_name))
            logger.info("Saving detection result in {}".format(save_file_name))
            cv2.imwrite(save_file_name, result_image)
        ch = cv2.waitKey(0)
        if ch == 27 or ch == ord("q") or ch == ord("Q"):
            break


def imageflow_demo(predictor, vis_folder, current_time, args):
    cap = cv2.VideoCapture(args.path if args.demo == "video" else args.camid)
    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)  # float
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float
    fps = cap.get(cv2.CAP_PROP_FPS)
    save_folder = os.path.join(
        vis_folder, time.strftime("%Y_%m_%d_%H_%M_%S", current_time)
    )
    os.makedirs(save_folder, exist_ok=True)
    if args.demo == "video":
        save_path = os.path.join(save_folder, args.path.split("/")[-1])
    else:
        save_path = os.path.join(save_folder, "camera.mp4")
    logger.info(f"video save_path is {save_path}")
    vid_writer = cv2.VideoWriter(
        save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height))
    )
    while True:
        ret_val, frame = cap.read()
        if ret_val:
            outputs, img_info = predictor.inference(frame)
            result_frame = predictor.visual(outputs[0], img_info)
            if args.save_result:
                vid_writer.write(result_frame)
            ch = cv2.waitKey(1)
            if ch == 27 or ch == ord("q") or ch == ord("Q"):
                break
        else:
            break


def main(args):
    file_name = os.path.join("./yolox_outputs", args.name)
    os.makedirs(file_name, exist_ok=True)

    vis_folder = None  # stays None unless results are saved
    if args.save_result:
        vis_folder = os.path.join(file_name, "vis_res")
        os.makedirs(vis_folder, exist_ok=True)

    confthre = 0.01
    nmsthre = 0.65
    test_size = (640, 640)
    if args.conf is not None:
        confthre = args.conf
    if args.nms is not None:
        nmsthre = args.nms
    if args.tsize is not None:
        test_size = (args.tsize, args.tsize)

    model = build_and_load(args.ckpt, name=args.name)
    model.eval()

    predictor = Predictor(model, confthre, nmsthre, test_size, COCO_CLASSES, None, None)
    current_time = time.localtime()
    if args.demo == "image":
        image_demo(predictor, vis_folder, args.path, current_time, args.save_result)
    elif args.demo == "video" or args.demo == "webcam":
        imageflow_demo(predictor, vis_folder, current_time, args)


if __name__ == "__main__":
    args = make_parser().parse_args()
    main(args)
```
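`Predictor` can also be used directly without the CLI. A minimal sketch, assuming it is run from this folder with a converted checkpoint at the hypothetical path `yolox_s_mge.pkl`:

```python
from build import build_and_load
from demo import Predictor  # demo.py guards its CLI behind __main__, so importing is safe

model = build_and_load("yolox_s_mge.pkl", name="yolox-s")
model.eval()

predictor = Predictor(model, confthre=0.3, nmsthre=0.65, test_size=(640, 640))
outputs, img_info = predictor.inference("../../../assets/dog.jpg")
result = predictor.visual(outputs[0], img_info, cls_conf=0.35)  # BGR image with boxes drawn
```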
demo/MegEngine/python/dump.py ADDED (+51 lines)
```python
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) Megvii, Inc. and its affiliates.

import argparse

import megengine as mge
import numpy as np
from megengine import jit

from build import build_and_load


def make_parser():
    parser = argparse.ArgumentParser("YOLOX Demo Dump")
    parser.add_argument("-n", "--name", type=str, default="yolox-s", help="model name")
    parser.add_argument("-c", "--ckpt", default=None, type=str, help="ckpt for eval")
    parser.add_argument(
        "--dump_path", default="model.mge", help="path to save the dumped model"
    )
    return parser


def dump_static_graph(model, graph_name="model.mge"):
    model.eval()
    model.head.decode_in_inference = False

    data = mge.Tensor(np.random.random((1, 3, 640, 640)))

    @jit.trace(capture_as_const=True)
    def pred_func(data):
        outputs = model(data)
        return outputs

    pred_func(data)
    pred_func.dump(
        graph_name,
        arg_names=["data"],
        optimize_for_inference=True,
        enable_fuse_conv_bias_nonlinearity=True,
    )


def main(args):
    model = build_and_load(args.ckpt, name=args.name)
    dump_static_graph(model, args.dump_path)


if __name__ == "__main__":
    args = make_parser().parse_args()
    main(args)
```
demo/MegEngine/python/models/__init__.py ADDED (+9 lines)
```python
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.

from .darknet import CSPDarknet, Darknet
from .yolo_fpn import YOLOFPN
from .yolo_head import YOLOXHead
from .yolo_pafpn import YOLOPAFPN
from .yolox import YOLOX
```
demo/MegEngine/python/models/darknet.py ADDED (+154 lines)
1 |
+
#!/usr/bin/env python3
|
2 |
+
# -*- encoding: utf-8 -*-
|
3 |
+
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
|
4 |
+
|
5 |
+
import megengine.module as M
|
6 |
+
|
7 |
+
from .network_blocks import BaseConv, CSPLayer, DWConv, Focus, ResLayer, SPPBottleneck
|
8 |
+
|
9 |
+
|
10 |
+
class Darknet(M.Module):
|
11 |
+
# number of blocks from dark2 to dark5.
|
12 |
+
depth2blocks = {21: [1, 2, 2, 1], 53: [2, 8, 8, 4]}
|
13 |
+
|
14 |
+
def __init__(
|
15 |
+
self, depth, in_channels=3, stem_out_channels=32, out_features=("dark3", "dark4", "dark5"),
|
16 |
+
):
|
17 |
+
"""
|
18 |
+
Args:
|
19 |
+
depth (int): depth of darknet used in model, usually use [21, 53] for this param.
|
20 |
+
in_channels (int): number of input channels, for example, use 3 for RGB image.
|
21 |
+
stem_out_channels (int): number of output chanels of darknet stem.
|
22 |
+
It decides channels of darknet layer2 to layer5.
|
23 |
+
out_features (Tuple[str]): desired output layer name.
|
24 |
+
"""
|
25 |
+
super().__init__()
|
26 |
+
assert out_features, "please provide output features of Darknet"
|
27 |
+
self.out_features = out_features
|
28 |
+
self.stem = M.Sequential(
|
29 |
+
BaseConv(in_channels, stem_out_channels, ksize=3, stride=1, act="lrelu"),
|
30 |
+
*self.make_group_layer(stem_out_channels, num_blocks=1, stride=2),
|
31 |
+
)
|
32 |
+
in_channels = stem_out_channels * 2 # 64
|
33 |
+
|
34 |
+
num_blocks = Darknet.depth2blocks[depth]
|
35 |
+
# create darknet with `stem_out_channels` and `num_blocks` layers.
|
36 |
+
# to make model structure more clear, we don't use `for` statement in python.
|
37 |
+
self.dark2 = M.Sequential(*self.make_group_layer(in_channels, num_blocks[0], stride=2))
|
38 |
+
in_channels *= 2 # 128
|
39 |
+
self.dark3 = M.Sequential(*self.make_group_layer(in_channels, num_blocks[1], stride=2))
|
40 |
+
in_channels *= 2 # 256
|
41 |
+
self.dark4 = M.Sequential(*self.make_group_layer(in_channels, num_blocks[2], stride=2))
|
42 |
+
in_channels *= 2 # 512
|
43 |
+
|
44 |
+
self.dark5 = M.Sequential(
|
45 |
+
*self.make_group_layer(in_channels, num_blocks[3], stride=2),
|
46 |
+
            *self.make_spp_block([in_channels, in_channels * 2], in_channels * 2),
        )

    def make_group_layer(self, in_channels: int, num_blocks: int, stride: int = 1):
        "starts with conv layer then has `num_blocks` `ResLayer`"
        return [
            BaseConv(in_channels, in_channels * 2, ksize=3, stride=stride, act="lrelu"),
            *[(ResLayer(in_channels * 2)) for _ in range(num_blocks)]
        ]

    def make_spp_block(self, filters_list, in_filters):
        m = M.Sequential(
            *[
                BaseConv(in_filters, filters_list[0], 1, stride=1, act="lrelu"),
                BaseConv(filters_list[0], filters_list[1], 3, stride=1, act="lrelu"),
                SPPBottleneck(
                    in_channels=filters_list[1],
                    out_channels=filters_list[0],
                    activation="lrelu"
                ),
                BaseConv(filters_list[0], filters_list[1], 3, stride=1, act="lrelu"),
                BaseConv(filters_list[1], filters_list[0], 1, stride=1, act="lrelu"),
            ]
        )
        return m

    def forward(self, x):
        outputs = {}
        x = self.stem(x)
        outputs["stem"] = x
        x = self.dark2(x)
        outputs["dark2"] = x
        x = self.dark3(x)
        outputs["dark3"] = x
        x = self.dark4(x)
        outputs["dark4"] = x
        x = self.dark5(x)
        outputs["dark5"] = x
        return {k: v for k, v in outputs.items() if k in self.out_features}


class CSPDarknet(M.Module):

    def __init__(
        self, dep_mul, wid_mul,
        out_features=("dark3", "dark4", "dark5"),
        depthwise=False, act="silu",
    ):
        super().__init__()
        assert out_features, "please provide output features of Darknet"
        self.out_features = out_features
        Conv = DWConv if depthwise else BaseConv

        base_channels = int(wid_mul * 64)  # 64
        base_depth = max(round(dep_mul * 3), 1)  # 3

        # stem
        self.stem = Focus(3, base_channels, ksize=3, act=act)

        # dark2
        self.dark2 = M.Sequential(
            Conv(base_channels, base_channels * 2, 3, 2, act=act),
            CSPLayer(
                base_channels * 2, base_channels * 2,
                n=base_depth, depthwise=depthwise, act=act
            ),
        )

        # dark3
        self.dark3 = M.Sequential(
            Conv(base_channels * 2, base_channels * 4, 3, 2, act=act),
            CSPLayer(
                base_channels * 4, base_channels * 4,
                n=base_depth * 3, depthwise=depthwise, act=act,
            ),
        )

        # dark4
        self.dark4 = M.Sequential(
            Conv(base_channels * 4, base_channels * 8, 3, 2, act=act),
            CSPLayer(
                base_channels * 8, base_channels * 8,
                n=base_depth * 3, depthwise=depthwise, act=act,
            ),
        )

        # dark5
        self.dark5 = M.Sequential(
            Conv(base_channels * 8, base_channels * 16, 3, 2, act=act),
            SPPBottleneck(base_channels * 16, base_channels * 16, activation=act),
            CSPLayer(
                base_channels * 16, base_channels * 16, n=base_depth,
                shortcut=False, depthwise=depthwise, act=act,
            ),
        )

    def forward(self, x):
        outputs = {}
        x = self.stem(x)
        outputs["stem"] = x
        x = self.dark2(x)
        outputs["dark2"] = x
        x = self.dark3(x)
        outputs["dark3"] = x
        x = self.dark4(x)
        outputs["dark4"] = x
        x = self.dark5(x)
        outputs["dark5"] = x
        return {k: v for k, v in outputs.items() if k in self.out_features}
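Both Darknet variants end by filtering their stage dict through `out_features`, so a downstream FPN can pick stages by name. A minimal sketch of exercising `CSPDarknet` (not part of the commit; assumes it is run from `demo/MegEngine/python/` and uses the YOLOX-s multipliers):

# Sketch only: run the CSP backbone on a dummy image and check stage strides.
import numpy as np
import megengine as mge

from models.darknet import CSPDarknet

backbone = CSPDarknet(dep_mul=0.33, wid_mul=0.50)  # YOLOX-s depth/width
backbone.eval()
x = mge.tensor(np.zeros((1, 3, 640, 640), dtype=np.float32))
for name, feat in backbone(x).items():
    print(name, feat.shape)  # dark3 at stride 8, dark4 at 16, dark5 at 32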
demo/MegEngine/python/models/network_blocks.py
ADDED
@@ -0,0 +1,183 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.

import megengine.functional as F
import megengine.module as M


class UpSample(M.Module):

    def __init__(self, scale_factor=2, mode="bilinear"):
        super().__init__()
        self.scale_factor = scale_factor
        self.mode = mode

    def forward(self, x):
        return F.vision.interpolate(x, scale_factor=self.scale_factor, mode=self.mode)


class SiLU(M.Module):
    """export-friendly version of M.SiLU()"""

    @staticmethod
    def forward(x):
        return x * F.sigmoid(x)


def get_activation(name="silu"):
    if name == "silu":
        module = SiLU()
    elif name == "relu":
        module = M.ReLU()
    elif name == "lrelu":
        module = M.LeakyReLU(0.1)
    else:
        raise AttributeError("Unsupported act type: {}".format(name))
    return module


class BaseConv(M.Module):
    """A Conv2d -> Batchnorm -> silu/leaky relu block"""

    def __init__(self, in_channels, out_channels, ksize, stride, groups=1, bias=False, act="silu"):
        super().__init__()
        # same padding
        pad = (ksize - 1) // 2
        self.conv = M.Conv2d(
            in_channels,
            out_channels,
            kernel_size=ksize,
            stride=stride,
            padding=pad,
            groups=groups,
            bias=bias,
        )
        self.bn = M.BatchNorm2d(out_channels)
        self.act = get_activation(act)

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def fuseforward(self, x):
        return self.act(self.conv(x))


class DWConv(M.Module):
    """Depthwise Conv + Conv"""
    def __init__(self, in_channels, out_channels, ksize, stride=1, act="silu"):
        super().__init__()
        self.dconv = BaseConv(
            in_channels, in_channels, ksize=ksize,
            stride=stride, groups=in_channels, act=act
        )
        self.pconv = BaseConv(
            in_channels, out_channels, ksize=1,
            stride=1, groups=1, act=act
        )

    def forward(self, x):
        x = self.dconv(x)
        return self.pconv(x)


class Bottleneck(M.Module):
    # Standard bottleneck
    def __init__(
        self, in_channels, out_channels, shortcut=True,
        expansion=0.5, depthwise=False, act="silu"
    ):
        super().__init__()
        hidden_channels = int(out_channels * expansion)
        Conv = DWConv if depthwise else BaseConv
        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)
        self.conv2 = Conv(hidden_channels, out_channels, 3, stride=1, act=act)
        self.use_add = shortcut and in_channels == out_channels

    def forward(self, x):
        y = self.conv2(self.conv1(x))
        if self.use_add:
            y = y + x
        return y


class ResLayer(M.Module):
    "Residual layer with `in_channels` inputs."
    def __init__(self, in_channels: int):
        super().__init__()
        mid_channels = in_channels // 2
        self.layer1 = BaseConv(in_channels, mid_channels, ksize=1, stride=1, act="lrelu")
        self.layer2 = BaseConv(mid_channels, in_channels, ksize=3, stride=1, act="lrelu")

    def forward(self, x):
        out = self.layer2(self.layer1(x))
        return x + out


class SPPBottleneck(M.Module):
    """Spatial pyramid pooling layer used in YOLOv3-SPP"""
    def __init__(self, in_channels, out_channels, kernel_sizes=(5, 9, 13), activation="silu"):
        super().__init__()
        hidden_channels = in_channels // 2
        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=activation)
        self.m = [M.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2) for ks in kernel_sizes]
        conv2_channels = hidden_channels * (len(kernel_sizes) + 1)
        self.conv2 = BaseConv(conv2_channels, out_channels, 1, stride=1, act=activation)

    def forward(self, x):
        x = self.conv1(x)
        x = F.concat([x] + [m(x) for m in self.m], axis=1)
        x = self.conv2(x)
        return x


class CSPLayer(M.Module):
    """C3 in yolov5, CSP Bottleneck with 3 convolutions"""

    def __init__(
        self, in_channels, out_channels, n=1,
        shortcut=True, expansion=0.5, depthwise=False, act="silu"
    ):
        """
        Args:
            in_channels (int): input channels.
            out_channels (int): output channels.
            n (int): number of Bottlenecks. Default value: 1.
        """
        # ch_in, ch_out, number, shortcut, groups, expansion
        super().__init__()
        hidden_channels = int(out_channels * expansion)  # hidden channels
        self.conv1 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)
        self.conv2 = BaseConv(in_channels, hidden_channels, 1, stride=1, act=act)
        self.conv3 = BaseConv(2 * hidden_channels, out_channels, 1, stride=1, act=act)
        module_list = [
            Bottleneck(hidden_channels, hidden_channels, shortcut, 1.0, depthwise, act=act)
            for _ in range(n)
        ]
        self.m = M.Sequential(*module_list)

    def forward(self, x):
        x_1 = self.conv1(x)
        x_2 = self.conv2(x)
        x_1 = self.m(x_1)
        x = F.concat((x_1, x_2), axis=1)
        return self.conv3(x)


class Focus(M.Module):
    """Focus width and height information into channel space."""

    def __init__(self, in_channels, out_channels, ksize=1, stride=1, act="silu"):
        super().__init__()
        self.conv = BaseConv(in_channels * 4, out_channels, ksize, stride, act=act)

    def forward(self, x):
        # shape of x (b,c,w,h) -> y(b,4c,w/2,h/2)
        patch_top_left = x[..., ::2, ::2]
        patch_top_right = x[..., ::2, 1::2]
        patch_bot_left = x[..., 1::2, ::2]
        patch_bot_right = x[..., 1::2, 1::2]
        x = F.concat(
            (patch_top_left, patch_bot_left, patch_top_right, patch_bot_right,), axis=1,
        )
        return self.conv(x)
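The slicing in `Focus.forward` is a space-to-depth move: each 2x2 pixel neighborhood is stacked into channels before the first convolution, halving width and height while quadrupling channels, with no information lost. A tiny numpy check of the same slicing (sketch, not part of the commit):

# Sketch only: verify the Focus rearrangement (b, c, h, w) -> (b, 4c, h/2, w/2).
import numpy as np

x = np.arange(3 * 4 * 4, dtype=np.float32).reshape(1, 3, 4, 4)
patches = np.concatenate(
    (x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]),
    axis=1,
)
assert patches.shape == (1, 12, 2, 2)  # 4x channels, half the spatial size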
demo/MegEngine/python/models/yolo_fpn.py
ADDED
@@ -0,0 +1,78 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.

import megengine.functional as F
import megengine.module as M

from .darknet import Darknet
from .network_blocks import BaseConv, UpSample


class YOLOFPN(M.Module):
    """
    YOLOFPN module. Darknet 53 is the default backbone of this model.
    """

    def __init__(
        self, depth=53, in_features=["dark3", "dark4", "dark5"],
    ):
        super().__init__()

        self.backbone = Darknet(depth)
        self.in_features = in_features

        # out 1
        self.out1_cbl = self._make_cbl(512, 256, 1)
        self.out1 = self._make_embedding([256, 512], 512 + 256)

        # out 2
        self.out2_cbl = self._make_cbl(256, 128, 1)
        self.out2 = self._make_embedding([128, 256], 256 + 128)

        # upsample
        self.upsample = UpSample(scale_factor=2, mode="bilinear")

    def _make_cbl(self, _in, _out, ks):
        return BaseConv(_in, _out, ks, stride=1, act="lrelu")

    def _make_embedding(self, filters_list, in_filters):
        m = M.Sequential(
            *[
                self._make_cbl(in_filters, filters_list[0], 1),
                self._make_cbl(filters_list[0], filters_list[1], 3),

                self._make_cbl(filters_list[1], filters_list[0], 1),

                self._make_cbl(filters_list[0], filters_list[1], 3),
                self._make_cbl(filters_list[1], filters_list[0], 1),
            ]
        )
        return m

    def forward(self, inputs):
        """
        Args:
            inputs (Tensor): input image.

        Returns:
            Tuple[Tensor]: FPN output features.
        """
        # backbone
        out_features = self.backbone(inputs)
        x2, x1, x0 = [out_features[f] for f in self.in_features]

        # yolo branch 1
        x1_in = self.out1_cbl(x0)
        x1_in = self.upsample(x1_in)
        x1_in = F.concat([x1_in, x1], 1)
        out_dark4 = self.out1(x1_in)

        # yolo branch 2
        x2_in = self.out2_cbl(out_dark4)
        x2_in = self.upsample(x2_in)
        x2_in = F.concat([x2_in, x2], 1)
        out_dark3 = self.out2(x2_in)

        outputs = (out_dark3, out_dark4, x0)
        return outputs
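A hedged sketch of running this Darknet-53 FPN (not part of the commit; the shapes in the comment are what the channel bookkeeping above implies for a 416x416 input):

# Sketch only: the three outputs are ordered fine-to-coarse (stride 8/16/32).
import numpy as np
import megengine as mge

from models.yolo_fpn import YOLOFPN

fpn = YOLOFPN(depth=53)
fpn.eval()
outs = fpn(mge.tensor(np.zeros((1, 3, 416, 416), dtype=np.float32)))
for o in outs:
    print(o.shape)  # expected (1, 128, 52, 52), (1, 256, 26, 26), (1, 512, 13, 13)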
demo/MegEngine/python/models/yolo_head.py
ADDED
@@ -0,0 +1,193 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.

import megengine.functional as F
import megengine.module as M

from .network_blocks import BaseConv, DWConv


def meshgrid(x, y):
    """meshgrid wrapper for megengine"""
    assert len(x.shape) == 1
    assert len(y.shape) == 1
    mesh_shape = (y.shape[0], x.shape[0])
    mesh_x = F.broadcast_to(x, mesh_shape)
    mesh_y = F.broadcast_to(y.reshape(-1, 1), mesh_shape)
    return mesh_x, mesh_y


class YOLOXHead(M.Module):
    def __init__(
        self, num_classes, width=1.0, strides=[8, 16, 32],
        in_channels=[256, 512, 1024], act="silu", depthwise=False
    ):
        """
        Args:
            act (str): activation type of conv. Default value: "silu".
            depthwise (bool): whether to apply depthwise conv in conv branch. Default value: False.
        """
        super().__init__()

        self.n_anchors = 1
        self.num_classes = num_classes
        self.decode_in_inference = True  # save for matching

        self.cls_convs = []
        self.reg_convs = []
        self.cls_preds = []
        self.reg_preds = []
        self.obj_preds = []
        self.stems = []
        Conv = DWConv if depthwise else BaseConv

        for i in range(len(in_channels)):
            self.stems.append(
                BaseConv(
                    in_channels=int(in_channels[i] * width),
                    out_channels=int(256 * width),
                    ksize=1,
                    stride=1,
                    act=act,
                )
            )
            self.cls_convs.append(
                M.Sequential(
                    *[
                        Conv(
                            in_channels=int(256 * width),
                            out_channels=int(256 * width),
                            ksize=3,
                            stride=1,
                            act=act,
                        ),
                        Conv(
                            in_channels=int(256 * width),
                            out_channels=int(256 * width),
                            ksize=3,
                            stride=1,
                            act=act,
                        ),
                    ]
                )
            )
            self.reg_convs.append(
                M.Sequential(
                    *[
                        Conv(
                            in_channels=int(256 * width),
                            out_channels=int(256 * width),
                            ksize=3,
                            stride=1,
                            act=act,
                        ),
                        Conv(
                            in_channels=int(256 * width),
                            out_channels=int(256 * width),
                            ksize=3,
                            stride=1,
                            act=act,
                        ),
                    ]
                )
            )
            self.cls_preds.append(
                M.Conv2d(
                    in_channels=int(256 * width),
                    out_channels=self.n_anchors * self.num_classes,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                )
            )
            self.reg_preds.append(
                M.Conv2d(
                    in_channels=int(256 * width),
                    out_channels=4,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                )
            )
            self.obj_preds.append(
                M.Conv2d(
                    in_channels=int(256 * width),
                    out_channels=self.n_anchors * 1,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                )
            )

        self.use_l1 = False
        self.strides = strides
        self.grids = [F.zeros(1)] * len(in_channels)
        self.expanded_strides = [None] * len(in_channels)

    def forward(self, xin, labels=None, imgs=None):
        outputs = []
        assert not self.training

        for k, (cls_conv, reg_conv, stride_this_level, x) in enumerate(
            zip(self.cls_convs, self.reg_convs, self.strides, xin)
        ):
            x = self.stems[k](x)
            cls_x = x
            reg_x = x

            cls_feat = cls_conv(cls_x)
            cls_output = self.cls_preds[k](cls_feat)

            reg_feat = reg_conv(reg_x)
            reg_output = self.reg_preds[k](reg_feat)
            obj_output = self.obj_preds[k](reg_feat)
            output = F.concat([reg_output, F.sigmoid(obj_output), F.sigmoid(cls_output)], 1)
            outputs.append(output)

        self.hw = [x.shape[-2:] for x in outputs]
        # [batch, n_anchors_all, 85]
        outputs = F.concat([F.flatten(x, start_axis=2) for x in outputs], axis=2)
        outputs = F.transpose(outputs, (0, 2, 1))
        if self.decode_in_inference:
            return self.decode_outputs(outputs)
        else:
            return outputs

    def get_output_and_grid(self, output, k, stride, dtype):
        # Training-time helper; unused in this inference-only demo
        # (forward asserts not self.training).
        grid = self.grids[k]

        batch_size = output.shape[0]
        n_ch = 5 + self.num_classes
        hsize, wsize = output.shape[-2:]
        if grid.shape[2:4] != output.shape[2:4]:
            xv, yv = meshgrid(F.arange(wsize), F.arange(hsize))
            grid = F.stack((xv, yv), 2).reshape(1, 1, hsize, wsize, 2).astype(dtype)
            self.grids[k] = grid

        output = output.reshape(batch_size, self.n_anchors, n_ch, hsize, wsize)
        output = (
            F.transpose(output, (0, 1, 3, 4, 2))
            .reshape(batch_size, self.n_anchors * hsize * wsize, -1)
        )
        grid = grid.reshape(1, -1, 2)
        output[..., :2] = (output[..., :2] + grid) * stride
        output[..., 2:4] = F.exp(output[..., 2:4]) * stride
        return output, grid

    def decode_outputs(self, outputs):
        grids = []
        strides = []
        for (hsize, wsize), stride in zip(self.hw, self.strides):
            xv, yv = meshgrid(F.arange(hsize), F.arange(wsize))
            grid = F.stack((xv, yv), 2).reshape(1, -1, 2)
            grids.append(grid)
            shape = grid.shape[:2]
            strides.append(F.full((*shape, 1), stride))

        grids = F.concat(grids, axis=1)
        strides = F.concat(strides, axis=1)

        outputs[..., :2] = (outputs[..., :2] + grids) * strides
        outputs[..., 2:4] = F.exp(outputs[..., 2:4]) * strides
        return outputs
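`decode_outputs` applies the YOLOX box parameterization: for a cell at grid position (gx, gy) on a level with stride s, the raw regression (tx, ty, tw, th) decodes to cx = (tx + gx) * s, cy = (ty + gy) * s, w = exp(tw) * s, h = exp(th) * s. Worked through numerically (sketch, not part of the commit):

# Sketch only: the decode formula from decode_outputs, checked by hand.
import numpy as np

stride = 8.0
gx, gy = 10.0, 4.0                     # grid cell holding this prediction
tx, ty, tw, th = 0.5, 0.25, 0.0, 1.0   # raw network outputs for that cell
cx = (tx + gx) * stride                # 84.0: box center x in input pixels
cy = (ty + gy) * stride                # 34.0
w = np.exp(tw) * stride                # 8.0
h = np.exp(th) * stride                # ~21.75
assert (cx, cy, w) == (84.0, 34.0, 8.0)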
demo/MegEngine/python/models/yolo_pafpn.py
ADDED
@@ -0,0 +1,111 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.

import megengine.module as M
import megengine.functional as F

from .darknet import CSPDarknet
from .network_blocks import BaseConv, CSPLayer, DWConv, UpSample


class YOLOPAFPN(M.Module):
    """
    YOLOv3 model. Darknet 53 is the default backbone of this model.
    """

    def __init__(
        self, depth=1.0, width=1.0, in_features=("dark3", "dark4", "dark5"),
        in_channels=[256, 512, 1024], depthwise=False, act="silu",
    ):
        super().__init__()
        self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act)
        self.in_features = in_features
        self.in_channels = in_channels
        Conv = DWConv if depthwise else BaseConv

        self.upsample = UpSample(scale_factor=2, mode="bilinear")
        self.lateral_conv0 = BaseConv(
            int(in_channels[2] * width), int(in_channels[1] * width), 1, 1, act=act
        )
        self.C3_p4 = CSPLayer(
            int(2 * in_channels[1] * width),
            int(in_channels[1] * width),
            round(3 * depth),
            False,
            depthwise=depthwise,
            act=act,
        )  # cat

        self.reduce_conv1 = BaseConv(
            int(in_channels[1] * width), int(in_channels[0] * width), 1, 1, act=act
        )
        self.C3_p3 = CSPLayer(
            int(2 * in_channels[0] * width),
            int(in_channels[0] * width),
            round(3 * depth),
            False,
            depthwise=depthwise,
            act=act,
        )

        # bottom-up conv
        self.bu_conv2 = Conv(
            int(in_channels[0] * width), int(in_channels[0] * width), 3, 2, act=act
        )
        self.C3_n3 = CSPLayer(
            int(2 * in_channels[0] * width),
            int(in_channels[1] * width),
            round(3 * depth),
            False,
            depthwise=depthwise,
            act=act,
        )

        # bottom-up conv
        self.bu_conv1 = Conv(
            int(in_channels[1] * width), int(in_channels[1] * width), 3, 2, act=act
        )
        self.C3_n4 = CSPLayer(
            int(2 * in_channels[1] * width),
            int(in_channels[2] * width),
            round(3 * depth),
            False,
            depthwise=depthwise,
            act=act,
        )

    def forward(self, input):
        """
        Args:
            input: input images.

        Returns:
            Tuple[Tensor]: FPN feature.
        """

        # backbone
        out_features = self.backbone(input)
        features = [out_features[f] for f in self.in_features]
        [x2, x1, x0] = features

        fpn_out0 = self.lateral_conv0(x0)  # 1024->512/32
        f_out0 = self.upsample(fpn_out0)  # 512/16
        f_out0 = F.concat([f_out0, x1], 1)  # 512->1024/16
        f_out0 = self.C3_p4(f_out0)  # 1024->512/16

        fpn_out1 = self.reduce_conv1(f_out0)  # 512->256/16
        f_out1 = self.upsample(fpn_out1)  # 256/8
        f_out1 = F.concat([f_out1, x2], 1)  # 256->512/8
        pan_out2 = self.C3_p3(f_out1)  # 512->256/8

        p_out1 = self.bu_conv2(pan_out2)  # 256->256/16
        p_out1 = F.concat([p_out1, fpn_out1], 1)  # 256->512/16
        pan_out1 = self.C3_n3(p_out1)  # 512->512/16

        p_out0 = self.bu_conv1(pan_out1)  # 512->512/32
        p_out0 = F.concat([p_out0, fpn_out0], 1)  # 512->1024/32
        pan_out0 = self.C3_n4(p_out0)  # 1024->1024/32

        outputs = (pan_out2, pan_out1, pan_out0)
        return outputs
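The PAFPN keeps the backbone's three strides but adds the bottom-up path; with the YOLOX-s multipliers the widths work out to half of `in_channels`. A hedged shape check (sketch, not part of the commit):

# Sketch only: YOLOX-s style PAFPN on a 640x640 dummy input.
import numpy as np
import megengine as mge

from models.yolo_pafpn import YOLOPAFPN

fpn = YOLOPAFPN(depth=0.33, width=0.50)
fpn.eval()
outs = fpn(mge.tensor(np.zeros((1, 3, 640, 640), dtype=np.float32)))
for o in outs:
    print(o.shape)  # (1, 128, 80, 80), (1, 256, 40, 40), (1, 512, 20, 20)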
demo/MegEngine/python/models/yolox.py
ADDED
@@ -0,0 +1,34 @@
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.

import megengine.module as M

from .yolo_head import YOLOXHead
from .yolo_pafpn import YOLOPAFPN


class YOLOX(M.Module):
    """
    YOLOX model module. The module list is defined by create_yolov3_modules function.
    The network returns loss values from three YOLO layers during training
    and detection results during test.
    """

    def __init__(self, backbone=None, head=None):
        super().__init__()
        if backbone is None:
            backbone = YOLOPAFPN()
        if head is None:
            head = YOLOXHead(80)

        self.backbone = backbone
        self.head = head

    def forward(self, x):
        # fpn output content features of [dark3, dark4, dark5]
        fpn_outs = self.backbone(x)
        assert not self.training
        outputs = self.head(fpn_outs)

        return outputs
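Assembling the full detector for inference is then a few lines. A sketch (not part of the commit; YOLOX-s numbers, weight loading omitted; see `build.py` and `convert_weights.py` in this directory for the real entry points):

# Sketch only: compose backbone + head into an inference-mode YOLOX-s.
from models.yolo_head import YOLOXHead
from models.yolo_pafpn import YOLOPAFPN
from models.yolox import YOLOX

backbone = YOLOPAFPN(depth=0.33, width=0.50)
head = YOLOXHead(num_classes=80, width=0.50)
model = YOLOX(backbone, head)
model.eval()  # forward() asserts not self.training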
demo/MegEngine/python/process.py
ADDED
@@ -0,0 +1,76 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.

import cv2
import megengine.functional as F
import numpy as np

__all__ = [
    "preprocess",
    "postprocess",
]


def preprocess(image, input_size, mean, std, swap=(2, 0, 1)):
    if len(image.shape) == 3:
        padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0
    else:
        padded_img = np.ones(input_size) * 114.0
    img = np.array(image)
    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    resized_img = cv2.resize(
        img,
        (int(img.shape[1] * r), int(img.shape[0] * r)),
        interpolation=cv2.INTER_LINEAR,
    ).astype(np.float32)
    padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img
    image = padded_img

    image = image.astype(np.float32)
    image = image[:, :, ::-1]
    image /= 255.0
    if mean is not None:
        image -= mean
    if std is not None:
        image /= std
    image = image.transpose(swap)
    image = np.ascontiguousarray(image, dtype=np.float32)
    return image, r


def postprocess(prediction, num_classes, conf_thre=0.7, nms_thre=0.45):
    box_corner = F.zeros_like(prediction)
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
    box_corner[:, :, 2] = prediction[:, :, 0] + prediction[:, :, 2] / 2
    box_corner[:, :, 3] = prediction[:, :, 1] + prediction[:, :, 3] / 2
    prediction[:, :, :4] = box_corner[:, :, :4]

    output = [None for _ in range(len(prediction))]
    for i, image_pred in enumerate(prediction):

        # If none are remaining => process next image
        if not image_pred.shape[0]:
            continue
        # Get score and class with highest confidence
        class_conf = F.max(image_pred[:, 5 : 5 + num_classes], 1, keepdims=True)
        class_pred = F.argmax(image_pred[:, 5 : 5 + num_classes], 1, keepdims=True)

        class_conf_squeeze = F.squeeze(class_conf)
        conf_mask = image_pred[:, 4] * class_conf_squeeze >= conf_thre
        detections = F.concat((image_pred[:, :5], class_conf, class_pred), 1)
        detections = detections[conf_mask]
        if not detections.shape[0]:
            continue

        nms_out_index = F.vision.nms(
            detections[:, :4], detections[:, 4] * detections[:, 5], nms_thre,
        )
        detections = detections[nms_out_index]
        if output[i] is None:
            output[i] = detections
        else:
            output[i] = F.concat((output[i], detections))

    return output
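End to end, `preprocess` letterboxes the image onto a gray (114) canvas and returns the resize ratio `r`, while `postprocess` converts center-size boxes to corners, filters by obj * cls confidence, and runs NMS. A hedged sketch of the intended flow (not part of the commit; `model` is the eval-mode YOLOX from the sketch above, thresholds are illustrative):

# Sketch only: preprocess -> model -> postprocess, then rescale boxes by 1/r.
import cv2
import megengine as mge

from process import preprocess, postprocess

img = cv2.imread("input.jpg")  # hypothetical input path
img_t, r = preprocess(img, (640, 640), mean=None, std=None)
out = model(mge.tensor(img_t[None, ...]))   # (1, n_anchors_all, 5 + num_classes)
dets = postprocess(out, num_classes=80, conf_thre=0.25, nms_thre=0.45)[0]
if dets is not None:
    boxes = dets[:, :4] / r                 # map back to the original image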
demo/MegEngine/python/visualize.py
ADDED
@@ -0,0 +1,128 @@
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.

import cv2
import numpy as np

__all__ = ["vis"]


def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None):

    for i in range(len(boxes)):
        box = boxes[i]
        cls_id = int(cls_ids[i])
        score = scores[i]
        if score < conf:
            continue
        x0 = int(box[0])
        y0 = int(box[1])
        x1 = int(box[2])
        y1 = int(box[3])

        color = (_COLORS[cls_id] * 255).astype(np.uint8).tolist()
        text = '{}:{:.1f}%'.format(class_names[cls_id], score * 100)
        txt_color = (0, 0, 0) if np.mean(_COLORS[cls_id]) > 0.5 else (255, 255, 255)
        font = cv2.FONT_HERSHEY_SIMPLEX

        txt_size = cv2.getTextSize(text, font, 0.4, 1)[0]
        cv2.rectangle(img, (x0, y0), (x1, y1), color, 2)

        txt_bk_color = (_COLORS[cls_id] * 255 * 0.7).astype(np.uint8).tolist()
        cv2.rectangle(
            img,
            (x0, y0 + 1),
            (x0 + txt_size[0] + 1, y0 + int(1.5*txt_size[1])),
            txt_bk_color,
            -1
        )
        cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1)

    return img


_COLORS = np.array(
    [
        0.000, 0.447, 0.741,
        0.850, 0.325, 0.098,
        0.929, 0.694, 0.125,
        0.494, 0.184, 0.556,
        0.466, 0.674, 0.188,
        0.301, 0.745, 0.933,
        0.635, 0.078, 0.184,
        0.300, 0.300, 0.300,
        0.600, 0.600, 0.600,
        1.000, 0.000, 0.000,
        1.000, 0.500, 0.000,
        0.749, 0.749, 0.000,
        0.000, 1.000, 0.000,
        0.000, 0.000, 1.000,
        0.667, 0.000, 1.000,
        0.333, 0.333, 0.000,
        0.333, 0.667, 0.000,
        0.333, 1.000, 0.000,
        0.667, 0.333, 0.000,
        0.667, 0.667, 0.000,
        0.667, 1.000, 0.000,
        1.000, 0.333, 0.000,
        1.000, 0.667, 0.000,
        1.000, 1.000, 0.000,
        0.000, 0.333, 0.500,
        0.000, 0.667, 0.500,
        0.000, 1.000, 0.500,
        0.333, 0.000, 0.500,
        0.333, 0.333, 0.500,
        0.333, 0.667, 0.500,
        0.333, 1.000, 0.500,
        0.667, 0.000, 0.500,
        0.667, 0.333, 0.500,
        0.667, 0.667, 0.500,
        0.667, 1.000, 0.500,
        1.000, 0.000, 0.500,
        1.000, 0.333, 0.500,
        1.000, 0.667, 0.500,
        1.000, 1.000, 0.500,
        0.000, 0.333, 1.000,
        0.000, 0.667, 1.000,
        0.000, 1.000, 1.000,
        0.333, 0.000, 1.000,
        0.333, 0.333, 1.000,
        0.333, 0.667, 1.000,
        0.333, 1.000, 1.000,
        0.667, 0.000, 1.000,
        0.667, 0.333, 1.000,
        0.667, 0.667, 1.000,
        0.667, 1.000, 1.000,
        1.000, 0.000, 1.000,
        1.000, 0.333, 1.000,
        1.000, 0.667, 1.000,
        0.333, 0.000, 0.000,
        0.500, 0.000, 0.000,
        0.667, 0.000, 0.000,
        0.833, 0.000, 0.000,
        1.000, 0.000, 0.000,
        0.000, 0.167, 0.000,
        0.000, 0.333, 0.000,
        0.000, 0.500, 0.000,
        0.000, 0.667, 0.000,
        0.000, 0.833, 0.000,
        0.000, 1.000, 0.000,
        0.000, 0.000, 0.167,
        0.000, 0.000, 0.333,
        0.000, 0.000, 0.500,
        0.000, 0.000, 0.667,
        0.000, 0.000, 0.833,
        0.000, 0.000, 1.000,
        0.000, 0.000, 0.000,
        0.143, 0.143, 0.143,
        0.286, 0.286, 0.286,
        0.429, 0.429, 0.429,
        0.571, 0.571, 0.571,
        0.714, 0.714, 0.714,
        0.857, 0.857, 0.857,
        0.000, 0.447, 0.741,
        0.314, 0.717, 0.741,
        0.50, 0.5, 0
    ]
).astype(np.float32).reshape(-1, 3)
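Continuing the process.py sketch above, the detections can be drawn with `vis`; the columns of each detection row are [x0, y0, x1, y1, obj_conf, cls_conf, cls_id]. A sketch (not part of the commit; the `COCO_CLASSES` name from `coco_classes.py` is assumed):

# Sketch only: draw postprocess() results, reusing img, r and dets from above.
import cv2

from coco_classes import COCO_CLASSES  # assumed export of coco_classes.py
from visualize import vis

dets_np = dets.numpy()
result = vis(
    img,
    boxes=dets_np[:, :4] / r,
    scores=dets_np[:, 4] * dets_np[:, 5],
    cls_ids=dets_np[:, 6],
    conf=0.35,
    class_names=COCO_CLASSES,
)
cv2.imwrite("result.jpg", result)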