// Source snapshot: revision 67bb36a, file size 5,267 bytes.
#include "yolov5.hpp"
#include "zedcam.hpp"
#include <csignal>
// Run flag polled by the main loop; cleared by keyboard_handler() on SIGINT.
// volatile std::sig_atomic_t is the object type a signal handler is guaranteed
// to be able to write safely; a plain volatile bool carries no such guarantee.
static volatile std::sig_atomic_t keep_running = 1;

// Signal handler requesting shutdown on keyboard interrupt.
// sig: number of the delivered signal; anything other than SIGINT is ignored.
void keyboard_handler(int sig) {
    if (sig == SIGINT)
        keep_running = 0;
}
// Entry point: loads the serialized TensorRT engine from ../mcnet.engine, then
// repeatedly grabs a ZED camera frame, preprocesses it on the GPU, runs
// inference, post-processes the detection/segmentation/lane outputs and hands
// them to visualization(). The loop ends when SIGINT clears keep_running or
// when key becomes 'q'.
// argc/argv are unused.
// Returns 0 on normal shutdown and -1 when the engine file cannot be read or
// the TensorRT objects cannot be set up.
int main(int argc, char** argv) {
    (void)argc;
    (void)argv;

    signal(SIGINT, keyboard_handler);
    CUDA_CHECK(cudaSetDevice(DEVICE));

    const std::string engine_name = "../mcnet.engine";

    // Read the whole serialized engine into host memory.
    std::ifstream file(engine_name, std::ios::binary);
    if (!file.good()) {
        std::cerr << "read " << engine_name << " error!" << std::endl;
        return -1;
    }
    file.seekg(0, file.end);
    // tellg() reports -1 on failure; check it before converting to an
    // unsigned size, otherwise the allocation below would be enormous.
    const std::streamoff engine_len = file.tellg();
    file.seekg(0, file.beg);
    if (engine_len <= 0) {
        std::cerr << "read " << engine_name << " error!" << std::endl;
        return -1;
    }
    const size_t size = static_cast<size_t>(engine_len);
    std::vector<char> trtModelStream(size);
    file.read(trtModelStream.data(), static_cast<std::streamsize>(size));
    if (!file) {
        std::cerr << "read " << engine_name << " error!" << std::endl;
        return -1;
    }
    file.close();

    // Host-side output buffers. They have static storage duration, so they
    // are not placed on main's stack.
    static float det_out[BATCH_SIZE * OUTPUT_SIZE];
    static int seg_out[BATCH_SIZE * IMG_H * IMG_W];
    static int lane_out[BATCH_SIZE * IMG_H * IMG_W];

    // Build runtime -> engine -> execution context. These are checked at
    // runtime rather than with assert() so the checks survive NDEBUG builds.
    IRuntime* runtime = createInferRuntime(gLogger);
    if (runtime == nullptr) {
        std::cerr << "failed to create TensorRT runtime" << std::endl;
        return -1;
    }
    ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream.data(), size);
    if (engine == nullptr) {
        std::cerr << "failed to deserialize " << engine_name << std::endl;
        runtime->destroy();
        return -1;
    }
    IExecutionContext* context = engine->createExecutionContext();
    if (context == nullptr) {
        std::cerr << "failed to create execution context" << std::endl;
        engine->destroy();
        runtime->destroy();
        return -1;
    }
    // The serialized blob is released here, after the execution context
    // has been created.
    std::vector<char>().swap(trtModelStream);

    // In order to bind the buffers, we need to know the names of the input
    // and output tensors. The code below relies on exactly four bindings in
    // the order input, det, seg, lane.
    void* buffers[4];
    const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
    const int output_det_index = engine->getBindingIndex(OUTPUT_DET_NAME);
    const int output_seg_index = engine->getBindingIndex(OUTPUT_SEG_NAME);
    const int output_lane_index = engine->getBindingIndex(OUTPUT_LANE_NAME);
    if (engine->getNbBindings() != 4 || inputIndex != 0 || output_det_index != 1 ||
        output_seg_index != 2 || output_lane_index != 3) {
        // A mismatching index would otherwise write outside buffers[].
        std::cerr << "unexpected binding layout in " << engine_name << std::endl;
        context->destroy();
        engine->destroy();
        runtime->destroy();
        return -1;
    }

    // Create GPU buffers on device
    CUDA_CHECK(cudaMalloc(&buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
    CUDA_CHECK(cudaMalloc(&buffers[output_det_index], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));
    CUDA_CHECK(cudaMalloc(&buffers[output_seg_index], BATCH_SIZE * IMG_H * IMG_W * sizeof(int)));
    CUDA_CHECK(cudaMalloc(&buffers[output_lane_index], BATCH_SIZE * IMG_H * IMG_W * sizeof(int)));

    // Create stream
    cudaStream_t stream;
    CUDA_CHECK(cudaStreamCreate(&stream));

    // Create the ZED camera and the GPU images used for each frame.
    auto zed = create_camera();
    sl::Resolution image_size = zed->getCameraInformation().camera_configuration.resolution;
    sl::Mat img_zed(image_size.width, image_size.height, sl::MAT_TYPE::U8_C4, sl::MEM::GPU);
    // NOTE(review): img_ocv presumably aliases img_zed's GPU memory rather
    // than copying it -- confirm against slMat2cvMatGPU.
    cv::cuda::GpuMat img_ocv = slMat2cvMatGPU(img_zed);
    cv::cuda::GpuMat cvt_img(image_size.height, image_size.width, CV_8UC3);

    // cv::Mat headers over the host output buffers (network resolution) ...
    cv::Mat tmp_seg(IMG_H, IMG_W, CV_32S, seg_out);
    cv::Mat tmp_lane(IMG_H, IMG_W, CV_32S, lane_out);
    // ... and the same results resized to the camera resolution.
    cv::Mat seg_res(image_size.height, image_size.width, CV_32S);
    cv::Mat lane_res(image_size.height, image_size.width, CV_32S);

    char key = ' ';
    while (keep_running && key != 'q') {
        // Retrieve the next frame; skip this iteration when the grab fails.
        if (zed->grab() != sl::ERROR_CODE::SUCCESS) continue;
        zed->retrieveImage(img_zed, sl::VIEW::LEFT, sl::MEM::GPU);
        // NOTE(review): the device is re-selected on every frame, presumably
        // because the ZED calls above may change the current CUDA device --
        // confirm.
        CUDA_CHECK(cudaSetDevice(DEVICE));
        cv::cuda::cvtColor(img_ocv, cvt_img, cv::COLOR_BGRA2BGR);

        // preprocess ~3ms
        preprocess_img_gpu(cvt_img, (float*)buffers[inputIndex], INPUT_W, INPUT_H);  // letterbox

        // Run inference. steady_clock is monotonic, so the printed latency is
        // not affected by wall-clock adjustments.
        const auto start = std::chrono::steady_clock::now();
        doInference(*context, stream, buffers, det_out, seg_out, lane_out, BATCH_SIZE);
        const auto end = std::chrono::steady_clock::now();
        std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;

        // postprocess ~0ms
        std::vector<Yolo::Detection> batch_res;
        nms(batch_res, det_out, CONF_THRESH, NMS_THRESH);
        cv::resize(tmp_seg, seg_res, seg_res.size(), 0, 0, cv::INTER_NEAREST);
        cv::resize(tmp_lane, lane_res, lane_res.size(), 0, 0, cv::INTER_NEAREST);

        // Show results.
        // NOTE(review): key is never assigned in this function, so it is
        // presumably updated by visualization() through a reference -- confirm.
        visualization(cvt_img, seg_res, lane_res, batch_res, key);
    }

    // destroy windows
#ifdef SHOW_IMG
    cv::destroyAllWindows();
#endif

    // close camera
    img_zed.free();
    zed->close();
    delete zed;

    // Release stream and buffers
    CUDA_CHECK(cudaStreamDestroy(stream));
    CUDA_CHECK(cudaFree(buffers[inputIndex]));
    CUDA_CHECK(cudaFree(buffers[output_det_index]));
    CUDA_CHECK(cudaFree(buffers[output_seg_index]));
    CUDA_CHECK(cudaFree(buffers[output_lane_index]));

    // Destroy the engine
    context->destroy();
    engine->destroy();
    runtime->destroy();
    return 0;
}
|