#include "yolov5.hpp" #include "zedcam.hpp" #include static volatile bool keep_running = true; void keyboard_handler(int sig) { // handle keyboard interrupt if (sig == SIGINT) keep_running = false; } int main(int argc, char** argv) { signal(SIGINT, keyboard_handler); cudaSetDevice(DEVICE); // CUcontext ctx; // CUdevice device; // cuInit(0); // cuDeviceGet(&device, 0); // cuCtxCreate(&ctx, 0, device); std::string engine_name = "../mcnet.engine"; // deserialize the .engine and run inference std::ifstream file(engine_name, std::ios::binary); if (!file.good()) { std::cerr << "read " << engine_name << " error!" << std::endl; return -1; } char *trtModelStream = nullptr; size_t size = 0; file.seekg(0, file.end); size = file.tellg(); file.seekg(0, file.beg); trtModelStream = new char[size]; assert(trtModelStream); file.read(trtModelStream, size); file.close(); // prepare data --------------------------- static float det_out[BATCH_SIZE * OUTPUT_SIZE]; static int seg_out[BATCH_SIZE * IMG_H * IMG_W]; static int lane_out[BATCH_SIZE * IMG_H * IMG_W]; IRuntime* runtime = createInferRuntime(gLogger); assert(runtime != nullptr); ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size); assert(engine != nullptr); IExecutionContext* context = engine->createExecutionContext(); assert(context != nullptr); delete[] trtModelStream; assert(engine->getNbBindings() == 4); void* buffers[4]; // In order to bind the buffers, we need to know the names of the input and output tensors. // Note that indices are guaranteed to be less than IEngine::getNbBindings() const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME); const int output_det_index = engine->getBindingIndex(OUTPUT_DET_NAME); const int output_seg_index = engine->getBindingIndex(OUTPUT_SEG_NAME); const int output_lane_index = engine->getBindingIndex(OUTPUT_LANE_NAME); assert(inputIndex == 0); assert(output_det_index == 1); assert(output_seg_index == 2); assert(output_lane_index == 3); // Create GPU buffers on device CUDA_CHECK(cudaMalloc(&buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float))); CUDA_CHECK(cudaMalloc(&buffers[output_det_index], BATCH_SIZE * OUTPUT_SIZE * sizeof(float))); CUDA_CHECK(cudaMalloc(&buffers[output_seg_index], BATCH_SIZE * IMG_H * IMG_W * sizeof(int))); CUDA_CHECK(cudaMalloc(&buffers[output_lane_index], BATCH_SIZE * IMG_H * IMG_W * sizeof(int))); // Create stream cudaStream_t stream; CUDA_CHECK(cudaStreamCreate(&stream)); // create zed auto zed = create_camera(); sl::Resolution image_size = zed->getCameraInformation().camera_configuration.resolution; sl::Mat img_zed(image_size.width, image_size.height, sl::MAT_TYPE::U8_C4, sl::MEM::GPU); cv::cuda::GpuMat img_ocv = slMat2cvMatGPU(img_zed); cv::cuda::GpuMat cvt_img(image_size.height, image_size.width, CV_8UC3); // store seg results cv::Mat tmp_seg(IMG_H, IMG_W, CV_32S, seg_out); // sotore lane results cv::Mat tmp_lane(IMG_H, IMG_W, CV_32S, lane_out); cv::Mat seg_res(image_size.height, image_size.width, CV_32S); cv::Mat lane_res(image_size.height, image_size.width, CV_32S); char key = ' '; while (keep_running and key != 'q') { // retrieve img if (zed->grab() != sl::ERROR_CODE::SUCCESS) continue; zed->retrieveImage(img_zed, sl::VIEW::LEFT, sl::MEM::GPU); cudaSetDevice(DEVICE); cv::cuda::cvtColor(img_ocv, cvt_img, cv::COLOR_BGRA2BGR); // preprocess ~3ms preprocess_img_gpu(cvt_img, (float*)buffers[inputIndex], INPUT_W, INPUT_H); // letterbox // buffers[inputIndex] = pr_img.data; // Run inference auto start = std::chrono::system_clock::now(); // cuCtxPushCurrent(ctx); doInference(*context, stream, buffers, det_out, seg_out, lane_out, BATCH_SIZE); // cuCtxPopCurrent(&ctx); auto end = std::chrono::system_clock::now(); std::cout << std::chrono::duration_cast(end - start).count() << "ms" << std::endl; // postprocess ~0ms std::vector batch_res; nms(batch_res, det_out, CONF_THRESH, NMS_THRESH); cv::resize(tmp_seg, seg_res, seg_res.size(), 0, 0, cv::INTER_NEAREST); cv::resize(tmp_lane, lane_res, lane_res.size(), 0, 0, cv::INTER_NEAREST); // show results //std::cout << res.size() << std::endl; visualization(cvt_img, seg_res, lane_res, batch_res, key); } // destroy windows #ifdef SHOW_IMG cv::destroyAllWindows(); #endif // close camera img_zed.free(); zed->close(); delete zed; // Release stream and buffers cudaStreamDestroy(stream); CUDA_CHECK(cudaFree(buffers[inputIndex])); CUDA_CHECK(cudaFree(buffers[output_det_index])); CUDA_CHECK(cudaFree(buffers[output_seg_index])); CUDA_CHECK(cudaFree(buffers[output_lane_index])); // Destroy the engine context->destroy(); engine->destroy(); runtime->destroy(); return 0; }