Object Detection
File size: 5,267 Bytes
67bb36a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#include "yolov5.hpp"
#include "zedcam.hpp"
#include <csignal>

// Loop-control flag shared between main() and the SIGINT handler.
// It is declared as volatile std::sig_atomic_t because that is a type the
// C++ standard guarantees may be written from an asynchronous signal handler
// and then read by the interrupted program; a plain `volatile bool` carries
// no such guarantee. Non-zero means "keep running".
static volatile std::sig_atomic_t keep_running = 1;


// Signal handler installed by main() for SIGINT (Ctrl+C).
// On SIGINT it clears keep_running so the capture loop can finish its current
// iteration and fall through to the normal cleanup code; any other signal
// number is ignored.
void keyboard_handler(int sig) {
    if (sig == SIGINT)
        keep_running = 0;
}


int main(int argc, char** argv) {
    signal(SIGINT, keyboard_handler);
    cudaSetDevice(DEVICE);
    // CUcontext ctx;
    // CUdevice device;
    // cuInit(0);
    // cuDeviceGet(&device, 0);
    // cuCtxCreate(&ctx, 0, device);

    std::string engine_name = "../mcnet.engine";

    // deserialize the .engine and run inference
    std::ifstream file(engine_name, std::ios::binary);
    if (!file.good()) {
        std::cerr << "read " << engine_name << " error!" << std::endl;
        return -1;
    }
    char *trtModelStream = nullptr;
    size_t size = 0;
    file.seekg(0, file.end);
    size = file.tellg();
    file.seekg(0, file.beg);
    trtModelStream = new char[size];
    assert(trtModelStream);
    file.read(trtModelStream, size);
    file.close();

    // prepare data ---------------------------
    static float det_out[BATCH_SIZE * OUTPUT_SIZE];
    static int seg_out[BATCH_SIZE * IMG_H * IMG_W];
    static int lane_out[BATCH_SIZE * IMG_H * IMG_W];
    IRuntime* runtime = createInferRuntime(gLogger);
    assert(runtime != nullptr);
    ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
    assert(engine != nullptr);
    IExecutionContext* context = engine->createExecutionContext();
    assert(context != nullptr);
    delete[] trtModelStream;
    assert(engine->getNbBindings() == 4);
    void* buffers[4];
    // In order to bind the buffers, we need to know the names of the input and output tensors.
    // Note that indices are guaranteed to be less than IEngine::getNbBindings()
    const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
    const int output_det_index = engine->getBindingIndex(OUTPUT_DET_NAME);
    const int output_seg_index = engine->getBindingIndex(OUTPUT_SEG_NAME);
    const int output_lane_index = engine->getBindingIndex(OUTPUT_LANE_NAME);
    assert(inputIndex == 0);
    assert(output_det_index == 1);
    assert(output_seg_index == 2);
    assert(output_lane_index == 3);
    // Create GPU buffers on device
    CUDA_CHECK(cudaMalloc(&buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
    CUDA_CHECK(cudaMalloc(&buffers[output_det_index], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));
    CUDA_CHECK(cudaMalloc(&buffers[output_seg_index], BATCH_SIZE * IMG_H * IMG_W * sizeof(int)));
    CUDA_CHECK(cudaMalloc(&buffers[output_lane_index], BATCH_SIZE * IMG_H * IMG_W * sizeof(int)));
    // Create stream
    cudaStream_t stream;
    CUDA_CHECK(cudaStreamCreate(&stream));

    // create zed
    auto zed = create_camera();
    sl::Resolution image_size = zed->getCameraInformation().camera_configuration.resolution;
    sl::Mat img_zed(image_size.width, image_size.height, sl::MAT_TYPE::U8_C4, sl::MEM::GPU);
    cv::cuda::GpuMat img_ocv = slMat2cvMatGPU(img_zed);
    cv::cuda::GpuMat cvt_img(image_size.height, image_size.width, CV_8UC3);

    // store seg results
    cv::Mat tmp_seg(IMG_H, IMG_W, CV_32S, seg_out);
    // sotore lane results
    cv::Mat tmp_lane(IMG_H, IMG_W, CV_32S, lane_out);
    cv::Mat seg_res(image_size.height, image_size.width, CV_32S);
    cv::Mat lane_res(image_size.height, image_size.width, CV_32S);

    char key = ' ';
    while (keep_running and key != 'q') {
        // retrieve img
        if (zed->grab() != sl::ERROR_CODE::SUCCESS) continue;
        zed->retrieveImage(img_zed, sl::VIEW::LEFT, sl::MEM::GPU);
        cudaSetDevice(DEVICE);
        cv::cuda::cvtColor(img_ocv, cvt_img, cv::COLOR_BGRA2BGR);
        
        // preprocess ~3ms
        preprocess_img_gpu(cvt_img, (float*)buffers[inputIndex], INPUT_W, INPUT_H); // letterbox
        
        // buffers[inputIndex] = pr_img.data;
        // Run inference
        auto start = std::chrono::system_clock::now();
        // cuCtxPushCurrent(ctx);
        doInference(*context, stream, buffers, det_out, seg_out, lane_out, BATCH_SIZE);
        // cuCtxPopCurrent(&ctx);
        auto end = std::chrono::system_clock::now();
        std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;

        // postprocess ~0ms
        std::vector<Yolo::Detection> batch_res;
        nms(batch_res, det_out, CONF_THRESH, NMS_THRESH);
        cv::resize(tmp_seg, seg_res, seg_res.size(), 0, 0, cv::INTER_NEAREST);
        cv::resize(tmp_lane, lane_res, lane_res.size(), 0, 0, cv::INTER_NEAREST);

        // show results
        //std::cout << res.size() << std::endl;
        visualization(cvt_img, seg_res, lane_res, batch_res, key);
    }
    // destroy windows
#ifdef SHOW_IMG
    cv::destroyAllWindows();
#endif
    // close camera
    img_zed.free();
    zed->close();
    delete zed;
    // Release stream and buffers
    cudaStreamDestroy(stream);
    CUDA_CHECK(cudaFree(buffers[inputIndex]));
    CUDA_CHECK(cudaFree(buffers[output_det_index]));
    CUDA_CHECK(cudaFree(buffers[output_seg_index]));
    CUDA_CHECK(cudaFree(buffers[output_lane_index]));
    // Destroy the engine
    context->destroy();
    engine->destroy();
    runtime->destroy();
    return 0;
}