-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathzed_inference.cpp
More file actions
222 lines (174 loc) · 7.77 KB
/
zed_inference.cpp
File metadata and controls
222 lines (174 loc) · 7.77 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#include "zed_inference.h"
// General imports
#include <unistd.h>
// C++ standard library (used directly below: streams, formatting, containers)
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
// ZED includes
#include <sl/Camera.hpp>
// OpenCV includes
#include <opencv2/opencv.hpp>
// OpenCV dep
#include <opencv2/cvconfig.h>
// Local imports
#include "slmat_to_cvmat.h" // Functions to show images
// onnx includes
//#include "onnx/onnx_pb.h"
//#include "onnx/proto_utils.h"
#include <onnxruntime_cxx_api.h>
// Constructor: loads the ONNX detection model and pre-configures the member
// sl::InitParameters (ULTRA depth, meter units).
// NOTE(review): run() declares its own local sl::InitParameters which shadows
// this member — confirm these settings are ever actually applied.
ZedInference::ZedInference(): Detector("../model/saved_model.onnx") {
    std::cout << "Created ZedInference Class" << std::endl;
    this->running = false;  // grab loop is started later via run()
    // Set configuration parameters
    init_params.depth_mode = sl::DEPTH_MODE::ULTRA; // Use ULTRA depth mode
    init_params.coordinate_units = sl::UNIT::METER; // Use meter units (for depth measurements)
}
// Grabs one frame from the ZED camera, runs object detection on it, looks up
// per-detection 3D distances in the point cloud and (when `visualize` is set)
// draws the results. On grab failure it logs and sleeps 5 s before returning,
// so the caller's loop (run()) effectively retries.
void ZedInference::grabRgbImage() {
    sl::Resolution image_size = zed.getCameraInformation().camera_resolution;
    sl::Mat image_zed(image_size.width, image_size.height, sl::MAT_TYPE::U8_C4);
    sl::Mat point_cloud;
    //sl::Mat image_zed(zed.getResolution(), MAT_TYPE::U8_C4);
    // cv::Mat header over image_zed's pixel buffer — created BEFORE grab so
    // that retrieveImage() below fills the very memory image_ocv points at
    // (presumably zero-copy; see slmat_to_cvmat.h). Do not reorder.
    cv::Mat image_ocv = slMat2cvMat(image_zed);
    std::vector<std::vector<float>> bboxes;     // per detection: see inferenceRgbImage()
    std::vector<std::vector<float>> distances;  // per detection: {class, conf, X, Y, Z}
    if (zed.grab() == sl::ERROR_CODE::SUCCESS) {
        // std::cout << "grab image" << std::endl;
        // Retrieve the left image in sl::Mat
        // The cv::Mat is automatically updated
        zed.retrieveImage(image_zed, sl::VIEW::LEFT);
        zed.retrieveMeasure(point_cloud, sl::MEASURE::XYZRGBA); // Retrieve pointcloud
        // inference image
        bboxes = ZedInference::inferenceRgbImage(image_ocv);
        // Calculate Depth:
        distances = ZedInference::calculateDepth(bboxes, point_cloud);
        if (visualize) {
            ZedInference::visualizeDetections(image_ocv, bboxes, distances);
        }
        // publish distances here
    } else {
        // Grab failed (camera disconnected, SVO ended, ...) — back off before
        // the surrounding loop tries again.
        std::cout << "Could not grab image.\nWaiting for 5 seconds" << std::endl;
        sleep(5);
    }
}
// Opens the ZED camera — falling back to the configured SVO recording if no
// live camera is available — then grabs and processes frames for as long as
// `running` stays true.
//
// Returns 0 if neither the live camera nor the SVO could be opened,
// 1 after the grab loop exits.
//
// NOTE(review): the local InitParameters below shadows the member configured
// in the constructor, so the constructor's settings are not used here.
int ZedInference::run() {
    running = true;
    bool camera_open = false;

    sl::InitParameters init_params;
    init_params.camera_resolution = sl::RESOLUTION::HD720;
    init_params.depth_mode = sl::DEPTH_MODE::ULTRA;
    init_params.camera_fps = 100;
    init_params.coordinate_units = sl::UNIT::METER;

    // First attempt: a live camera.
    std::cout << "Opening Camera..." << std::endl;
    sl::ERROR_CODE err = zed.open(init_params);
    if (err == sl::ERROR_CODE::SUCCESS) {
        camera_open = true;
    } else {
        printf("%s\n", toString(err).c_str());
    }

    // Fallback: replay from an SVO file.
    if (!camera_open) {
        std::cout << "\nOpening SVO..." << std::endl;
        sl::String input_svo_path(this->svo_path.c_str());
        init_params.input.setFromSVOFile(input_svo_path);
        err = zed.open(init_params);
        if (err != sl::ERROR_CODE::SUCCESS) {
            printf("%s\n", toString(err).c_str());
            zed.close();
            return 0; // neither source could be opened
        }
        camera_open = true;
        std::cout << "Successfully opened: " << this->svo_path << std::endl;
    }

    if (running && camera_open) {
        std::cout << "\nRun camera: " << std::endl;
    }
    // Main grab loop; blocks until `running` is cleared elsewhere.
    while (running && camera_open) {
        ZedInference::grabRgbImage();
    }
    // BUG FIX: the original fell off the end of a non-void function
    // (undefined behavior). Report normal loop termination.
    return 1;
}
// For each detected bounding box, samples the point cloud at the box's
// bottom-centre pixel and pairs the resulting 3D point with the detection's
// class id and confidence.
//
// Each entry of `bboxes` is laid out {class, confidence, ymin, xmin, ymax, xmax}
// (pixel coordinates); each returned entry is {class, confidence, X, Y, Z}.
// An empty input yields an empty result.
std::vector<std::vector<float>> ZedInference::calculateDepth(const std::vector<std::vector<float>>& bboxes,
                                                             const sl::Mat &point_cloud) {
    std::vector<std::vector<float>> coneDistances;
    for (const auto &detection : bboxes) {
        // Horizontal centre of the box...
        const int col = (int) (detection[3] + (detection[5] - detection[3]) / 2);
        // ...at its bottom edge (where the object meets the ground).
        const int row = (int) detection[4];
        // Read the 3D point for pixel (col, row) from the point cloud.
        sl::float4 xyz;
        point_cloud.getValue(col, row, &xyz);
        //float color = xyz.w;
        // Carry class and confidence through alongside the coordinates.
        coneDistances.push_back({detection[0], detection[1], xyz.x, xyz.y, xyz.z});
    }
    return coneDistances;
}
// Runs the ONNX detector on one frame and returns its raw detections
// ({class, confidence, ymin, xmin, ymax, xmax} per object).
std::vector<std::vector<float>> ZedInference::inferenceRgbImage(const cv::Mat &rgb_image) {
    return ZedInference::Detector.inference(rgb_image);
}
// Draws every detection onto `inputImage`: a bounding box in the class colour
// plus four text lines to its right — x/y/z distance (metres) and confidence,
// each with two decimals — then shows the frame in an OpenCV window.
//
// `bboxes` entries are {class, confidence, ymin, xmin, ymax, xmax};
// `distances` entries are {class, confidence, X, Y, Z} and must be
// index-aligned with `bboxes` (as produced by calculateDepth).
void ZedInference::visualizeDetections(const cv::Mat& inputImage, const std::vector<std::vector<float>> &bboxes, const std::vector<std::vector<float>> &distances) {
    const float fontScale = 0.5;
    const int thickness = 1;
    const int textSpacing = 2;

    int boxIndex = 0;
    // BUG FIX: iterate by const reference — the original copied every box
    // (and every distance row) each iteration.
    for (const std::vector<float> &box : bboxes) {
        const std::vector<float> &boxDistance = distances[boxIndex];
        boxIndex += 1;

        // Box pixel coordinates, in order {ymin, xmin, ymax, xmax}.
        std::vector<int> coordinates;
        for (std::size_t i = 2; i < box.size(); ++i) {
            coordinates.push_back((int) box[i]);
        }
        int class_id = (int) box[0];
        float confidence = box[1];

        // Rectangle around the detected object.
        cv::Point min = cv::Point(coordinates[1], coordinates[0]); // (xmin, ymin)
        cv::Point max = cv::Point(coordinates[3], coordinates[2]); // (xmax, ymax)
        // -1 on the colour index because background has class id 0.
        // BUG FIX: thickness was the float literal 1.5, silently truncated to 1
        // by cv::rectangle's int parameter — now passed explicitly as an int.
        cv::rectangle(inputImage, min, max, this->COLORS[class_id - 1], thickness);

        // Format x/y/z distances with two-decimal precision.
        std::vector<std::string> coords_to_string; // x, y, z
        for (int n = 2; n <= 4; ++n) {
            std::stringstream stream;
            stream << std::fixed << std::setprecision(2) << boxDistance[n];
            coords_to_string.push_back(stream.str());
        }
        std::string xText = "x: " + coords_to_string[0] + "m";
        std::string yText = "y: " + coords_to_string[1] + "m";
        std::string zText = "z: " + coords_to_string[2] + "m";
        std::stringstream conf_stream;
        conf_stream << std::fixed << std::setprecision(2) << confidence;
        std::string confText = "P: " + conf_stream.str();

        // Stack the four text lines to the right of the box, one line height apart.
        int baseline = 0;
        cv::Size text_size = cv::getTextSize(xText, cv::FONT_HERSHEY_SIMPLEX, fontScale, thickness, &baseline);
        baseline += thickness;
        const int lineStep = text_size.height + textSpacing;
        cv::Point xTextPoint = cv::Point(coordinates[3] + 5, coordinates[0] + lineStep);
        cv::Point yTextPoint = cv::Point(coordinates[3] + 5, coordinates[0] + lineStep * 2);
        cv::Point zTextPoint = cv::Point(coordinates[3] + 5, coordinates[0] + lineStep * 3);
        cv::Point confTextPoint = cv::Point(coordinates[3] + 5, coordinates[0] + lineStep * 4);
        cv::putText(inputImage, xText, xTextPoint, cv::FONT_HERSHEY_SIMPLEX, fontScale, this->COLORS[class_id - 1], thickness);
        cv::putText(inputImage, yText, yTextPoint, cv::FONT_HERSHEY_SIMPLEX, fontScale, this->COLORS[class_id - 1], thickness);
        cv::putText(inputImage, zText, zTextPoint, cv::FONT_HERSHEY_SIMPLEX, fontScale, this->COLORS[class_id - 1], thickness);
        cv::putText(inputImage, confText, confTextPoint, cv::FONT_HERSHEY_SIMPLEX, fontScale, this->COLORS[class_id - 1], thickness);
    }
    cv::imshow("Image", inputImage);
    cv::waitKey(1);
}