171 lines
6.9 KiB
C++
171 lines
6.9 KiB
C++
#include "yolov9_openvino.h"
|
|
#include <opencv2/dnn.hpp>
|
|
|
|
const vector<string> coconame = {
|
|
"person", "bicycle", "car", "motorcycle", "airplane", "bus", "train",
|
|
"truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
|
|
"bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
|
|
"bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie",
|
|
"suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat",
|
|
"baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
|
|
"fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich",
|
|
"orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
|
|
"chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv",
|
|
"laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven",
|
|
"toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
|
|
"teddy bear", "hair drier", "toothbrush" };
|
|
|
|
|
|
Resize Yolov9::resize_and_pad(cv::Mat& img)
|
|
{
|
|
ov::Shape input_shape = compiled_model.input().get_shape();
|
|
|
|
float width = img.cols;
|
|
float height = img.rows;
|
|
float r = float(input_shape[1] / max(width, height));
|
|
int new_unpadW = int(round(width * r));
|
|
int new_unpadH = int(round(height * r));
|
|
Resize resize;
|
|
cv::resize(img, resize.resized_image, cv::Size(new_unpadW, new_unpadH), 0, 0, cv::INTER_AREA);
|
|
|
|
resize.dw = input_shape[1] - new_unpadW;
|
|
resize.dh = input_shape[2] - new_unpadH;
|
|
cv::Scalar color = cv::Scalar(100, 100, 100);
|
|
cv::copyMakeBorder(resize.resized_image, resize.resized_image, 0, resize.dh, 0, resize.dw, cv::BORDER_CONSTANT, color);
|
|
|
|
return resize;
|
|
}
|
|
|
|
Yolov9::Yolov9(const string &model_path)
|
|
{
|
|
// Step 1. Initialize OpenVINO Runtime core
|
|
ov::Core core;
|
|
// Step 2. Read a model
|
|
std::shared_ptr<ov::Model> model = core.read_model(model_path);
|
|
// Step 3. Inizialize Preprocessing for the model
|
|
ov::preprocess::PrePostProcessor ppp = ov::preprocess::PrePostProcessor(model);
|
|
// Specify input image format
|
|
ppp.input().tensor().set_element_type(ov::element::u8).set_layout("NHWC").set_color_format(ov::preprocess::ColorFormat::BGR);
|
|
// Specify preprocess pipeline to input image without resizing
|
|
ppp.input().preprocess().convert_element_type(ov::element::f32).convert_color(ov::preprocess::ColorFormat::RGB).scale({ 255., 255., 255. });
|
|
// Specify model's input layout
|
|
ppp.input().model().set_layout("NCHW");
|
|
// Specify output results format
|
|
ppp.output().tensor().set_element_type(ov::element::f32);
|
|
// Embed above steps in the graph
|
|
model = ppp.build();
|
|
compiled_model = core.compile_model(model, "CPU");
|
|
|
|
// Create random colors
|
|
random_device rd;
|
|
mt19937 gen(rd());
|
|
uniform_int_distribution<int> dis(100, 255);
|
|
for (int i = 0; i < coconame.size(); i++)
|
|
{
|
|
Scalar color = Scalar(dis(gen), dis(gen), dis(gen));
|
|
colors.push_back(color);
|
|
}
|
|
}
|
|
|
|
void Yolov9::predict(cv::Mat &img, std::vector<Detection> &output)
|
|
{
|
|
// Step 5. Create tensor from image
|
|
float* input_data = (float*)img.data;
|
|
ov::Tensor input_tensor = ov::Tensor(compiled_model.input().get_element_type(), compiled_model.input().get_shape(), input_data);
|
|
|
|
// Step 6. Create an infer request for model inference
|
|
ov::InferRequest infer_request = compiled_model.create_infer_request();
|
|
infer_request.set_input_tensor(input_tensor);
|
|
infer_request.infer();
|
|
|
|
//Step 7. Retrieve inference results
|
|
const ov::Tensor& output_tensor = infer_request.get_output_tensor();
|
|
ov::Shape output_shape = output_tensor.get_shape();
|
|
|
|
float* detections = output_tensor.data<float>();
|
|
|
|
// Step 8. Postprocessing including NMS
|
|
vector<Rect> boxes;
|
|
vector<int> class_ids;
|
|
vector<float> confidences;
|
|
|
|
const Mat det_output(output_shape[1], output_shape[2], CV_32F, detections);
|
|
|
|
for (int i = 0; i < det_output.cols; ++i) {
|
|
const Mat classes_scores = det_output.col(i).rowRange(4, output_shape[1]);
|
|
Point class_id_point;
|
|
double score;
|
|
minMaxLoc(classes_scores, nullptr, &score, nullptr, &class_id_point);
|
|
|
|
if (score > CONFIDENCE_THRESHOLD) {
|
|
const float cx = det_output.at<float>(0, i);
|
|
const float cy = det_output.at<float>(1, i);
|
|
const float ow = det_output.at<float>(2, i);
|
|
const float oh = det_output.at<float>(3, i);
|
|
Rect box;
|
|
box.x = static_cast<int>((cx - 0.5 * ow));
|
|
box.y = static_cast<int>((cy - 0.5 * oh));
|
|
box.width = static_cast<int>(ow);
|
|
box.height = static_cast<int>(oh);
|
|
|
|
boxes.push_back(box);
|
|
class_ids.push_back(class_id_point.y);
|
|
confidences.push_back(score);
|
|
}
|
|
}
|
|
|
|
vector<int> nms_result;
|
|
dnn::NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD, nms_result);
|
|
|
|
for (int i = 0; i < nms_result.size(); i++)
|
|
{
|
|
Detection result;
|
|
int idx = nms_result[i];
|
|
result.class_id = class_ids[idx];
|
|
result.confidence = confidences[idx];
|
|
result.box = boxes[idx];
|
|
output.push_back(result);
|
|
}
|
|
}
|
|
|
|
void Yolov9::draw(Mat& img, vector<Detection>& output, float dw, float dh)
|
|
{
|
|
// Step 9. Print results and save Figure with detections
|
|
ov::Shape input_shape = compiled_model.input().get_shape();
|
|
|
|
for (int i = 0; i < output.size(); i++)
|
|
{
|
|
auto detection = output[i];
|
|
auto box = detection.box;
|
|
auto classId = detection.class_id;
|
|
auto confidence = detection.confidence;
|
|
float rx = (float)img.cols / (float)(input_shape[1] - dw);
|
|
float ry = (float)img.rows / (float)(input_shape[2] - dh);
|
|
box.x = rx * box.x;
|
|
box.y = ry * box.y;
|
|
box.width = rx * box.width;
|
|
box.height = ry * box.height;
|
|
float xmax = box.x + box.width;
|
|
float ymax = box.y + box.height;
|
|
|
|
rectangle(img, Point(box.x, box.y), Point(box.x + box.width, box.y + box.height), colors[classId], 3);
|
|
|
|
// Detection box text
|
|
string class_string = coconame[classId] + ' ' + to_string(confidence).substr(0, 4);
|
|
Size text_size = getTextSize(class_string, FONT_HERSHEY_DUPLEX, 1, 2, 0);
|
|
Rect text_rect(box.x, box.y - 40, text_size.width + 10, text_size.height + 20);
|
|
rectangle(img, text_rect, colors[classId], FILLED);
|
|
putText(img, class_string, Point(box.x + 5, box.y - 10), FONT_HERSHEY_DUPLEX, 1, Scalar(0, 0, 0), 2, 0);
|
|
}
|
|
}
|
|
|
|
void Yolov9::setConf(float conf)
|
|
{
|
|
CONFIDENCE_THRESHOLD = conf;
|
|
}
|
|
|
|
void Yolov9::setNMS(float nms)
|
|
{
|
|
NMS_THRESHOLD = nms;
|
|
}
|